Sat Nov 23 16:36:39 2019 UTC
For this case during a build.sh run:

        rw_enter(lock, RW_READER);

Instrumenting it shows that >99.5% of the time the lock is completely
unowned.  Make this assumption in the assembly stub for rw_enter(), and
avoid the initial read of the lock word.  Where there are existing read
holds, we'll do an additional CMPXCHG, but should already have the cache
line in the EXCLUSIVE state.
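
For illustration only (not part of the commit), a minimal C sketch of the
old and new reader fast paths.  The RW_* values are assumed for the sketch,
the function names are hypothetical, and GCC's __atomic builtins stand in
for the LOCK'd CMPXCHG in the stubs below:

        /* Flag values assumed for illustration; the kernel's real
         * RW_* constants live elsewhere. */
        #include <stdbool.h>
        #include <stdint.h>

        #define RW_WRITE_LOCKED 0x01UL
        #define RW_WRITE_WANTED 0x02UL
        #define RW_READ_INCR    0x20UL

        /* Old fast path: a plain load first, so the cache line arrives
         * SHARED and must be upgraded to EXCLUSIVE for the cmpxchg. */
        static bool
        rw_read_enter_old(volatile uintptr_t *lockword)
        {
                uintptr_t old = *lockword;

                for (;;) {
                        if (old & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
                                return false;   /* take the slow path */
                        if (__atomic_compare_exchange_n(lockword, &old,
                            old + RW_READ_INCR, false,
                            __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                                return true;
                        /* a failed CAS reloads 'old'; retry */
                }
        }

        /* New fast path: guess the lock word is zero (unowned) and go
         * straight to cmpxchg, taking the line EXCLUSIVE immediately. */
        static bool
        rw_read_enter_new(volatile uintptr_t *lockword)
        {
                uintptr_t old = 0;      /* the >99.5% assumption */

                for (;;) {
                        if (old & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
                                return false;
                        if (__atomic_compare_exchange_n(lockword, &old,
                            old + RW_READ_INCR, false,
                            __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                                return true;
                }
        }

If readers already hold the lock, the first compare-exchange fails, but by
then the CPU has taken the cache line exclusive, so the immediate retry is
cheap; that is the extra CMPXCHG the message refers to.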


(ad)
diff -r1.33 -r1.34 src/sys/arch/amd64/amd64/lock_stubs.S
diff -r1.30 -r1.31 src/sys/arch/i386/i386/lock_stubs.S

cvs diff -r1.33 -r1.34 src/sys/arch/amd64/amd64/lock_stubs.S

--- src/sys/arch/amd64/amd64/lock_stubs.S 2019/11/14 16:23:52 1.33
+++ src/sys/arch/amd64/amd64/lock_stubs.S 2019/11/23 16:36:38 1.34
@@ -1,14 +1,14 @@
-/*	$NetBSD: lock_stubs.S,v 1.33 2019/11/14 16:23:52 maxv Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.34 2019/11/23 16:36:38 ad Exp $	*/
 
 /*
  * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
@@ -181,49 +181,50 @@ ENTRY(mutex_spin_exit)
 4:
 	jmp	1b
 
 #endif	/* DIAGNOSTIC */
 
 END(mutex_spin_exit)
 
 /*
  * void	rw_enter(krwlock_t *rwl, krw_t op);
  *
  * Acquire one hold on a RW lock.
  */
 ENTRY(rw_enter)
-	cmpl	$RW_READER, %esi
+	xorl	%eax, %eax
+	testl	%esi, %esi		/* RW_READER = 0 */
 	jne	2f
 
 	/*
-	 * Reader: this is the most common case.
+	 * Reader, and no existing readers on the lock: this is a most
+	 * common case.  Instead of reading from the lock word, use cmpxchg
+	 * and get the cache line into the EXCLUSIVE state to begin with.
 	 */
-	movq	(%rdi), %rax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
 	jnz	3f
 	leaq	RW_READ_INCR(%rax), %rdx
 	LOCK
 	cmpxchgq %rdx, (%rdi)
 	jnz	1f
 	RET
 1:
 	jmp	0b
 
 	/*
 	 * Writer: if the compare-and-set fails, don't bother retrying.
 	 */
 2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
 	orq	$RW_WRITE_LOCKED, %rcx
 	LOCK
 	cmpxchgq %rcx, (%rdi)
 	jnz	3f
 	RET
 3:
 	jmp	_C_LABEL(rw_vector_enter)
 END(rw_enter)
 
 /*
  * void	rw_exit(krwlock_t *rwl);
  *
  * Release one hold on a RW lock.
@@ -258,50 +259,51 @@ ENTRY(rw_exit)
 	cmpxchgq %rdx, (%rdi)
 	jnz	3f
 	ret
 
 3:	jmp	_C_LABEL(rw_vector_exit)
 END(rw_exit)
 
 /*
  * int	rw_tryenter(krwlock_t *rwl, krw_t op);
  *
  * Try to acquire one hold on a RW lock.
  */
 ENTRY(rw_tryenter)
-	cmpl	$RW_READER, %esi
+	xorl	%eax, %eax
+	testl	%esi, %esi		/* RW_READER = 0 */
 	jne	2f
 
 	/*
-	 * Reader: this is the most common case.
+	 * Reader, and no existing readers on the lock: this is a most
+	 * common case.  Instead of reading from the lock word, use cmpxchg
+	 * and get the cache line into the EXCLUSIVE state to begin with.
 	 */
-	movq	(%rdi), %rax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
 	jnz	4f
 	leaq	RW_READ_INCR(%rax), %rdx
 	LOCK
 	cmpxchgq %rdx, (%rdi)
 	jnz	1f
 	movl	%edx, %eax		/* nonzero */
 	RET
 1:
 	jmp	0b
 
 	/*
 	 * Writer: if the compare-and-set fails, don't bother retrying.
 	 */
 2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
 	orq	$RW_WRITE_LOCKED, %rcx
 	LOCK
 	cmpxchgq %rcx, (%rdi)
 	movl	$0, %eax
 	setz	%al
 3:
 	RET
 	ret
 4:
 	xorl	%eax, %eax
 	jmp	3b
 END(rw_tryenter)
 

cvs diff -r1.30 -r1.31 src/sys/arch/i386/i386/lock_stubs.S

--- src/sys/arch/i386/i386/lock_stubs.S 2019/02/11 14:59:32 1.30
+++ src/sys/arch/i386/i386/lock_stubs.S 2019/11/23 16:36:38 1.31
@@ -1,17 +1,17 @@
-/*	$NetBSD: lock_stubs.S,v 1.30 2019/02/11 14:59:32 cherry Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.31 2019/11/23 16:36:38 ad Exp $	*/
 
 /*-
- * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
@@ -25,27 +25,27 @@
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Where possible we make each routine fit into an assumed 64-byte cache
  * line.  Please check alignment with 'objdump -d' after making changes.
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.30 2019/02/11 14:59:32 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.31 2019/11/23 16:36:38 ad Exp $");
 
 #include "opt_lockdebug.h"
 
 #include <machine/cputypes.h>
 #include <machine/frameasm.h>
 
 #include "assym.h"
 
 #define	ALIGN64		.align	64
 #define	ALIGN32		.align	32
 #define	LOCK(num)	\
 	HOTPATCH(HP_NAME_NOLOCK, 1)	; \
 	lock
@@ -94,48 +94,50 @@ ENTRY(mutex_exit)
 	jnz	1f
 	ret
 1:
 	jmp	_C_LABEL(mutex_vector_exit)
 END(mutex_exit)
 
 /*
  * void	rw_enter(krwlock_t *rwl, krw_t op);
  *
  * Acquire one hold on a RW lock.
  */
 ENTRY(rw_enter)
 	movl	4(%esp), %edx
+	xorl	%eax, %eax
 	cmpl	$RW_READER, 8(%esp)
 	jne	2f
 
 	/*
-	 * Reader
+	 * Reader, and no existing readers on the lock: this is a most
+	 * common case.  Instead of reading from the lock word, use cmpxchg
+	 * and get the cache line into the EXCLUSIVE state to begin with.
 	 */
-	movl	(%edx), %eax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
 	jnz	3f
 	leal	RW_READ_INCR(%eax), %ecx
 	LOCK(2)
 	cmpxchgl %ecx, (%edx)
 	jnz	1f
 	RET(2)
 1:
 	jmp	0b
 
 	/*
 	 * Writer
 	 */
-2:	xorl	%eax, %eax
+2:
 	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
 	orl	$RW_WRITE_LOCKED, %ecx
 	LOCK(3)
 	cmpxchgl %ecx, (%edx)
 	jnz	3f
 	RET(3)
 3:
 	jmp	_C_LABEL(rw_vector_enter)
 END(rw_enter)
 
 /*
  * void	rw_exit(krwlock_t *rwl);
  *
@@ -176,50 +178,51 @@ ENTRY(rw_exit)
 	/*
 	 * Slow path.
 	 */
 3:	jmp	_C_LABEL(rw_vector_exit)
 END(rw_exit)
 
 /*
  * int	rw_tryenter(krwlock_t *rwl, krw_t op);
  *
  * Try to acquire one hold on a RW lock.
  */
 ENTRY(rw_tryenter)
 	movl	4(%esp), %edx
+	xorl	%eax, %eax
 	cmpl	$RW_READER, 8(%esp)
 	jne	2f
 
 	/*
-	 * Reader
+	 * Reader, and no existing readers on the lock: this is a most
+	 * common case.  Instead of reading from the lock word, use cmpxchg
+	 * and get the cache line into the EXCLUSIVE state to begin with.
 	 */
-	movl	(%edx), %eax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
 	jnz	4f
 	leal	RW_READ_INCR(%eax), %ecx
 	LOCK(12)
 	cmpxchgl %ecx, (%edx)
 	jnz	1f
 	movl	%edx, %eax		/* nonzero */
 	RET(4)
 1:
 	jmp	0b
 
 	/*
 	 * Writer
 	 */
 2:
-	xorl	%eax, %eax
 	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
 	orl	$RW_WRITE_LOCKED, %ecx
 	LOCK(13)
 	cmpxchgl %ecx, (%edx)
 	movl	$0, %eax
 	setz	%al
 3:
 	RET(5)
 4:
 	xorl	%eax, %eax
 	jmp	3b
 END(rw_tryenter)
 