@@ -1,411 +1,415 @@
1 | /* $NetBSD: lock_stubs.s,v 1.8 2021/07/13 13:58:30 thorpej Exp $ */ | | 1 | /* $NetBSD: lock_stubs.s,v 1.9 2021/07/14 02:18:10 thorpej Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Andrew Doran, and by Jason R. Thorpe. | | 8 | * by Andrew Doran, and by Jason R. Thorpe. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | #include "opt_lockdebug.h" | | 32 | #include "opt_lockdebug.h" |
33 | #include "opt_multiprocessor.h" | | 33 | #include "opt_multiprocessor.h" |
34 | | | 34 | |
35 | #include <machine/asm.h> | | 35 | #include <machine/asm.h> |
36 | | | 36 | |
37 | __KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.8 2021/07/13 13:58:30 thorpej Exp $"); | | 37 | __KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.9 2021/07/14 02:18:10 thorpej Exp $"); |
38 | | | 38 | |
39 | #include "assym.h" | | 39 | #include "assym.h" |
40 | | | 40 | |
41 | #if defined(MULTIPROCESSOR) | | 41 | #if defined(MULTIPROCESSOR) |
42 | /* | | 42 | /* |
43 | * These 'unop' insns will be patched with 'mb' insns at run-time if | | 43 | * These 'unop' insns will be patched with 'mb' insns at run-time if |
44 | * the system has more than one processor. | | 44 | * the system has more than one processor. |
45 | */ | | 45 | */ |
46 | #define MB(label) label: unop | | 46 | #define MB(label) label: unop |
47 | #else | | 47 | #else |
48 | #define MB(label) /* nothing */ | | 48 | #define MB(label) /* nothing */ |
49 | #endif | | 49 | #endif |
50 | | | 50 | |
51 | /* | | 51 | /* |
52 | * int _lock_cas(uintptr_t *ptr, uintptr_t old, uintptr_t new) | | 52 | * int _lock_cas(uintptr_t *ptr, uintptr_t old, uintptr_t new) |
53 | */ | | 53 | */ |
54 | LEAF(_lock_cas, 3) | | 54 | LEAF(_lock_cas, 3) |
55 | 1: | | 55 | 1: |
56 | mov a2, v0 | | 56 | mov a2, v0 |
57 | ldq_l t1, 0(a0) | | 57 | ldq_l t1, 0(a0) |
58 | cmpeq t1, a1, t1 | | 58 | cmpeq t1, a1, t1 |
59 | beq t1, 2f | | 59 | beq t1, 2f |
60 | stq_c v0, 0(a0) | | 60 | stq_c v0, 0(a0) |
61 | beq v0, 3f | | 61 | beq v0, 3f |
62 | MB(.L__lock_cas_mb_1) | | 62 | MB(.L__lock_cas_mb_1) |
63 | RET | | 63 | RET |
64 | 2: | | 64 | 2: |
65 | mov zero, v0 | | 65 | mov zero, v0 |
66 | MB(.L__lock_cas_mb_2) | | 66 | MB(.L__lock_cas_mb_2) |
67 | RET | | 67 | RET |
68 | 3: | | 68 | 3: |
69 | br 1b | | 69 | br 1b |
70 | END(_lock_cas) | | 70 | END(_lock_cas) |
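In C terms, the LDQ_L/STQ_C sequence in _lock_cas above is a compare-and-swap: store 'new' only if '*ptr' still holds 'old', retrying when the store-conditional fails (the branch to label 3). A minimal sketch of the equivalent semantics, assuming C11 atomics; 'lock_cas_sketch' is a hypothetical stand-in, not the kernel API:

    #include <stdint.h>
    #include <stdatomic.h>

    /*
     * Hypothetical C rendering of _lock_cas(): returns non-zero when
     * *ptr contained 'old' and has been replaced with 'new'.  The
     * retry on STQ_C failure happens inside the C11 strong CAS, just
     * as the stub re-runs its LL/SC sequence via label 3.
     */
    static int
    lock_cas_sketch(_Atomic uintptr_t *ptr, uintptr_t old, uintptr_t new)
    {
        return atomic_compare_exchange_strong(ptr, &old, new);
    }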
71 | | | 71 | |
72 | #if !defined(LOCKDEBUG) | | 72 | #if !defined(LOCKDEBUG) |
73 | | | 73 | |
74 | /* | | 74 | /* |
75 | * void mutex_enter(kmutex_t *mtx); | | 75 | * void mutex_enter(kmutex_t *mtx); |
76 | */ | | 76 | */ |
77 | LEAF(mutex_enter, 1) | | 77 | LEAF(mutex_enter, 1) |
78 | LDGP(pv) | | 78 | LDGP(pv) |
79 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 79 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
80 | 1: | | 80 | 1: |
81 | mov v0, t1 | | 81 | mov v0, t1 |
82 | ldq_l t2, 0(a0) | | 82 | ldq_l t2, 0(a0) |
83 | bne t2, 2f | | 83 | bne t2, 2f |
84 | stq_c t1, 0(a0) | | 84 | stq_c t1, 0(a0) |
85 | beq t1, 3f | | 85 | beq t1, 3f |
86 | MB(.L_mutex_enter_mb_1) | | 86 | MB(.L_mutex_enter_mb_1) |
87 | RET | | 87 | RET |
88 | 2: | | 88 | 2: |
89 | lda t12, mutex_vector_enter | | 89 | lda t12, mutex_vector_enter |
90 | jmp (t12) | | 90 | jmp (t12) |
91 | 3: | | 91 | 3: |
92 | br 1b | | 92 | br 1b |
93 | END(mutex_enter) | | 93 | END(mutex_enter) |
94 | | | 94 | |
95 | /* | | 95 | /* |
96 | * void mutex_exit(kmutex_t *mtx); | | 96 | * void mutex_exit(kmutex_t *mtx); |
97 | */ | | 97 | */ |
98 | LEAF(mutex_exit, 1) | | 98 | LEAF(mutex_exit, 1) |
99 | LDGP(pv) | | 99 | LDGP(pv) |
100 | MB(.L_mutex_exit_mb_1) | | 100 | MB(.L_mutex_exit_mb_1) |
101 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 101 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
102 | mov zero, t3 | | 102 | mov zero, t3 |
103 | 1: | | 103 | 1: |
104 | ldq_l t2, 0(a0) | | 104 | ldq_l t2, 0(a0) |
105 | cmpeq v0, t2, t2 | | 105 | cmpeq v0, t2, t2 |
106 | beq t2, 2f | | 106 | beq t2, 2f |
107 | stq_c t3, 0(a0) | | 107 | stq_c t3, 0(a0) |
108 | beq t3, 3f | | 108 | beq t3, 3f |
109 | RET | | 109 | RET |
110 | 2: | | 110 | 2: |
111 | lda t12, mutex_vector_exit | | 111 | lda t12, mutex_vector_exit |
112 | jmp (t12) | | 112 | jmp (t12) |
113 | 3: | | 113 | 3: |
114 | br 1b | | 114 | br 1b |
115 | END(mutex_exit) | | 115 | END(mutex_exit) |
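The pair above is the classic adaptive-mutex fast path: mutex_enter stores curlwp into an owner word that must be zero (unowned), and mutex_exit clears it only while the word is exactly curlwp (any waiter bit set makes the compare fail); everything else tail-jumps to the mutex_vector_* slow paths. A hedged C sketch of that shape, with standalone helpers rather than the real kernel functions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdatomic.h>

    /* Fast-path shape of the stubs above; a 'false' return is where
     * the real code jumps to mutex_vector_enter()/mutex_vector_exit(). */
    static bool
    mutex_enter_fast(_Atomic uintptr_t *owner, uintptr_t curlwp)
    {
        uintptr_t unowned = 0;
        /* acquire only if currently free */
        return atomic_compare_exchange_strong(owner, &unowned, curlwp);
    }

    static bool
    mutex_exit_fast(_Atomic uintptr_t *owner, uintptr_t curlwp)
    {
        /* release only if we own it and no waiter bits are set */
        return atomic_compare_exchange_strong(owner, &curlwp, (uintptr_t)0);
    }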
116 | | | 116 | |
| | | 117 | #if 0 /* XXX disabled for now XXX */ |
117 | /* | | 118 | /* |
118 | * void mutex_spin_enter(kmutex_t *mtx); | | 119 | * void mutex_spin_enter(kmutex_t *mtx); |
119 | */ | | 120 | */ |
120 | LEAF(mutex_spin_enter, 1); | | 121 | LEAF(mutex_spin_enter, 1); |
121 | LDGP(pv) | | 122 | LDGP(pv) |
122 | | | 123 | |
123 | /* | | 124 | /* |
124 | * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function. | | 125 | * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function. |
125 | * (see sys/kern/kern_mutex.c) | | 126 | * (see sys/kern/kern_mutex.c) |
126 | * | | 127 | * |
127 | * s = splraise(mtx->mtx_ipl); | | 128 | * s = splraise(mtx->mtx_ipl); |
128 | * if (curcpu->ci_mtx_count-- == 0) | | 129 | * if (curcpu->ci_mtx_count-- == 0) |
129 | * curcpu->ci_mtx_oldspl = s; | | 130 | * curcpu->ci_mtx_oldspl = s; |
130 | */ | | 131 | */ |
131 | | | 132 | |
132 | call_pal PAL_OSF1_rdps /* clobbers v0, t0, t8..t11 */ | | 133 | call_pal PAL_OSF1_rdps /* clobbers v0, t0, t8..t11 */ |
133 | /* v0 = cur_ipl */ | | 134 | /* v0 = cur_ipl */ |
134 | #ifdef __BWX__ | | 135 | #ifdef __BWX__ |
135 | mov a0, a1 /* a1 = mtx */ | | 136 | mov a0, a1 /* a1 = mtx */ |
136 | ldbu a0, MUTEX_IPL(a0) /* a0 = new_ipl */ | | 137 | ldbu a0, MUTEX_IPL(a0) /* a0 = new_ipl */ |
137 | mov v0, a4 /* save cur_ipl in a4 */ | | 138 | mov v0, a4 /* save cur_ipl in a4 */ |
138 | #else | | 139 | #else |
139 | mov a0, a1 /* a1 = mtx */ | | 140 | mov a0, a1 /* a1 = mtx */ |
140 | ldq_u a2, MUTEX_IPL(a0) | | 141 | ldq_u a2, MUTEX_IPL(a0) |
141 | mov v0, a4 /* save cur_ipl in a4 */ | | 142 | mov v0, a4 /* save cur_ipl in a4 */ |
142 | extbl a2, MUTEX_IPL, a0 /* a0 = new_ipl */ | | 143 | extbl a2, MUTEX_IPL, a0 /* a0 = new_ipl */ |
143 | #endif /* __BWX__ */ | | 144 | #endif /* __BWX__ */ |
144 | cmplt v0, a0, a3 /* a3 = (cur_ipl < new_ipl) */ | | 145 | cmplt v0, a0, a3 /* a3 = (cur_ipl < new_ipl) */ |
145 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 146 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
146 | mov v0, a5 /* save curlwp in a5 */ | | 147 | mov v0, a5 /* save curlwp in a5 */ |
147 | /* | | 148 | /* |
148 | * The forward-branch over the SWPIPL call is correctly predicted | | 149 | * The forward-branch over the SWPIPL call is correctly predicted |
149 | * not-taken by the CPU because it's rare for a code path to acquire | | 150 | * not-taken by the CPU because it's rare for a code path to acquire |
150 | * 2 spin mutexes. | | 151 | * 2 spin mutexes. |
151 | */ | | 152 | */ |
152 | beq a3, 1f /* no? -> skip... */ | | 153 | beq a3, 1f /* no? -> skip... */ |
153 | call_pal PAL_OSF1_swpipl /* clobbers v0, t0, t8..t11 */ | | 154 | call_pal PAL_OSF1_swpipl /* clobbers v0, t0, t8..t11 */ |
154 | /* | | 155 | /* |
155 | * v0 returns the old_ipl, which will be the same as the | | 156 | * v0 returns the old_ipl, which will be the same as the |
156 | * cur_ipl we squirreled away in a4 earlier. | | 157 | * cur_ipl we squirreled away in a4 earlier. |
157 | */ | | 158 | */ |
158 | 1: | | 159 | 1: |
159 | /* | | 160 | /* |
160 | * curlwp->l_cpu is now stable. Update the counter and | | 161 | * curlwp->l_cpu is now stable. Update the counter and |
161 | * stash the old_ipl. Just in case it's not clear what's | | 162 | * stash the old_ipl. Just in case it's not clear what's |
162 | * going on, we: | | 163 | * going on, we: |
163 | * | | 164 | * |
164 | * - Load previous value of mtx_oldspl into t1. | | 165 | * - Load previous value of mtx_oldspl into t1. |
165 | * - Conditionally move old_ipl into t1 if mtx_count == 0. | | 166 | * - Conditionally move old_ipl into t1 if mtx_count == 0. |
166 | * - Store t1 back to mtx_oldspl; if mtx_count != 0, | | 167 | * - Store t1 back to mtx_oldspl; if mtx_count != 0, |
167 | * the store is redundant, but it's faster than a forward | | 168 | * the store is redundant, but it's faster than a forward |
168 | * branch. | | 169 | * branch. |
169 | */ | | 170 | */ |
170 | ldq a3, L_CPU(a5) /* a3 = curlwp->l_cpu (curcpu) */ | | 171 | ldq a3, L_CPU(a5) /* a3 = curlwp->l_cpu (curcpu) */ |
171 | ldl t0, CPU_INFO_MTX_COUNT(a3) | | 172 | ldl t0, CPU_INFO_MTX_COUNT(a3) |
172 | ldl t1, CPU_INFO_MTX_OLDSPL(a3) | | 173 | ldl t1, CPU_INFO_MTX_OLDSPL(a3) |
173 | cmoveq t0, a4, t1 /* mtx_count == 0? -> t1 = old_ipl */ | | 174 | cmoveq t0, a4, t1 /* mtx_count == 0? -> t1 = old_ipl */ |
174 | subl t0, 1, t2 /* mtx_count-- */ | | 175 | subl t0, 1, t2 /* mtx_count-- */ |
175 | stl t1, CPU_INFO_MTX_OLDSPL(a3) | | 176 | stl t1, CPU_INFO_MTX_OLDSPL(a3) |
176 | stl t2, CPU_INFO_MTX_COUNT(a3) | | 177 | stl t2, CPU_INFO_MTX_COUNT(a3) |
177 | | | 178 | |
178 | /* | | 179 | /* |
179 | * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock) | | 180 | * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock) |
180 | */ | | 181 | */ |
181 | ldl_l t0, MUTEX_SIMPLELOCK(a1) | | 182 | ldl_l t0, MUTEX_SIMPLELOCK(a1) |
182 | ldiq t1, __SIMPLELOCK_LOCKED | | 183 | ldiq t1, __SIMPLELOCK_LOCKED |
183 | bne t0, 2f /* contended */ | | 184 | bne t0, 2f /* contended */ |
184 | stl_c t1, MUTEX_SIMPLELOCK(a1) | | 185 | stl_c t1, MUTEX_SIMPLELOCK(a1) |
185 | beq t1, 2f /* STL_C failed; consider contended */ | | 186 | beq t1, 2f /* STL_C failed; consider contended */ |
186 | MB(.L_mutex_spin_enter_mb_1) | | 187 | MB(.L_mutex_spin_enter_mb_1) |
187 | RET | | 188 | RET |
188 | 2: | | 189 | 2: |
189 | mov a1, a0 /* restore first argument */ | | 190 | mov a1, a0 /* restore first argument */ |
190 | lda pv, mutex_spin_retry | | 191 | lda pv, mutex_spin_retry |
191 | jmp (pv) | | 192 | jmp (pv) |
192 | END(mutex_spin_enter) | | 193 | END(mutex_spin_enter) |
193 | | | 194 | |
194 | /* | | 195 | /* |
195 | * void mutex_spin_exit(kmutex_t *mtx); | | 196 | * void mutex_spin_exit(kmutex_t *mtx); |
196 | */ | | 197 | */ |
197 | LEAF(mutex_spin_exit, 1) | | 198 | LEAF(mutex_spin_exit, 1) |
198 | LDGP(pv); | | 199 | LDGP(pv); |
199 | MB(.L_mutex_spin_exit_mb_1) | | 200 | MB(.L_mutex_spin_exit_mb_1) |
200 | | | 201 | |
201 | /* | | 202 | /* |
202 | * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock) | | 203 | * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock) |
203 | */ | | 204 | */ |
204 | stl zero, MUTEX_SIMPLELOCK(a0) | | 205 | stl zero, MUTEX_SIMPLELOCK(a0) |
205 | | | 206 | |
206 | /* | | 207 | /* |
207 | * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function. | | 208 | * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function. |
208 | * (see sys/kern/kern_mutex.c) | | 209 | * (see sys/kern/kern_mutex.c) |
209 | * | | 210 | * |
210 | * s = curcpu->ci_mtx_oldspl; | | 211 | * s = curcpu->ci_mtx_oldspl; |
211 | * if (++curcpu->ci_mtx_count == 0) | | 212 | * if (++curcpu->ci_mtx_count == 0) |
212 | * splx(s); | | 213 | * splx(s); |
213 | */ | | 214 | */ |
214 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 215 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
215 | ldq a3, L_CPU(v0) /* a3 = curlwp->l_cpu (curcpu) */ | | 216 | ldq a3, L_CPU(v0) /* a3 = curlwp->l_cpu (curcpu) */ |
216 | ldl t0, CPU_INFO_MTX_COUNT(a3) | | 217 | ldl t0, CPU_INFO_MTX_COUNT(a3) |
217 | ldl a0, CPU_INFO_MTX_OLDSPL(a3) | | 218 | ldl a0, CPU_INFO_MTX_OLDSPL(a3) |
218 | addl t0, 1, t2 /* mtx_count++ */ | | 219 | addl t0, 1, t2 /* mtx_count++ */ |
219 | stl t2, CPU_INFO_MTX_COUNT(a3) | | 220 | stl t2, CPU_INFO_MTX_COUNT(a3) |
220 | /* | | 221 | /* |
221 | * The forward-branch over the SWPIPL call is correctly predicted | | 222 | * The forward-branch over the SWPIPL call is correctly predicted |
222 | * not-taken by the CPU because it's rare for a code path to acquire | | 223 | * not-taken by the CPU because it's rare for a code path to acquire |
223 | * 2 spin mutexes. | | 224 | * 2 spin mutexes. |
224 | */ | | 225 | */ |
225 | bne t2, 1f /* t2 != 0? Skip... */ | | 226 | bne t2, 1f /* t2 != 0? Skip... */ |
226 | call_pal PAL_OSF1_swpipl /* clobbers v0, t0, t8..t11 */ | | 227 | call_pal PAL_OSF1_swpipl /* clobbers v0, t0, t8..t11 */ |
227 | 1: | | 228 | 1: |
228 | RET | | 229 | RET |
229 | END(mutex_spin_exit) | | 230 | END(mutex_spin_exit) |
| | | 231 | #endif /* XXX disabled for now XXX */ |
230 | | | 232 | |
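The pseudocode quoted in the comments above encodes the nesting rule for spin mutexes: ci_mtx_count counts down from zero as locks are taken, so only the outermost acquire records the old IPL and only the matching outermost release restores it. A minimal sketch of that bookkeeping, assuming the field names from the comments (this is not the kernel implementation):

    /* Sketch of MUTEX_SPIN_SPLRAISE()/MUTEX_SPIN_SPLRESTORE(); 'ci'
     * stands in for curcpu(), and splx() is the usual kernel primitive. */
    extern void splx(int);

    struct ci_sketch {
        int mtx_count;   /* 0 minus the number of spin mutexes held */
        int mtx_oldspl;  /* IPL to restore when the count returns to 0 */
    };

    static void
    spin_splraise(struct ci_sketch *ci, int s)  /* s = splraise(mtx->mtx_ipl) */
    {
        if (ci->mtx_count-- == 0)
            ci->mtx_oldspl = s;  /* outermost acquire: remember old IPL */
    }

    static void
    spin_splrestore(struct ci_sketch *ci)
    {
        int s = ci->mtx_oldspl;
        if (++ci->mtx_count == 0)
            splx(s);             /* outermost release: drop back down */
    }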
231 | /* | | 233 | /* |
232 | * void rw_enter(krwlock_t *rwl, krw_t op); | | 234 | * void rw_enter(krwlock_t *rwl, krw_t op); |
233 | * | | 235 | * |
234 | * Acquire one hold on a RW lock. | | 236 | * Acquire one hold on a RW lock. |
235 | */ | | 237 | */ |
236 | LEAF(rw_enter, 2) | | 238 | LEAF(rw_enter, 2) |
237 | LDGP(pv) | | 239 | LDGP(pv) |
238 | | | 240 | |
239 | /* | | 241 | /* |
240 | * RW_READER == 0 (we have a compile-time assert in machdep.c | | 242 | * RW_READER == 0 (we have a compile-time assert in machdep.c |
241 | * to ensure this). | | 243 | * to ensure this). |
242 | * | | 244 | * |
243 | * Acquire for read is the most common case. | | 245 | * Acquire for read is the most common case. |
244 | */ | | 246 | */ |
245 | bne a1, 3f | | 247 | bne a1, 3f |
246 | | | 248 | |
247 | /* Acquiring for read. */ | | 249 | /* Acquiring for read. */ |
248 | 1: ldq_l t0, 0(a0) | | 250 | 1: ldq_l t0, 0(a0) |
249 | and t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1 | | 251 | and t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1 |
250 | addq t0, RW_READ_INCR, t2 | | 252 | addq t0, RW_READ_INCR, t2 |
251 | bne t1, 4f /* contended */ | | 253 | bne t1, 4f /* contended */ |
252 | stq_c t2, 0(a0) | | 254 | stq_c t2, 0(a0) |
253 | beq t2, 2f /* STQ_C failed; retry */ | | 255 | beq t2, 2f /* STQ_C failed; retry */ |
254 | MB(.L_rw_enter_mb_1) | | 256 | MB(.L_rw_enter_mb_1) |
255 | RET | | 257 | RET |
256 | | | 258 | |
257 | 2: br 1b | | 259 | 2: br 1b |
258 | | | 260 | |
259 | 3: /* Acquiring for write. */ | | 261 | 3: /* Acquiring for write. */ |
260 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 262 | GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
261 | ldq_l t0, 0(a0) | | 263 | ldq_l t0, 0(a0) |
262 | or v0, RW_WRITE_LOCKED, t2 | | 264 | or v0, RW_WRITE_LOCKED, t2 |
263 | bne t0, 4f /* contended */ | | 265 | bne t0, 4f /* contended */ |
264 | stq_c t2, 0(a0) | | 266 | stq_c t2, 0(a0) |
265 | beq t2, 4f /* STQ_C failed; consider it contended */ | | 267 | beq t2, 4f /* STQ_C failed; consider it contended */ |
266 | MB(.L_rw_enter_mb_2) | | 268 | MB(.L_rw_enter_mb_2) |
267 | RET | | 269 | RET |
268 | | | 270 | |
269 | 4: lda pv, rw_vector_enter | | 271 | 4: lda pv, rw_vector_enter |
270 | jmp (pv) | | 272 | jmp (pv) |
271 | END(rw_enter) | | 273 | END(rw_enter) |
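The read-acquire path above (rw_tryenter below shares it) bumps the hold count by RW_READ_INCR unless a writer holds or wants the lock. A hedged C equivalent of one attempt; the stub additionally loops when the store-conditional fails:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdatomic.h>

    /* One pass of the read-acquire fast path in rw_enter()/rw_tryenter();
     * 'write_bits' corresponds to RW_WRITE_LOCKED|RW_WRITE_WANTED and
     * 'incr' to RW_READ_INCR from assym.h. */
    static bool
    rw_read_enter_fast(_Atomic uintptr_t *lock, uintptr_t write_bits,
        uintptr_t incr)
    {
        uintptr_t old = atomic_load(lock);
        if (old & write_bits)
            return false;   /* contended: take the slow path */
        return atomic_compare_exchange_strong(lock, &old, old + incr);
    }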
272 | | | 274 | |
273 | /* | | 275 | /* |
274 | * int rw_tryenter(krwlock_t *rwl, krw_t op); | | 276 | * int rw_tryenter(krwlock_t *rwl, krw_t op); |
275 | * | | 277 | * |
276 | * Try to acquire one hold on a RW lock. | | 278 | * Try to acquire one hold on a RW lock. |
277 | */ | | 279 | */ |
278 | LEAF(rw_tryenter, 2) | | 280 | LEAF(rw_tryenter, 2) |
279 | LDGP(pv) | | 281 | LDGP(pv) |
280 | | | 282 | |
281 | /* See above. */ | | 283 | /* See above. */ |
282 | bne a1, 3f | | 284 | bne a1, 3f |
283 | | | 285 | |
284 | /* Acquiring for read. */ | | 286 | /* Acquiring for read. */ |
285 | 1: ldq_l t0, 0(a0) | | 287 | 1: ldq_l t0, 0(a0) |
286 | and t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1 | | 288 | and t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1 |
287 | addq t0, RW_READ_INCR, v0 | | 289 | addq t0, RW_READ_INCR, v0 |
288 | bne t1, 4f /* contended */ | | 290 | bne t1, 4f /* contended */ |
289 | stq_c v0, 0(a0) | | 291 | stq_c v0, 0(a0) |
290 | beq v0, 2f /* STQ_C failed; retry */ | | 292 | beq v0, 2f /* STQ_C failed; retry */ |
291 | MB(.L_rw_tryenter_mb_1) | | 293 | MB(.L_rw_tryenter_mb_1) |
292 | RET /* v0 contains non-zero LOCK_FLAG from STQ_C */ | | 294 | RET /* v0 contains non-zero LOCK_FLAG from STQ_C */ |
293 | | | 295 | |
294 | 2: br 1b | | 296 | 2: br 1b |
295 | | | 297 | |
296 | /* Acquiring for write. */ | | 298 | /* Acquiring for write. */ |
297 | 3: GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 299 | 3: GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
298 | ldq_l t0, 0(a0) | | 300 | ldq_l t0, 0(a0) |
299 | or v0, RW_WRITE_LOCKED, v0 | | 301 | or v0, RW_WRITE_LOCKED, v0 |
300 | bne t0, 4f /* contended */ | | 302 | bne t0, 4f /* contended */ |
301 | stq_c v0, 0(a0) | | 303 | stq_c v0, 0(a0) |
302 | /* | | 304 | /* |
303 | * v0 now contains the LOCK_FLAG value from STQ_C, which is either | | 305 | * v0 now contains the LOCK_FLAG value from STQ_C, which is either |
304 | * 0 for failure, or non-zero for success. In either case, v0's | | 306 | * 0 for failure, or non-zero for success. In either case, v0's |
305 | * value is correct. Go ahead and perform the memory barrier even | | 307 | * value is correct. Go ahead and perform the memory barrier even |
306 | * in the failure case because we expect it to be rare and it saves | | 308 | * in the failure case because we expect it to be rare and it saves |
307 | * a branch-not-taken instruction in the success case. | | 309 | * a branch-not-taken instruction in the success case. |
308 | */ | | 310 | */ |
309 | MB(.L_rw_tryenter_mb_2) | | 311 | MB(.L_rw_tryenter_mb_2) |
310 | RET | | 312 | RET |
311 | | | 313 | |
312 | 4: mov zero, v0 /* return 0 (failure) */ | | 314 | 4: mov zero, v0 /* return 0 (failure) */ |
313 | RET | | 315 | RET |
314 | END(rw_tryenter) | | 316 | END(rw_tryenter) |
315 | | | 317 | |
316 | /* | | 318 | /* |
317 | * void rw_exit(krwlock_t *rwl); | | 319 | * void rw_exit(krwlock_t *rwl); |
318 | * | | 320 | * |
319 | * Release one hold on a RW lock. | | 321 | * Release one hold on a RW lock. |
320 | */ | | 322 | */ |
321 | LEAF(rw_exit, 1) | | 323 | LEAF(rw_exit, 1) |
322 | LDGP(pv) | | 324 | LDGP(pv) |
323 | MB(.L_rw_exit_mb_1) | | 325 | MB(.L_rw_exit_mb_1) |
324 | | | 326 | |
325 | /* | | 327 | /* |
326 | * Check for write-lock release, and get the owner/count field | | 328 | * Check for write-lock release, and get the owner/count field |
327 | * on its own for sanity-checking against expected values. | | 329 | * on its own for sanity-checking against expected values. |
328 | */ | | 330 | */ |
329 | ldq a1, 0(a0) | | 331 | ldq a1, 0(a0) |
330 | and a1, RW_WRITE_LOCKED, t1 | | 332 | and a1, RW_WRITE_LOCKED, t1 |
331 | srl a1, RW_READ_COUNT_SHIFT, a2 | | 333 | srl a1, RW_READ_COUNT_SHIFT, a2 |
332 | bne t1, 3f | | 334 | bne t1, 3f |
333 | | | 335 | |
334 | /* | | 336 | /* |
335 | * Releasing a read-lock. Make sure the count is non-zero. | | 337 | * Releasing a read-lock. Make sure the count is non-zero. |
336 | * If it is zero, take the slow path where the juicy diagnostic | | 338 | * If it is zero, take the slow path where the juicy diagnostic |
337 | * checks are located. | | 339 | * checks are located. |
338 | */ | | 340 | */ |
339 | beq a2, 4f | | 341 | beq a2, 4f |
340 | | | 342 | |
341 | /* | | 343 | /* |
342 | * We do the following trick to check to see if we're releasing | | 344 | * We do the following trick to check to see if we're releasing |
343 | * the last read-count and there are waiters: | | 345 | * the last read-count and there are waiters: |
344 | * | | 346 | * |
345 | * 1. Set v0 to 1. | | 347 | * 1. Set v0 to 1. |
346 | * 2. Shift the new read count into t1. | | 348 | * 2. Shift the new read count into t1. |
347 | * 3. Conditionally move t1 to v0 based on low-bit-set of t0 | | 349 | * 3. Conditionally move t1 to v0 based on low-bit-set of t0
348 | * (RW_HAS_WAITERS). If RW_HAS_WAITERS is not set, then | | 350 | * (RW_HAS_WAITERS). If RW_HAS_WAITERS is not set, then |
349 | * the move will not take place, and v0 will remain 1. | | 351 | * the move will not take place, and v0 will remain 1. |
350 | * Otherwise, v0 will contain the updated read count. | | 352 | * Otherwise, v0 will contain the updated read count. |
351 | * 4. Jump to slow path if v0 == 0. | | 353 | * 4. Jump to slow path if v0 == 0. |
352 | */ | | 354 | */ |
353 | 1: ldq_l t0, 0(a0) | | 355 | 1: ldq_l t0, 0(a0) |
354 | ldiq v0, 1 | | 356 | ldiq v0, 1 |
355 | subq t0, RW_READ_INCR, t2 | | 357 | subq t0, RW_READ_INCR, t2 |
356 | srl t2, RW_READ_COUNT_SHIFT, t1 | | 358 | srl t2, RW_READ_COUNT_SHIFT, t1 |
357 | cmovlbs t0, t1, v0 | | 359 | cmovlbs t0, t1, v0 |
358 | beq v0, 4f | | 360 | beq v0, 4f |
359 | stq_c t2, 0(a0) | | 361 | stq_c t2, 0(a0) |
360 | beq t2, 2f /* STQ_C failed; try again */ | | 362 | beq t2, 2f /* STQ_C failed; try again */ |
361 | RET | | 363 | RET |
362 | | | 364 | |
363 | 2: br 1b | | 365 | 2: br 1b |
364 | | | 366 | |
365 | /* | | 367 | /* |
366 | * Releasing a write-lock. Make sure the owner field points | | 368 | * Releasing a write-lock. Make sure the owner field points |
367 | * to our LWP. If it does not, take the slow path where the | | 369 | * to our LWP. If it does not, take the slow path where the |
368 | * juicy diagnostic checks are located. a2 contains the owner | | 370 | * juicy diagnostic checks are located. a2 contains the owner |
369 | * field shifted down. Shift it back up to compare to curlwp; | | 371 | * field shifted down. Shift it back up to compare to curlwp; |
370 | * this conveniently discards the bits we don't want to compare. | | 372 | * this conveniently discards the bits we don't want to compare. |
371 | */ | | 373 | */ |
372 | 3: GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ | | 374 | 3: GET_CURLWP /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ |
373 | sll a2, RW_READ_COUNT_SHIFT, a2 | | 375 | sll a2, RW_READ_COUNT_SHIFT, a2 |
374 | mov zero, t2 /* fast-path write-unlock stores NULL */ | | 376 | mov zero, t2 /* fast-path write-unlock stores NULL */ |
375 | cmpeq v0, a2, v0 /* v0 = (owner == curlwp) */ | | 377 | cmpeq v0, a2, v0 /* v0 = (owner == curlwp) */ |
376 | ldq_l t0, 0(a0) | | 378 | ldq_l t0, 0(a0) |
377 | beq v0, 4f /* owner field mismatch; need slow path */ | | 379 | beq v0, 4f /* owner field mismatch; need slow path */ |
378 | blbs t0, 4f /* RW_HAS_WAITERS set; need slow-path */ | | 380 | blbs t0, 4f /* RW_HAS_WAITERS set; need slow-path */ |
379 | stq_c t2, 0(a0) | | 381 | stq_c t2, 0(a0) |
380 | beq t2, 4f /* STQ_C failed; need slow-path */ | | 382 | beq t2, 4f /* STQ_C failed; need slow-path */ |
381 | RET | | 383 | RET |
382 | | | 384 | |
383 | 4: lda pv, rw_vector_exit | | 385 | 4: lda pv, rw_vector_exit |
384 | jmp (pv) | | 386 | jmp (pv) |
385 | END(rw_exit) | | 387 | END(rw_exit) |
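The numbered trick in rw_exit's read-release path folds "this was the last hold" and "someone is waiting" into a single branch via CMOVLBS. Written out in C, the decision the stub computes inside its LL/SC loop looks roughly like this (hypothetical helper; constants per assym.h):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of the rw_exit() read-release check: slow path only when
     * RW_HAS_WAITERS (the low bit) is set and the count being stored
     * is zero.  Step numbers match the comment above. */
    static bool
    rw_read_exit_slow(uintptr_t old, uintptr_t incr, unsigned shift)
    {
        uintptr_t v = 1;                             /* step 1 */
        uintptr_t newcount = (old - incr) >> shift;  /* step 2 */
        if (old & 1)                                 /* step 3 (CMOVLBS) */
            v = newcount;
        return v == 0;                               /* step 4 */
    }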
386 | | | 388 | |
387 | #endif /* !LOCKDEBUG */ | | 389 | #endif /* !LOCKDEBUG */ |
388 | | | 390 | |
389 | #if defined(MULTIPROCESSOR) | | 391 | #if defined(MULTIPROCESSOR) |
390 | /* | | 392 | /* |
391 | * Table of locations to patch with MB instructions on multiprocessor | | 393 | * Table of locations to patch with MB instructions on multiprocessor |
392 | * systems. | | 394 | * systems. |
393 | */ | | 395 | */ |
394 | .section ".rodata" | | 396 | .section ".rodata" |
395 | .globl lock_stub_patch_table | | 397 | .globl lock_stub_patch_table |
396 | lock_stub_patch_table: | | 398 | lock_stub_patch_table: |
397 | .quad .L__lock_cas_mb_1 | | 399 | .quad .L__lock_cas_mb_1 |
398 | .quad .L__lock_cas_mb_2 | | 400 | .quad .L__lock_cas_mb_2 |
399 | #if !defined(LOCKDEBUG) | | 401 | #if !defined(LOCKDEBUG) |
400 | .quad .L_mutex_enter_mb_1 | | 402 | .quad .L_mutex_enter_mb_1 |
401 | .quad .L_mutex_exit_mb_1 | | 403 | .quad .L_mutex_exit_mb_1 |
| | | 404 | #if 0 /* XXX disabled for now XXX */ |
402 | .quad .L_mutex_spin_enter_mb_1 | | 405 | .quad .L_mutex_spin_enter_mb_1 |
403 | .quad .L_mutex_spin_exit_mb_1 | | 406 | .quad .L_mutex_spin_exit_mb_1 |
| | | 407 | #endif /* XXX disabled for now XXX */ |
404 | .quad .L_rw_enter_mb_1 | | 408 | .quad .L_rw_enter_mb_1 |
405 | .quad .L_rw_enter_mb_2 | | 409 | .quad .L_rw_enter_mb_2 |
406 | .quad .L_rw_tryenter_mb_1 | | 410 | .quad .L_rw_tryenter_mb_1 |
407 | .quad .L_rw_tryenter_mb_2 | | 411 | .quad .L_rw_tryenter_mb_2 |
408 | .quad .L_rw_exit_mb_1 | | 412 | .quad .L_rw_exit_mb_1 |
409 | #endif /* ! LOCKDEBUG */ | | 413 | #endif /* ! LOCKDEBUG */ |
410 | .quad 0 /* NULL terminator */ | | 414 | .quad 0 /* NULL terminator */ |
411 | #endif /* MULTIPROCESSOR */ | | 415 | #endif /* MULTIPROCESSOR */ |
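For context, lock_stub_patch_table is a NULL-terminated list of the addresses of the 'unop' slots emitted by the MB() macro; on a multiprocessor boot each slot is rewritten with an Alpha 'mb' instruction. A hedged sketch of such a consumer; the function name and the 'alpha_mb_insn' constant are illustrative stand-ins, and the real patcher must also flush the instruction stream (imb) afterwards:

    #include <stddef.h>
    #include <stdint.h>

    extern uint32_t * const lock_stub_patch_table[]; /* NULL-terminated */
    extern const uint32_t alpha_mb_insn;  /* assumed: encoding of 'mb' */

    static void
    patch_lock_stubs_sketch(void)
    {
        for (uint32_t * const *p = lock_stub_patch_table; *p != NULL; p++)
            **p = alpha_mb_insn;  /* replace the patched-in 'unop' */
    }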