| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: lock.h,v 1.21 2020/08/05 05:24:44 simonb Exp $ */ | | 1 | /* $NetBSD: lock.h,v 1.22 2022/02/12 17:10:02 riastradh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2001, 2007 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2001, 2007 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Wayne Knowles and Andrew Doran. | | 8 | * by Wayne Knowles and Andrew Doran. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -31,26 +31,28 @@ | | | @@ -31,26 +31,28 @@ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Machine-dependent spin lock operations for MIPS processors. | | 33 | * Machine-dependent spin lock operations for MIPS processors. |
34 | * | | 34 | * |
35 | * Note: the R2000/R3000 have no atomic update instructions; this | | 35 | * Note: the R2000/R3000 have no atomic update instructions; this |
36 | * will cause problems for user applications using this header. | | 36 | * will cause problems for user applications using this header. |
37 | */ | | 37 | */ |
38 | | | 38 | |
39 | #ifndef _MIPS_LOCK_H_ | | 39 | #ifndef _MIPS_LOCK_H_ |
40 | #define _MIPS_LOCK_H_ | | 40 | #define _MIPS_LOCK_H_ |
41 | | | 41 | |
42 | #include <sys/param.h> | | 42 | #include <sys/param.h> |
43 | | | 43 | |
| | | 44 | #include <sys/atomic.h> |
| | | 45 | |
44 | static __inline int | | 46 | static __inline int |
45 | __SIMPLELOCK_LOCKED_P(const __cpu_simple_lock_t *__ptr) | | 47 | __SIMPLELOCK_LOCKED_P(const __cpu_simple_lock_t *__ptr) |
46 | { | | 48 | { |
47 | return *__ptr != __SIMPLELOCK_UNLOCKED; | | 49 | return *__ptr != __SIMPLELOCK_UNLOCKED; |
48 | } | | 50 | } |
49 | | | 51 | |
50 | static __inline int | | 52 | static __inline int |
51 | __SIMPLELOCK_UNLOCKED_P(const __cpu_simple_lock_t *__ptr) | | 53 | __SIMPLELOCK_UNLOCKED_P(const __cpu_simple_lock_t *__ptr) |
52 | { | | 54 | { |
53 | return *__ptr == __SIMPLELOCK_UNLOCKED; | | 55 | return *__ptr == __SIMPLELOCK_UNLOCKED; |
54 | } | | 56 | } |
55 | | | 57 | |
56 | static __inline void | | 58 | static __inline void |
| @@ -88,109 +90,117 @@ __cpu_simple_lock_try(__cpu_simple_lock_ | | | @@ -88,109 +90,117 @@ __cpu_simple_lock_try(__cpu_simple_lock_ |
88 | " j 3f \n" | | 90 | " j 3f \n" |
89 | " nop \n" | | 91 | " nop \n" |
90 | " nop \n" | | 92 | " nop \n" |
91 | "2: li %1, 0 \n" | | 93 | "2: li %1, 0 \n" |
92 | "3: \n" | | 94 | "3: \n" |
93 | " .set pop \n" | | 95 | " .set pop \n" |
94 | "# -- END __cpu_simple_lock_try \n" | | 96 | "# -- END __cpu_simple_lock_try \n" |
95 | : "=r" (t0), "=r" (v0), "+m" (*lp) | | 97 | : "=r" (t0), "=r" (v0), "+m" (*lp) |
96 | : "i" (__SIMPLELOCK_LOCKED), "m" (*lp)); | | 98 | : "i" (__SIMPLELOCK_LOCKED), "m" (*lp)); |
97 | | | 99 | |
98 | return (v0 != 0); | | 100 | return (v0 != 0); |
99 | } | | 101 | } |
100 | | | 102 | |
101 | #ifdef MIPS1 | | | |
102 | static __inline void | | | |
103 | mb_read(void) | | | |
104 | { | | | |
105 | __insn_barrier(); | | | |
106 | } | | | |
107 | | | | |
108 | static __inline void | | | |
109 | mb_write(void) | | | |
110 | { | | | |
111 | __insn_barrier(); | | | |
112 | } | | | |
113 | | | | |
114 | static __inline void | | | |
115 | mb_memory(void) | | | |
116 | { | | | |
117 | __insn_barrier(); | | | |
118 | } | | | |
119 | #else /* MIPS1*/ | | | |
120 | static __inline void | | | |
121 | mb_read(void) | | | |
122 | { | | | |
123 | __asm volatile( | | | |
124 | " .set push \n" | | | |
125 | " .set mips2 \n" | | | |
126 | " sync \n" | | | |
127 | " .set pop" | | | |
128 | ::: "memory" | | | |
129 | ); | | | |
130 | } | | | |
131 | | | | |
132 | static __inline void | | | |
133 | mb_write(void) | | | |
134 | { | | | |
135 | mb_read(); | | | |
136 | } | | | |
137 | | | | |
138 | static __inline void | | | |
139 | mb_memory(void) | | | |
140 | { | | | |
141 | mb_read(); | | | |
142 | } | | | |
143 | #endif /* MIPS1 */ | | | |
144 | | | | |
145 | #else /* !_HARDKERNEL */ | | 103 | #else /* !_HARDKERNEL */ |
146 | | | 104 | |
147 | u_int _atomic_cas_uint(volatile u_int *, u_int, u_int); | | 105 | u_int _atomic_cas_uint(volatile u_int *, u_int, u_int); |
148 | u_long _atomic_cas_ulong(volatile u_long *, u_long, u_long); | | 106 | u_long _atomic_cas_ulong(volatile u_long *, u_long, u_long); |
149 | void * _atomic_cas_ptr(volatile void *, void *, void *); | | 107 | void * _atomic_cas_ptr(volatile void *, void *, void *); |
150 | void mb_read(void); | | | |
151 | void mb_write(void); | | | |
152 | void mb_memory(void); | | | |
153 | | | 108 | |
154 | static __inline int | | 109 | static __inline int |
155 | __cpu_simple_lock_try(__cpu_simple_lock_t *lp) | | 110 | __cpu_simple_lock_try(__cpu_simple_lock_t *lp) |
156 | { | | 111 | { |
157 | | | 112 | |
| | | 113 | /* |
| | | 114 | * Successful _atomic_cas_uint functions as a load-acquire -- |
| | | 115 | * on MP systems, it issues sync after the LL/SC CAS succeeds; |
| | | 116 | * on non-MP systems every load is a load-acquire so it's moot. |
| | | 117 | * This pairs with the membar_exit and store sequence in |
| | | 118 | * __cpu_simple_unlock that functions as a store-release |
| | | 119 | * operation. |
| | | 120 | * |
| | | 121 | * NOTE: This applies only to _atomic_cas_uint (with the |
| | | 122 | * underscore), in sys/arch/mips/mips/lock_stubs_*.S. It is not |
| | | 123 | * true for atomic_cas_uint (without the underscore), from |
| | | 124 | * common/lib/libc/arch/mips/atomic/atomic_cas.S which does not |
| | | 125 | * imply a load-acquire. It is unclear why these disagree. |
| | | 126 | */ |
158 | return _atomic_cas_uint(lp, | | 127 | return _atomic_cas_uint(lp, |
159 | __SIMPLELOCK_UNLOCKED, __SIMPLELOCK_LOCKED) == | | 128 | __SIMPLELOCK_UNLOCKED, __SIMPLELOCK_LOCKED) == |
160 | __SIMPLELOCK_UNLOCKED; | | 129 | __SIMPLELOCK_UNLOCKED; |
161 | } | | 130 | } |
162 | | | 131 | |
163 | #endif /* _HARDKERNEL */ | | 132 | #endif /* _HARDKERNEL */ |
164 | | | 133 | |
165 | static __inline void | | 134 | static __inline void |
166 | __cpu_simple_lock_init(__cpu_simple_lock_t *lp) | | 135 | __cpu_simple_lock_init(__cpu_simple_lock_t *lp) |
167 | { | | 136 | { |
168 | | | 137 | |
169 | *lp = __SIMPLELOCK_UNLOCKED; | | 138 | *lp = __SIMPLELOCK_UNLOCKED; |
170 | mb_memory(); | | | |
171 | } | | 139 | } |
172 | | | 140 | |
173 | static __inline void | | 141 | static __inline void |
174 | __cpu_simple_lock(__cpu_simple_lock_t *lp) | | 142 | __cpu_simple_lock(__cpu_simple_lock_t *lp) |
175 | { | | 143 | { |
176 | | | 144 | |
177 | while (!__cpu_simple_lock_try(lp)) { | | 145 | while (!__cpu_simple_lock_try(lp)) { |
178 | while (*lp == __SIMPLELOCK_LOCKED) | | 146 | while (*lp == __SIMPLELOCK_LOCKED) |
179 | /* spin */; | | 147 | /* spin */; |
180 | } | | 148 | } |
181 | } | | 149 | } |
182 | | | 150 | |
183 | static __inline void | | 151 | static __inline void |
184 | __cpu_simple_unlock(__cpu_simple_lock_t *lp) | | 152 | __cpu_simple_unlock(__cpu_simple_lock_t *lp) |
185 | { | | 153 | { |
186 | | | 154 | |
187 | #ifndef _MIPS_ARCH_OCTEONP | | 155 | /* |
188 | mb_memory(); | | 156 | * membar_exit followed by the store acts as a store-release |
189 | #endif | | 157 | * operation that pairs with the load-acquire operation in |
| | | 158 | * successful __cpu_simple_lock_try. |
| | | 159 | * |
| | | 160 | * Can't use atomic_store_release here because that's not |
| | | 161 | * available in userland at the moment. |
| | | 162 | */ |
| | | 163 | membar_exit(); |
190 | *lp = __SIMPLELOCK_UNLOCKED; | | 164 | *lp = __SIMPLELOCK_UNLOCKED; |
| | | 165 | |
191 | #ifdef _MIPS_ARCH_OCTEONP | | 166 | #ifdef _MIPS_ARCH_OCTEONP |
192 | mb_write(); | | 167 | /* |
| | | 168 | * On Cavium's recommendation, we issue an extra SYNCW here. It |
| | | 169 | * is not needed for correct ordering, but without it stores can |
| | | 170 | * apparently sit in Octeon store buffers for hundreds of |
| | | 171 | * thousands of cycles, according to the following note: |
| | | 172 | * |
| | | 173 | * Programming Notes: |
| | | 174 | * [...] |
| | | 175 | * Core A (writer) |
| | | 176 | * SW R1, DATA |
| | | 177 | * LI R2, 1 |
| | | 178 | * SYNCW |
| | | 179 | * SW R2, FLAG |
| | | 180 | * SYNCW |
| | | 181 | * [...] |
| | | 182 | * |
| | | 183 | * The second SYNCW instruction executed by core A is not |
| | | 184 | * necessary for correctness, but has very important |
| | | 185 | * performance effects on OCTEON. Without it, the store |
| | | 186 | * to FLAG may linger in core A's write buffer before it |
| | | 187 | * becomes visible to other cores. (If core A is not |
| | | 188 | * performing many stores, this may add hundreds of |
| | | 189 | * thousands of cycles to the flag release time since the |
| | | 190 | * OCTEON core normally retains stores to attempt to merge |
| | | 191 | * them before sending the store on the CMB.) |
| | | 192 | * Applications should include this second SYNCW |
| | | 193 | * instruction after flag or lock releases. |
| | | 194 | * |
| | | 195 | * Cavium Networks OCTEON Plus CN50XX Hardware Reference |
| | | 196 | * Manual, July 2008, Appendix A, p. 943. |
| | | 197 | * https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/hactive/CN50XX-HRM-V0.99E.pdf |
| | | 198 | * |
| | | 199 | * XXX It might be prudent to put this into |
| | | 200 | * atomic_store_release itself. |
| | | 201 | */ |
| | | 202 | __asm volatile("syncw" ::: "memory"); |
193 | #endif | | 203 | #endif |
194 | } | | 204 | } |
195 | | | 205 | |
196 | #endif /* _MIPS_LOCK_H_ */ | | 206 | #endif /* _MIPS_LOCK_H_ */ |