1 | .\" $NetBSD: atomic_loadstore.9,v 1.5 2019/12/08 00:00:59 uwe Exp $ | | 1 | .\" $NetBSD: atomic_loadstore.9,v 1.6 2020/09/03 00:23:57 riastradh Exp $ |
2 | .\" | | 2 | .\" |
3 | .\" Copyright (c) 2019 The NetBSD Foundation | | 3 | .\" Copyright (c) 2019 The NetBSD Foundation |
4 | .\" All rights reserved. | | 4 | .\" All rights reserved. |
5 | .\" | | 5 | .\" |
6 | .\" This code is derived from software contributed to The NetBSD Foundation | | 6 | .\" This code is derived from software contributed to The NetBSD Foundation |
7 | .\" by Taylor R. Campbell. | | 7 | .\" by Taylor R. Campbell. |
8 | .\" | | 8 | .\" |
9 | .\" Redistribution and use in source and binary forms, with or without | | 9 | .\" Redistribution and use in source and binary forms, with or without |
10 | .\" modification, are permitted provided that the following conditions | | 10 | .\" modification, are permitted provided that the following conditions |
11 | .\" are met: | | 11 | .\" are met: |
12 | .\" 1. Redistributions of source code must retain the above copyright | | 12 | .\" 1. Redistributions of source code must retain the above copyright |
13 | .\" notice, this list of conditions and the following disclaimer. | | 13 | .\" notice, this list of conditions and the following disclaimer. |
14 | .\" 2. Redistributions in binary form must reproduce the above copyright | | 14 | .\" 2. Redistributions in binary form must reproduce the above copyright |
15 | .\" notice, this list of conditions and the following disclaimer in the | | 15 | .\" notice, this list of conditions and the following disclaimer in the |
16 | .\" documentation and/or other materials provided with the distribution. | | 16 | .\" documentation and/or other materials provided with the distribution. |
17 | .\" | | 17 | .\" |
18 | .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 18 | .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
19 | .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 19 | .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
20 | .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 20 | .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 | .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 21 | .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
22 | .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 22 | .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
23 | .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 23 | .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 24 | .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
25 | .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 25 | .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
26 | .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 26 | .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
27 | .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 27 | .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
28 | .\" POSSIBILITY OF SUCH DAMAGE. | | 28 | .\" POSSIBILITY OF SUCH DAMAGE. |
29 | .\" | | 29 | .\" |
30 | .Dd November 25, 2019 | | 30 | .Dd November 25, 2019 |
31 | .Dt ATOMIC_LOADSTORE 9 | | 31 | .Dt ATOMIC_LOADSTORE 9 |
32 | .Os | | 32 | .Os |
33 | .Sh NAME | | 33 | .Sh NAME |
34 | .Nm atomic_load_relaxed , | | 34 | .Nm atomic_load_relaxed , |
35 | .Nm atomic_load_acquire , | | 35 | .Nm atomic_load_acquire , |
36 | .Nm atomic_load_consume , | | 36 | .Nm atomic_load_consume , |
37 | .Nm atomic_store_relaxed , | | 37 | .Nm atomic_store_relaxed , |
38 | .Nm atomic_store_release | | 38 | .Nm atomic_store_release |
39 | .Nd atomic and ordered memory operations | | 39 | .Nd atomic and ordered memory operations |
40 | .Sh SYNOPSIS | | 40 | .Sh SYNOPSIS |
41 | .In sys/atomic.h | | 41 | .In sys/atomic.h |
42 | .Ft T | | 42 | .Ft T |
43 | .Fn atomic_load_relaxed "const volatile T *p" | | 43 | .Fn atomic_load_relaxed "const volatile T *p" |
44 | .Ft T | | 44 | .Ft T |
45 | .Fn atomic_load_acquire "const volatile T *p" | | 45 | .Fn atomic_load_acquire "const volatile T *p" |
46 | .Ft T | | 46 | .Ft T |
47 | .Fn atomic_load_consume "const volatile T *p" | | 47 | .Fn atomic_load_consume "const volatile T *p" |
48 | .Ft void | | 48 | .Ft void |
49 | .Fn atomic_store_relaxed "volatile T *p" "T v" | | 49 | .Fn atomic_store_relaxed "volatile T *p" "T v" |
50 | .Ft void | | 50 | .Ft void |
51 | .Fn atomic_store_release "volatile T *p" "T v" | | 51 | .Fn atomic_store_release "volatile T *p" "T v" |
52 | .Sh DESCRIPTION | | 52 | .Sh DESCRIPTION |
53 | These type-generic macros implement memory operations that are | | 53 | These type-generic macros implement memory operations that are |
54 | .Em atomic | | 54 | .Em atomic |
55 | and that have | | 55 | and that have |
56 | .Em memory ordering constraints . | | 56 | .Em memory ordering constraints . |
57 | Aside from atomicity and ordering, the load operations are equivalent | | 57 | Aside from atomicity and ordering, the load operations are equivalent |
58 | to | | 58 | to |
59 | .Li * Ns Fa p | | 59 | .Li * Ns Fa p |
60 | and the store operations are equivalent to | | 60 | and the store operations are equivalent to |
61 | .Li * Ns Fa p Li "=" Fa v . | | 61 | .Li * Ns Fa p Li "=" Fa v . |
62 | The pointer | | 62 | The pointer |
63 | .Fa p | | 63 | .Fa p |
64 | must be aligned, even on architectures like x86 which generally lack | | 64 | must be aligned, even on architectures like x86 which generally lack |
65 | strict alignment requirements; see | | 65 | strict alignment requirements; see |
66 | .Sx SIZE AND ALIGNMENT | | 66 | .Sx SIZE AND ALIGNMENT |
67 | for details. | | 67 | for details. |
68 | .Pp | | 68 | .Pp |
69 | .Em Atomic | | 69 | .Em Atomic |
70 | means that the memory operations cannot be | | 70 | means that the memory operations cannot be |
71 | .Em fused | | 71 | .Em fused |
72 | or | | 72 | or |
73 | .Em torn : | | 73 | .Em torn : |
74 | .Bl -bullet | | 74 | .Bl -bullet |
75 | .It | | 75 | .It |
76 | .Em Fusing | | 76 | .Em Fusing |
77 | is combining multiple memory operations on a single object into one | | 77 | is combining multiple memory operations on a single object into one |
78 | memory operation, such as replacing | | 78 | memory operation, such as replacing |
79 | .Bd -literal -compact | | 79 | .Bd -literal -compact |
80 | *p = v; | | 80 | *p = v; |
81 | x = *p; | | 81 | x = *p; |
82 | .Ed | | 82 | .Ed |
83 | by | | 83 | by |
84 | .Bd -literal -compact | | 84 | .Bd -literal -compact |
85 | *p = v; | | 85 | *p = v; |
86 | x = v; | | 86 | x = v; |
87 | .Ed | | 87 | .Ed |
88 | since the compiler can prove that | | 88 | since the compiler can prove that |
89 | .Li \&*p | | 89 | .Li \&*p |
90 | will yield | | 90 | will yield |
91 | .Li v | | 91 | .Li v |
92 | after | | 92 | after |
93 | .Li \&*p\ =\ v . | | 93 | .Li \&*p\ =\ v . |
94 | For | | 94 | For |
95 | .Em atomic | | 95 | .Em atomic |
96 | memory operations, the implementation | | 96 | memory operations, the implementation |
97 | .Em will not | | 97 | .Em will not |
98 | assume that | | 98 | assume that |
99 | .Bl -dash -compact | | 99 | .Bl -dash -compact |
100 | .It | | 100 | .It |
101 | consecutive loads of the same object will return the same value, or | | 101 | consecutive loads of the same object will return the same value, or |
102 | .It | | 102 | .It |
103 | a store followed by a load of the same object will return the value | | 103 | a store followed by a load of the same object will return the value |
104 | stored, or | | 104 | stored, or |
105 | .It | | 105 | .It |
106 | consecutive stores of the same object are redundant. | | 106 | consecutive stores of the same object are redundant. |
107 | .El | | 107 | .El |
108 | Thus, the implementation will not replace two consecutive atomic loads | | 108 | Thus, the implementation will not replace two consecutive atomic loads |
109 | by one, will not elide an atomic load following a store, and will not | | 109 | by one, will not elide an atomic load following a store, and will not |
110 | combine two consecutive atomic stores into one. | | 110 | combine two consecutive atomic stores into one. |
111 | .Pp | | 111 | .Pp |
112 | For example, | | 112 | For example, |
113 | .Bd -literal | | 113 | .Bd -literal |
114 | atomic_store_relaxed(&flag, 1); | | 114 | atomic_store_relaxed(&flag, 1); |
115 | while (atomic_load_relaxed(&flag)) | | 115 | while (atomic_load_relaxed(&flag)) |
116 | continue; | | 116 | continue; |
117 | .Ed | | 117 | .Ed |
118 | .Pp | | 118 | .Pp |
119 | may be used to set a flag and then busy-wait until another thread | | 119 | may be used to set a flag and then busy-wait until another thread |
120 | clears it, whereas | | 120 | clears it, whereas |
121 | .Bd -literal | | 121 | .Bd -literal |
122 | flag = 1; | | 122 | flag = 1; |
123 | while (flag) | | 123 | while (flag) |
124 | continue; | | 124 | continue; |
125 | .Ed | | 125 | .Ed |
126 | .Pp | | 126 | .Pp |
127 | may be transformed into the infinite loop | | 127 | may be transformed into the infinite loop |
128 | .Bd -literal | | 128 | .Bd -literal |
129 | flag = 1; | | 129 | flag = 1; |
130 | while (1) | | 130 | while (1) |
131 | continue; | | 131 | continue; |
132 | .Ed | | 132 | .Ed |
133 | .It | | 133 | .It |
134 | .Em Tearing | | 134 | .Em Tearing |
135 | is implementing a memory operation on a large data unit such as a | | 135 | is implementing a memory operation on a large data unit such as a |
136 | 32-bit word by issuing multiple memory operations on smaller data units | | 136 | 32-bit word by issuing multiple memory operations on smaller data units |
137 | such as 8-bit bytes. | | 137 | such as 8-bit bytes. |
138 | The implementation will not tear | | 138 | The implementation will not tear |
139 | .Em atomic | | 139 | .Em atomic |
140 | loads or stores into smaller ones. | | 140 | loads or stores into smaller ones. |
141 | Thus, as far as any interrupt, other thread, or other CPU can tell, an | | 141 | Thus, as far as any interrupt, other thread, or other CPU can tell, an |
142 | atomic memory operation is issued either all at once or not at all. | | 142 | atomic memory operation is issued either all at once or not at all. |
143 | .Pp | | 143 | .Pp |
144 | For example, if a 32-bit word | | 144 | For example, if a 32-bit word |
145 | .Va w | | 145 | .Va w |
146 | is written with | | 146 | is written with |
147 | .Pp | | 147 | .Pp |
148 | .Dl atomic_store_relaxed(&w,\ 0x00010002); | | 148 | .Dl atomic_store_relaxed(&w,\ 0x00010002); |
149 | .Pp | | 149 | .Pp |
150 | then an interrupt, other thread, or other CPU reading it with | | 150 | then an interrupt, other thread, or other CPU reading it with |
151 | .Li atomic_load_relaxed(&w) | | 151 | .Li atomic_load_relaxed(&w) |
152 | will never witness it partially written, whereas | | 152 | will never witness it partially written, whereas |
153 | .Pp | | 153 | .Pp |
154 | .Dl w\ =\ 0x00010002; | | 154 | .Dl w\ =\ 0x00010002; |
155 | .Pp | | 155 | .Pp |
156 | might be compiled into a pair of separate 16-bit store instructions | | 156 | might be compiled into a pair of separate 16-bit store instructions |
157 | instead of one single word-sized store instruction, in which case other | | 157 | instead of one single word-sized store instruction, in which case other |
158 | threads may see the intermediate state with only one of the halves | | 158 | threads may see the intermediate state with only one of the halves |
159 | written. | | 159 | written. |
160 | .El | | 160 | .El |
161 | .Pp | | 161 | .Pp |
162 | Atomic operations on any single object occur in a total order shared by | | 162 | Atomic operations on any single object occur in a total order shared by |
163 | all interrupts, threads, and CPUs, which is consistent with the program | | 163 | all interrupts, threads, and CPUs, which is consistent with the program |
164 | order in every interrupt, thread, and CPU. | | 164 | order in every interrupt, thread, and CPU. |
165 | A single program without interruption or other threads or CPUs will | | 165 | A single program without interruption or other threads or CPUs will |
166 | always observe its own loads and stores in program order, but another | | 166 | always observe its own loads and stores in program order, but another |
167 | program in an interrupt handler, in another thread, or on another CPU | | 167 | program in an interrupt handler, in another thread, or on another CPU |
168 | may issue loads that return values as if the first program's stores | | 168 | may issue loads that return values as if the first program's stores |
169 | occurred out of program order, and vice versa. | | 169 | occurred out of program order, and vice versa. |
170 | Two different threads might each observe a third thread's memory | | 170 | Two different threads might each observe a third thread's memory |
171 | operations in different orders. | | 171 | operations in different orders. |
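.Pp
For example, consider two words
.Va x
and
.Va y ,
both initially zero (a sketch for illustration, not kernel code):
.Bd -literal
/* Thread A */
atomic_store_relaxed(&x, 1);
atomic_store_relaxed(&y, 1);

/* Thread B */
int y1 = atomic_load_relaxed(&y);
int x1 = atomic_load_relaxed(&x);
.Ed
.Pp
Thread B may observe
.Li y1\ ==\ 1
and
.Li x1\ ==\ 0 ,
as if thread A's stores had happened out of program order: relaxed
atomic operations rule out fusing and tearing, but not this kind of
reordering.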
.Pp
The
.Em memory ordering constraints
make limited guarantees of ordering relative to memory operations on
.Em other
objects as witnessed by interrupts, other threads, or other CPUs, and
have the following meanings:
.Bl -tag -width relaxed
.It relaxed
No ordering relative to memory operations on any other objects is
guaranteed.
Relaxed ordering is the default for ordinary non-atomic memory
operations like
.Li "*p"
and
.Li "*p = v" .
.Pp
Atomic operations with relaxed ordering are cheap: they are not
read/modify/write atomic operations, and they do not involve any kind
of inter-CPU ordering barriers.
.It acquire
This memory operation happens before all subsequent memory operations
in program order.
However, prior memory operations in program order may be reordered to
happen after this one.
For example, assuming no aliasing between the pointers, the
implementation is allowed to treat
.Bd -literal
int x = *p;
if (atomic_load_acquire(q)) {
	int y = *r;
	*s = x + y;
	return 1;
}
.Ed
.Pp
as if it were
.Bd -literal
if (atomic_load_acquire(q)) {
	int x = *p;
	int y = *r;
	*s = x + y;
	return 1;
}
.Ed
.Pp
but
.Em not
as if it were
.Bd -literal
int x = *p;
int y = *r;
*s = x + y;
if (atomic_load_acquire(q)) {
	return 1;
}
.Ed
.It consume
This memory operation happens before all memory operations on objects
at addresses that are computed from the value returned by this one.
Otherwise, no ordering relative to memory operations on other objects
is implied.
.Pp
For example, the implementation is allowed to treat
.Bd -literal
struct foo *foo0, *foo1;

struct foo *f0 = atomic_load_consume(&foo0);
struct foo *f1 = atomic_load_consume(&foo1);
int x = f0->x;
int y = f1->y;
.Ed
.Pp
as if it were
.Bd -literal
struct foo *foo0, *foo1;

struct foo *f1 = atomic_load_consume(&foo1);
struct foo *f0 = atomic_load_consume(&foo0);
int y = f1->y;
int x = f0->x;
.Ed
.Pp
but loading
.Li f0->x
is guaranteed to happen after loading
.Li foo0
even if the CPU had a cached value for the address that
.Li f0->x
happened to be at, and likewise for
.Li f1->y
and
.Li foo1 .
.Pp
.Fn atomic_load_consume
functions like
.Fn atomic_load_acquire
as long as the memory operations that must happen after it are limited
to addresses that depend on the value returned by it, but it is almost
always as cheap as
.Fn atomic_load_relaxed .
See
.Sx ACQUIRE OR CONSUME?
below for more details.
.It release
All prior memory operations in program order happen before this one.
However, subsequent memory operations in program order may be reordered
to happen before this one too.
For example, assuming no aliasing between the pointers, the
implementation is allowed to treat
.Bd -literal
int x = *p;
*q = x;
atomic_store_release(r, 0);
int y = *s;
return x + y;
.Ed
.Pp
as if it were
.Bd -literal
int y = *s;
int x = *p;
*q = x;
atomic_store_release(r, 0);
return x + y;
.Ed
.Pp
but
.Em not
as if it were
.Bd -literal
atomic_store_release(r, 0);
int x = *p;
int y = *s;
*q = x;
return x + y;
.Ed
.El
.Ss PAIRING ORDERED MEMORY OPERATIONS
In general, each
.Fn atomic_store_release
.Em must
be paired with either
.Fn atomic_load_acquire
or
.Fn atomic_load_consume
in order to have an effect \(em it is only when a release operation
synchronizes with an acquire or consume operation that any ordering is
guaranteed between memory operations
.Em before
the release operation and memory operations
.Em after
the acquire/consume operation.
.Pp
For example, to set up an entry in a table and then mark the entry
ready, you should:
.Bl -enum
.It
Perform memory operations to initialize the data.
.Bd -literal
tab[i].x = ...;
tab[i].y = ...;
.Ed
.It
Issue
.Fn atomic_store_release
to mark it ready.
.Bd -literal
atomic_store_release(&tab[i].ready, 1);
.Ed
.It
Possibly in another thread, issue
.Fn atomic_load_acquire
to ascertain whether it is ready.
.Bd -literal
if (atomic_load_acquire(&tab[i].ready) == 0)
	return EWOULDBLOCK;
.Ed
.It
Perform memory operations to use the data.
.Bd -literal
do_stuff(tab[i].x, tab[i].y);
.Ed
.El
.Pp
Similarly, if you want to create an object, initialize it, and then
publish it to be used by another thread, then you should:
.Bl -enum
.It
Perform memory operations to initialize the object.
.Bd -literal
struct mumble *m = kmem_alloc(sizeof(*m), KM_SLEEP);
m->x = x;
m->y = y;
m->z = m->x + m->y;
.Ed
.It
Issue
.Fn atomic_store_release
to publish it.
.Bd -literal
atomic_store_release(&the_mumble, m);
.Ed
.It
Possibly in another thread, issue
.Fn atomic_load_consume
to get it.
.Bd -literal
struct mumble *m = atomic_load_consume(&the_mumble);
.Ed
.It
Perform memory operations to use the object's members.
.Bd -literal
m->y &= m->x;
do_things(m->x, m->y, m->z);
.Ed
.El
.Pp
In both examples, assuming that the value written by
.Fn atomic_store_release
in step\~2
is read by
.Fn atomic_load_acquire
or
.Fn atomic_load_consume
in step\~3, this guarantees that all of the memory operations in
step\~1 complete before any of the memory operations in step\~4 \(em
even if they happen on different CPUs.
.Pp
Without
.Em both
the release operation in step\~2
.Em and
the acquire or consume operation in step\~3, no ordering is guaranteed
between the memory operations in steps\~1 and\~4.
In fact, without
.Em both
release and acquire/consume, even the assignment
.Li m->z\ =\ m->x\ +\ m->y
in step\~1 might read values of
.Li m->x
and
.Li m->y
that were written in step\~4.
.Ss ACQUIRE OR CONSUME?
You must use
.Fn atomic_load_acquire
when subsequent memory operations in program order that must happen
after the load are on objects at
.Em addresses that might not depend arithmetically on the resulting value .
This applies particularly when the choice of whether to do the
subsequent memory operation depends on a
.Em control-flow decision based on the resulting value :
.Bd -literal
struct gadget {
	int ready, x;
} the_gadget;

/* Producer */
the_gadget.x = 42;
atomic_store_release(&the_gadget.ready, 1);

/* Consumer */
if (atomic_load_acquire(&the_gadget.ready) == 0)
	return EWOULDBLOCK;
int x = the_gadget.x;
.Ed
.Pp
Here the
.Em decision of whether to load
.Li the_gadget.x
depends on a control-flow decision based on the value loaded from
.Li the_gadget.ready ,
and loading
.Li the_gadget.x
must happen after loading
.Li the_gadget.ready .
Using
.Fn atomic_load_acquire
guarantees that the compiler and CPU do not conspire to load
.Li the_gadget.x
before we have ascertained that it is ready.
.Pp
You may use
.Fn atomic_load_consume
if all subsequent memory operations in program order that must happen
after the load are performed on objects at
.Em addresses computed arithmetically from the resulting value ,
such as loading a pointer to a structure object and then dereferencing
it:
.Bd -literal
struct gizmo {
	int x, y, z;
};
struct gizmo null_gizmo;
struct gizmo *the_gizmo = &null_gizmo;

/* Producer */
struct gizmo *g = kmem_alloc(sizeof(*g), KM_SLEEP);
g->x = 12;
g->y = 34;
g->z = 56;
atomic_store_release(&the_gizmo, g);

/* Consumer */
struct gizmo *g = atomic_load_consume(&the_gizmo);
int y = g->y;
.Ed
.Pp
Here the
.Em address
of
.Li g->y
depends on the value of the pointer loaded from
.Li the_gizmo .
Using
.Fn atomic_load_consume
guarantees that we do not witness a stale cache for that address.
.Pp
In some cases it may be unclear.
For example:
.Bd -literal
int x[2];
bool b;

/* Producer */
x[0] = 42;
atomic_store_release(&b, 0);

/* Consumer 1 */
int y = atomic_load_???(&b) ? x[0] : x[1];

/* Consumer 2 */
int y = x[atomic_load_???(&b) ? 0 : 1];

/* Consumer 3 */
int y = x[atomic_load_???(&b) ^ 1];
.Ed
.Pp
Although the three consumers seem to be equivalent, by the letter of
C11 consumers\~1 and\~2 require
.Fn atomic_load_acquire
because the value determines the address of a subsequent load only via
control-flow decisions in the
.Li ?:
operator, whereas consumer\~3 can use
.Fn atomic_load_consume .
However, if you're not sure, you should err on the side of
.Fn atomic_load_acquire
until C11 implementations have ironed out the kinks in the semantics.
.Pp
On all CPUs other than DEC Alpha,
.Fn atomic_load_consume
is cheap \(em it is identical to
.Fn atomic_load_relaxed .
In contrast,
.Fn atomic_load_acquire
usually implies an expensive memory barrier.
.Ss SIZE AND ALIGNMENT
The pointer
.Fa p
must be aligned \(em that is, if the object it points to is
.\"
2\c
.ie t \s-2\v'-0.4m'n\v'+0.4m'\s+2
.el ^n
.\"
bytes long, then the low-order
.Ar n
bits of
.Fa p
must be zero.
.Pp
All
.Nx
ports support atomic loads and stores on units of data up to 32 bits.
Some ports additionally support atomic loads and stores on larger
quantities, like 64-bit quantities, if
.Dv __HAVE_ATOMIC64_LOADSTORE
is defined.
The macros are not allowed on larger quantities of data than the port
supports atomically; attempts to use them for such quantities should
result in a compile-time assertion failure.
.Pp
For example, as long as you use
.Fn atomic_store_*
to write a 32-bit quantity, you can safely use
.Fn atomic_load_relaxed
to optimistically read it outside a lock, but for a 64-bit quantity it
must be conditional on
.Dv __HAVE_ATOMIC64_LOADSTORE
\(em otherwise it will lead to compile-time errors on platforms without
64-bit atomic loads and stores:
.Bd -literal
struct foo {
	kmutex_t	f_lock;
	uint32_t	f_refcnt;
	uint64_t	f_ticket;
};

if (atomic_load_relaxed(&foo->f_refcnt) == 0)
	return 123;
#ifdef __HAVE_ATOMIC64_LOADSTORE
if (atomic_load_relaxed(&foo->f_ticket) == ticket)
	return 123;
#endif
mutex_enter(&foo->f_lock);
if (foo->f_refcnt == 0 || foo->f_ticket == ticket)
	ret = 123;
...
#ifdef __HAVE_ATOMIC64_LOADSTORE
atomic_store_relaxed(&foo->f_ticket, foo->f_ticket + 1);
#else
foo->f_ticket++;
#endif
...
mutex_exit(&foo->f_lock);
.Ed
.Sh C11 COMPATIBILITY
These macros are meant to follow
.Tn C11
semantics, in terms of
.Li atomic_load_explicit()
and
.Li atomic_store_explicit()
with the appropriate memory order specifiers, and are meant to make
future adoption of the
.Tn C11
atomic API easier.
Eventually it may be mandatory to use the
.Tn C11
.Vt _Atomic
type qualifier or equivalent for the operands.
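.Pp
For illustration, the intended correspondence, sketched in terms of the
standard
.In stdatomic.h
interface rather than the
.In sys/atomic.h
macros themselves:
.Bd -literal
_Atomic int ready;

/* atomic_load_acquire(&ready), in C11 terms: */
int r = atomic_load_explicit(&ready, memory_order_acquire);

/* atomic_store_release(&ready, 1), in C11 terms: */
atomic_store_explicit(&ready, 1, memory_order_release);

/* Likewise, the relaxed operations correspond to
   memory_order_relaxed, and atomic_load_consume to
   memory_order_consume. */
.Ed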
.Sh LINUX ANALOGUES
The Linux kernel provides two macros
.Li READ_ONCE(x)
and
.Li WRITE_ONCE(x,\ v)
which are similar to
.Li atomic_load_consume(&x)
and
.Li atomic_store_relaxed(&x,\ v) ,
respectively.
However, while Linux's
.Li READ_ONCE
and
.Li WRITE_ONCE
prevent fusing, they may in some cases be torn \(em and therefore fail
to guarantee atomicity \(em because:
.Bl -bullet
.It
They do not require the address
.Li "&x"
to be aligned.
.It
They do not require
.Li sizeof(x)
to be at most the largest size of available atomic loads and stores on
the host architecture.
.El
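.Pp
As a rough sketch of the correspondence, assuming a suitably aligned
word-sized object:
.Bd -literal
/* Linux */			/* NetBSD */
p = READ_ONCE(the_ptr);		p = atomic_load_consume(&the_ptr);
WRITE_ONCE(counter, v);		atomic_store_relaxed(&counter, v);
.Ed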
.Sh MEMORY BARRIERS AND ATOMIC READ/MODIFY/WRITE
The atomic read/modify/write operations in
.Xr atomic_ops 3
have relaxed ordering by default, but can be combined with the memory
barriers in
.Xr membar_ops 3
for the same effect as an acquire operation and a release operation for
the purposes of pairing with
.Fn atomic_store_release
and
.Fn atomic_load_acquire
or
.Fn atomic_load_consume .
If
.Li atomic_r/m/w()
is an atomic read/modify/write operation in
.Xr atomic_ops 3 ,
then
.Bd -literal
membar_exit();
atomic_r/m/w(obj, ...);
.Ed
.Pp
functions like a release operation on
.Va obj ,
and
.Bd -literal
atomic_r/m/w(obj, ...);
membar_enter();
.Ed
.Pp
functions like an acquire operation on
.Va obj .
.Pp
.Sy WARNING :
The combination of
.Fn atomic_load_relaxed
and
.Xr membar_enter 3
.Em does not
make an acquire operation; only read/modify/write atomics may be
combined with
.Xr membar_enter 3
this way.
.Pp
On architectures where
.Dv __HAVE_ATOMIC_AS_MEMBAR
is defined, all the
.Xr atomic_ops 3
imply release and acquire operations, so the
.Xr membar_enter 3
and
.Xr membar_exit 3
are redundant.
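.Pp
For example, a reference count might be dropped with a release-style
barrier before the decrement and an acquire-style barrier before
freeing, omitting both where the atomics already imply them.
This is a sketch, with
.Li obj ,
.Li refcnt ,
and
.Li free_obj()
as illustrative names:
.Bd -literal
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_exit();
#endif
if (atomic_dec_uint_nv(&obj->refcnt) == 0) {
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif
	free_obj(obj);
}
.Ed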
.Sh EXAMPLES
Maintaining lossy counters.
These may lose some counts, because the read/modify/write cycle as a
whole is not atomic.
But this guarantees that the count will increase by at most one each
time.
In contrast, without atomic operations, in principle a write to a
32-bit counter might be torn into multiple smaller stores, which could
appear to happen out of order from another CPU's perspective, leading
to nonsensical counter readouts.
(For frequent events, consider using per-CPU counters instead in
practice.)
.Bd -literal
unsigned count;

void
record_event(void)
{
	atomic_store_relaxed(&count,
	    1 + atomic_load_relaxed(&count));
}

unsigned
read_event_count(void)
{
	return atomic_load_relaxed(&count);
}
.Ed
.Pp
Initialization barrier.
.Bd -literal
int ready;
struct data d;

void
setup_and_notify(void)
{
	setup_data(&d.things);
	atomic_store_release(&ready, 1);
}

void
try_if_ready(void)
{
	if (atomic_load_acquire(&ready))
		do_stuff(d.things);
}
.Ed
.Pp
Publishing a pointer to the current snapshot of data.
(Caller must arrange that only one call to
.Li take_snapshot()
happens at any given time; generally this should be done in
coordination with
.Xr pserialize 9
or similar to enable resource reclamation.)
.Bd -literal
struct data *current_d;

void
take_snapshot(void)
{
	struct data *d = kmem_alloc(sizeof(*d), KM_SLEEP);

	d->things = ...;

	atomic_store_release(&current_d, d);
}

struct data *
get_snapshot(void)
{
	return atomic_load_consume(&current_d);
}
.Ed
.Sh CODE REFERENCES
.Pa sys/sys/atomic.h
.Sh SEE ALSO
.Xr atomic_ops 3 ,
.Xr membar_ops 3 ,
.Xr pserialize 9
.Sh HISTORY
These atomic operations first appeared in
.Nx 9.0 .
.Sh CAVEATS
C11 formally specifies that all subexpressions, except the left
operands of the
.Ql && ,
.Ql || ,
.Ql ?: ,
and
.Ql \&,
operators and the
.Li kill_dependency()
macro, carry dependencies for which
.Dv memory_order_consume
guarantees ordering, but most or all implementations to date simply
treat
.Dv memory_order_consume
as
.Dv memory_order_acquire
and do not take advantage of data dependencies to elide costly memory
barriers or load-acquire CPU instructions.
.Pp
Instead, we implement
.Fn atomic_load_consume
as
.Fn atomic_load_relaxed
followed by
.Xr membar_datadep_consumer 3 ,
which is equivalent to
.Xr membar_consumer 3
on DEC Alpha and
.Xr __insn_barrier 3
elsewhere.
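.Pp
In effect, a sketch of what
.Fn atomic_load_consume
does (the actual definition in
.Pa sys/sys/atomic.h
may differ in detail):
.Bd -literal
/* atomic_load_consume(p), in effect: */
v = atomic_load_relaxed(p);
membar_datadep_consumer();	/* real barrier only on DEC Alpha */
/* ... memory operations at addresses computed from v ... */
.Ed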
.Sh BUGS
Some idiot decided to call it
.Em tearing ,
depriving us of the opportunity to say that atomic operations prevent
fusion and
.Em fission .