| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | .\" $NetBSD: membar_ops.3,v 1.5 2017/10/24 18:19:17 abhinav Exp $ | | 1 | .\" $NetBSD: membar_ops.3,v 1.6 2020/09/03 00:00:06 riastradh Exp $ |
2 | .\" | | 2 | .\" |
3 | .\" Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. | | 3 | .\" Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. |
4 | .\" All rights reserved. | | 4 | .\" All rights reserved. |
5 | .\" | | 5 | .\" |
6 | .\" This code is derived from software contributed to The NetBSD Foundation | | 6 | .\" This code is derived from software contributed to The NetBSD Foundation |
7 | .\" by Jason R. Thorpe. | | 7 | .\" by Jason R. Thorpe. |
8 | .\" | | 8 | .\" |
9 | .\" Redistribution and use in source and binary forms, with or without | | 9 | .\" Redistribution and use in source and binary forms, with or without |
10 | .\" modification, are permitted provided that the following conditions | | 10 | .\" modification, are permitted provided that the following conditions |
11 | .\" are met: | | 11 | .\" are met: |
12 | .\" 1. Redistributions of source code must retain the above copyright | | 12 | .\" 1. Redistributions of source code must retain the above copyright |
13 | .\" notice, this list of conditions and the following disclaimer. | | 13 | .\" notice, this list of conditions and the following disclaimer. |
14 | .\" 2. Redistributions in binary form must reproduce the above copyright | | 14 | .\" 2. Redistributions in binary form must reproduce the above copyright |
| @@ -17,121 +17,342 @@ | | | @@ -17,121 +17,342 @@ |
17 | .\" | | 17 | .\" |
18 | .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 18 | .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
19 | .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 19 | .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
20 | .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 20 | .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 | .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 21 | .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
22 | .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 22 | .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
23 | .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 23 | .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 24 | .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
25 | .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 25 | .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
26 | .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 26 | .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
27 | .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 27 | .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
28 | .\" POSSIBILITY OF SUCH DAMAGE. | | 28 | .\" POSSIBILITY OF SUCH DAMAGE. |
29 | .\" | | 29 | .\" |
30 | .Dd November 20, 2014 | | 30 | .Dd September 2, 2020 |
31 | .Dt MEMBAR_OPS 3 | | 31 | .Dt MEMBAR_OPS 3 |
32 | .Os | | 32 | .Os |
33 | .Sh NAME | | 33 | .Sh NAME |
34 | .Nm membar_ops , | | 34 | .Nm membar_ops , |
35 | .Nm membar_enter , | | 35 | .Nm membar_enter , |
36 | .Nm membar_exit , | | 36 | .Nm membar_exit , |
37 | .Nm membar_producer , | | 37 | .Nm membar_producer , |
38 | .Nm membar_consumer , | | 38 | .Nm membar_consumer , |
39 | .Nm membar_datadep_consumer , | | 39 | .Nm membar_datadep_consumer , |
40 | .Nm membar_sync | | 40 | .Nm membar_sync |
41 | .Nd memory access barrier operations | | 41 | .Nd memory ordering barriers |
42 | .\" .Sh LIBRARY | | 42 | .\" .Sh LIBRARY |
43 | .\" .Lb libc | | 43 | .\" .Lb libc |
44 | .Sh SYNOPSIS | | 44 | .Sh SYNOPSIS |
45 | .In sys/atomic.h | | 45 | .In sys/atomic.h |
46 | .\" | | 46 | .\" |
47 | .Ft void | | 47 | .Ft void |
48 | .Fn membar_enter "void" | | 48 | .Fn membar_enter "void" |
49 | .Ft void | | 49 | .Ft void |
50 | .Fn membar_exit "void" | | 50 | .Fn membar_exit "void" |
51 | .Ft void | | 51 | .Ft void |
52 | .Fn membar_producer "void" | | 52 | .Fn membar_producer "void" |
53 | .Ft void | | 53 | .Ft void |
54 | .Fn membar_consumer "void" | | 54 | .Fn membar_consumer "void" |
55 | .Ft void | | 55 | .Ft void |
56 | .Fn membar_datadep_consumer "void" | | 56 | .Fn membar_datadep_consumer "void" |
57 | .Ft void | | 57 | .Ft void |
58 | .Fn membar_sync "void" | | 58 | .Fn membar_sync "void" |
59 | .Sh DESCRIPTION | | 59 | .Sh DESCRIPTION |
60 | The | | 60 | The |
61 | .Nm membar_ops | | 61 | .Nm |
62 | family of functions provide memory access barrier operations necessary | | 62 | family of functions prevent reordering of memory operations, as needed |
63 | for synchronization in multiprocessor execution environments that have | | 63 | for synchronization in multiprocessor execution environments that have |
64 | relaxed load and store order. | | 64 | relaxed load and store order. |
65 | .Bl -tag -width "mem" | | 65 | .Pp |
| | | 66 | In general, memory barriers must come in pairs \(em a barrier on one |
| | | 67 | CPU, such as |
| | | 68 | .Fn membar_exit , |
| | | 69 | must pair with a barrier on another CPU, such as |
| | | 70 | .Fn membar_enter , |
| | | 71 | in order to synchronize anything between the two CPUs. |
| | | 72 | Code using |
| | | 73 | .Nm |
 | | | 74 | should generally be annotated with comments identifying how the barriers are |
| | | 75 | paired. |
| | | 76 | .Pp |
| | | 77 | .Nm |
 | | | 78 | affects only operations on regular memory, not on device |
| | | 79 | memory; see |
| | | 80 | .Xr bus_space 9 |
| | | 81 | and |
| | | 82 | .Xr bus_dma 9 |
| | | 83 | for machine-independent interfaces to handling device memory and DMA |
| | | 84 | operations for device drivers. |
| | | 85 | .Pp |
| | | 86 | Unlike C11, |
| | | 87 | .Em all |
| | | 88 | memory operations \(em that is, all loads and stores on regular |
| | | 89 | memory \(em are affected by |
| | | 90 | .Nm , |
| | | 91 | not just C11 atomic operations on |
| | | 92 | .Vt _Atomic Ns -qualified |
| | | 93 | objects. |
| | | 94 | .Bl -tag -width abcd |
66 | .It Fn membar_enter | | 95 | .It Fn membar_enter |
67 | Any store preceding | | 96 | Any store preceding |
68 | .Fn membar_enter | | 97 | .Fn membar_enter |
69 | will reach global visibility before all loads and stores following it. | | 98 | will happen before all memory operations following it. |
| | | 99 | .Pp |
| | | 100 | An atomic read/modify/write operation |
| | | 101 | .Pq Xr atomic_ops 3 |
| | | 102 | followed by a |
| | | 103 | .Fn membar_enter |
| | | 104 | implies a |
| | | 105 | .Em load-acquire |
| | | 106 | operation in the language of C11. |
| | | 107 | .Pp |
| | | 108 | .Sy WARNING : |
| | | 109 | A load followed by |
| | | 110 | .Fn membar_enter |
| | | 111 | .Em does not |
| | | 112 | imply a |
| | | 113 | .Em load-acquire |
| | | 114 | operation, even though |
| | | 115 | .Fn membar_exit |
| | | 116 | followed by a store implies a |
| | | 117 | .Em store-release |
| | | 118 | operation; the symmetry of these names and asymmetry of the semantics |
| | | 119 | is a historical mistake. |
| | | 120 | In the |
| | | 121 | .Nx |
| | | 122 | kernel, you can use |
| | | 123 | .Xr atomic_load_acquire 9 |
| | | 124 | for a |
| | | 125 | .Em load-acquire |
| | | 126 | operation without any atomic read/modify/write. |
70 | .Pp | | 127 | .Pp |
71 | .Fn membar_enter | | 128 | .Fn membar_enter |
72 | is typically used in code that implements locking primitives to ensure | | 129 | is typically used in code that implements locking primitives to ensure |
73 | that a lock protects its data. | | 130 | that a lock protects its data, and is typically paired with |
| | | 131 | .Fn membar_exit ; |
| | | 132 | see below for an example. |
74 | .It Fn membar_exit | | 133 | .It Fn membar_exit |
75 | All loads and stores preceding | | 134 | All memory operations preceding |
76 | .Fn membar_exit | | 135 | .Fn membar_exit |
77 | will reach global visibility before any store that follows it. | | 136 | will happen before any store that follows it. |
| | | 137 | .Pp |
| | | 138 | A |
| | | 139 | .Fn membar_exit |
| | | 140 | followed by a store implies a |
| | | 141 | .Em store-release |
| | | 142 | operation in the language of C11. |
| | | 143 | For a regular store, rather than an atomic read/modify/write store, you |
| | | 144 | should use |
| | | 145 | .Xr atomic_store_release 9 |
| | | 146 | instead of |
| | | 147 | .Fn membar_exit |
| | | 148 | followed by the store. |
78 | .Pp | | 149 | .Pp |
79 | .Fn membar_exit | | 150 | .Fn membar_exit |
80 | is typically used in code that implements locking primitives to ensure | | 151 | is typically used in code that implements locking primitives to ensure |
81 | that a lock protects its data. | | 152 | that a lock protects its data, and is typically paired with |
| | | 153 | .Fn membar_enter . |
| | | 154 | For example: |
| | | 155 | .Bd -literal -offset abcdefgh |
| | | 156 | /* thread A */ |
| | | 157 | obj->state.mumblefrotz = 42; |
| | | 158 | KASSERT(valid(&obj->state)); |
| | | 159 | membar_exit(); |
| | | 160 | obj->lock = 0; |
| | | 161 | |
| | | 162 | /* thread B */ |
| | | 163 | if (atomic_cas_uint(&obj->lock, 0, 1) != 0) |
| | | 164 | return; |
| | | 165 | membar_enter(); |
| | | 166 | KASSERT(valid(&obj->state)); |
| | | 167 | obj->state.mumblefrotz--; |
| | | 168 | .Ed |
| | | 169 | .Pp |
| | | 170 | In this example, |
| | | 171 | .Em if |
| | | 172 | the |
| | | 173 | .Fn atomic_cas_uint |
| | | 174 | operation in thread B witnesses the store |
| | | 175 | .Li "obj->lock = 0" |
| | | 176 | from thread A, |
| | | 177 | .Em then |
| | | 178 | everything in thread A before the |
| | | 179 | .Fn membar_exit |
| | | 180 | is guaranteed to happen before everything in thread B after the |
| | | 181 | .Fn membar_enter , |
| | | 182 | as if the machine had sequentially executed: |
| | | 183 | .Bd -literal -offset abcdefgh |
| | | 184 | obj->state.mumblefrotz = 42; /* from thread A */ |
| | | 185 | KASSERT(valid(&obj->state)); |
| | | 186 | \&... |
| | | 187 | KASSERT(valid(&obj->state)); /* from thread B */ |
| | | 188 | obj->state.mumblefrotz--; |
| | | 189 | .Ed |
| | | 190 | .Pp |
| | | 191 | .Fn membar_exit |
| | | 192 | followed by a store, serving as a |
| | | 193 | .Em store-release |
| | | 194 | operation, may also be paired with a subsequent load followed by |
| | | 195 | .Fn membar_sync , |
| | | 196 | serving as the corresponding |
| | | 197 | .Em load-acquire |
| | | 198 | operation. |
| | | 199 | However, you should use |
| | | 200 | .Xr atomic_store_release 9 |
| | | 201 | and |
| | | 202 | .Xr atomic_load_acquire 9 |
| | | 203 | instead in that situation, unless the store is an atomic |
| | | 204 | read/modify/write which requires a separate |
| | | 205 | .Fn membar_exit . |
82 | .It Fn membar_producer | | 206 | .It Fn membar_producer |
83 | All stores preceding the memory barrier will reach global visibility | | 207 | All stores preceding |
84 | before any stores after the memory barrier reach global visibility. | | 208 | .Fn membar_producer |
| | | 209 | will happen before any stores following it. |
| | | 210 | .Pp |
| | | 211 | .Fn membar_producer |
| | | 212 | has no analogue in C11. |
| | | 213 | .Pp |
| | | 214 | .Fn membar_producer |
| | | 215 | is typically used in code that produces data for read-only consumers |
| | | 216 | which use |
| | | 217 | .Fn membar_consumer , |
| | | 218 | such as |
| | | 219 | .Sq seqlocked |
| | | 220 | snapshots of statistics; see below for an example. |
85 | .It Fn membar_consumer | | 221 | .It Fn membar_consumer |
86 | All loads preceding the memory barrier will complete before any loads | | 222 | All loads preceding |
87 | after the memory barrier complete. | | 223 | .Fn membar_consumer |
| | | 224 | will complete before any loads after it. |
| | | 225 | .Pp |
| | | 226 | .Fn membar_consumer |
| | | 227 | has no analogue in C11. |
| | | 228 | .Pp |
| | | 229 | .Fn membar_consumer |
| | | 230 | is typically used in code that reads data from producers which use |
| | | 231 | .Fn membar_producer , |
| | | 232 | such as |
| | | 233 | .Sq seqlocked |
| | | 234 | snapshots of statistics. |
| | | 235 | For example: |
| | | 236 | .Bd -literal |
| | | 237 | struct { |
| | | 238 | /* version number and in-progress bit */ |
| | | 239 | unsigned seq; |
| | | 240 | |
| | | 241 | /* read-only statistics, too large for atomic load */ |
| | | 242 | unsigned foo; |
| | | 243 | int bar; |
| | | 244 | uint64_t baz; |
 | | | 245 | } *stats; |
| | | 246 | |
| | | 247 | /* producer (must be serialized, e.g. with mutex(9)) */ |
| | | 248 | stats->seq |= 1; /* mark update in progress */ |
| | | 249 | membar_producer(); |
| | | 250 | stats->foo = count_foo(); |
| | | 251 | stats->bar = measure_bar(); |
| | | 252 | stats->baz = enumerate_baz(); |
| | | 253 | membar_producer(); |
| | | 254 | stats->seq++; /* bump version number */ |
| | | 255 | |
| | | 256 | /* consumer (in parallel w/ producer, other consumers) */ |
| | | 257 | restart: |
| | | 258 | while ((seq = stats->seq) & 1) /* wait for update */ |
| | | 259 | SPINLOCK_BACKOFF_HOOK; |
| | | 260 | membar_consumer(); |
| | | 261 | foo = stats->foo; /* read out a candidate snapshot */ |
| | | 262 | bar = stats->bar; |
| | | 263 | baz = stats->baz; |
| | | 264 | membar_consumer(); |
| | | 265 | if (seq != stats->seq) /* try again if version changed */ |
| | | 266 | goto restart; |
| | | 267 | .Ed |
88 | .It Fn membar_datadep_consumer | | 268 | .It Fn membar_datadep_consumer |
89 | Same as | | 269 | Same as |
90 | .Fn membar_consumer , | | 270 | .Fn membar_consumer , |
91 | but limited to loads of addresses dependent on prior loads, or | | 271 | but limited to loads of addresses dependent on prior loads, or |
92 | .Sq data-dependent | | 272 | .Sq data-dependent |
93 | loads: | | 273 | loads: |
94 | .Bd -literal -offset indent | | 274 | .Bd -literal -offset indent |
95 | int **pp, *p, v; | | 275 | int **pp, *p, v; |
96 | | | 276 | |
97 | p = *pp; | | 277 | p = *pp; |
98 | membar_datadep_consumer(); | | 278 | membar_datadep_consumer(); |
99 | v = *p; | | 279 | v = *p; |
100 | consume(v); | | 280 | consume(v); |
101 | .Ed | | 281 | .Ed |
102 | .Pp | | 282 | .Pp |
103 | Does not guarantee ordering of loads in branches, or | | 283 | .Fn membar_datadep_consumer |
| | | 284 | is typically paired with |
| | | 285 | .Fn membar_exit |
| | | 286 | by code that initializes an object before publishing it. |
| | | 287 | However, you should use |
| | | 288 | .Xr atomic_store_release 9 |
| | | 289 | and |
| | | 290 | .Xr atomic_load_consume 9 |
| | | 291 | instead, to avoid obscure edge cases in case the consumer is not |
| | | 292 | read-only. |
| | | 293 | .Pp |
| | | 294 | .Fn membar_datadep_consumer |
| | | 295 | does not guarantee ordering of loads in branches, or |
104 | .Sq control-dependent | | 296 | .Sq control-dependent |
105 | loads -- you must use | | 297 | loads \(em you must use |
106 | .Fn membar_consumer | | 298 | .Fn membar_consumer |
107 | instead: | | 299 | instead: |
108 | .Bd -literal -offset indent | | 300 | .Bd -literal -offset indent |
109 | int *ok, *p, v; | | 301 | int *ok, *p, v; |
110 | | | 302 | |
111 | if (*ok) { | | 303 | if (*ok) { |
112 | membar_consumer(); | | 304 | membar_consumer(); |
113 | v = *p; | | 305 | v = *p; |
114 | consume(v); | | 306 | consume(v); |
115 | } | | 307 | } |
116 | .Ed | | 308 | .Ed |
117 | .Pp | | 309 | .Pp |
118 | Most CPUs do not reorder data-dependent loads (i.e., most CPUs | | 310 | Most CPUs do not reorder data-dependent loads (i.e., most CPUs |
119 | guarantee that cached values are not stale in that case), so | | 311 | guarantee that cached values are not stale in that case), so |
120 | .Fn membar_datadep_consumer | | 312 | .Fn membar_datadep_consumer |
121 | is a no-op on those CPUs. | | 313 | is a no-op on those CPUs. |
122 | .It Fn membar_sync | | 314 | .It Fn membar_sync |
123 | All loads and stores preceding the memory barrier will complete and | | 315 | All memory operations preceding |
124 | reach global visibility before any loads and stores after the memory | | 316 | .Fn membar_sync |
125 | barrier complete and reach global visibility. | | 317 | will happen before any memory operations following it. |
| | | 318 | .Pp |
| | | 319 | .Fn membar_sync |
 | | | 320 | is a sequentially consistent acquire/release barrier, analogous to |
| | | 321 | .Li "atomic_thread_fence(memory_order_seq_cst)" |
| | | 322 | in C11. |
| | | 323 | .Pp |
| | | 324 | .Fn membar_sync |
| | | 325 | is typically paired with |
| | | 326 | .Fn membar_sync . |
| | | 327 | .Pp |
| | | 328 | A load followed by |
| | | 329 | .Fn membar_sync , |
| | | 330 | serving as a |
| | | 331 | .Em load-acquire |
| | | 332 | operation, may also be paired with a prior |
| | | 333 | .Fn membar_exit |
| | | 334 | followed by a store, serving as the corresponding |
| | | 335 | .Em store-release |
| | | 336 | operation. |
| | | 337 | However, you should use |
| | | 338 | .Xr atomic_load_acquire 9 |
| | | 339 | instead of |
| | | 340 | .No load-then- Ns Fn membar_sync |
| | | 341 | if it is a regular load, or |
| | | 342 | .Fn membar_enter |
| | | 343 | instead of |
| | | 344 | .Fn membar_sync |
| | | 345 | if the load is in an atomic read/modify/write operation. |
126 | .El | | 346 | .El |
127 | .Sh SEE ALSO | | 347 | .Sh SEE ALSO |
128 | .Xr atomic_ops 3 | | 348 | .Xr atomic_ops 3 , |
| | | 349 | .Xr atomic_loadstore 9 |
129 | .Sh HISTORY | | 350 | .Sh HISTORY |
130 | The | | 351 | The |
131 | .Nm membar_ops | | 352 | .Nm membar_ops |
132 | functions first appeared in | | 353 | functions first appeared in |
133 | .Nx 5.0 . | | 354 | .Nx 5.0 . |
134 | The data-dependent load barrier, | | 355 | The data-dependent load barrier, |
135 | .Fn membar_datadep_consumer , | | 356 | .Fn membar_datadep_consumer , |
136 | first appeared in | | 357 | first appeared in |
137 | .Nx 7.0 . | | 358 | .Nx 7.0 . |