| @@ -1,268 +1,269 @@ | | | @@ -1,268 +1,269 @@ |
1 | /* $NetBSD: subr_pserialize.c,v 1.1 2011/07/30 17:01:04 christos Exp $ */ | | 1 | /* $NetBSD: subr_pserialize.c,v 1.2 2011/08/01 15:26:31 he Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
15 | * | | 15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. | | 26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ | | 27 | */ |
28 | | | 28 | |
29 | /* | | 29 | /* |
30 | * Passive serialization. | | 30 | * Passive serialization. |
31 | * | | 31 | * |
32 | * Implementation accurately matches the lapsed US patent 4809168, therefore | | 32 | * Implementation accurately matches the lapsed US patent 4809168, therefore |
33 | * code is patent-free in the United States. Your use of this code is at | | 33 | * code is patent-free in the United States. Your use of this code is at |
34 | * your own risk. | | 34 | * your own risk. |
35 | * | | 35 | * |
36 | * Note for NetBSD developers: all changes to this source file must be | | 36 | * Note for NetBSD developers: all changes to this source file must be |
37 | * approved by the <core>. | | 37 | * approved by the <core>. |
38 | */ | | 38 | */ |
39 | | | 39 | |
40 | #include <sys/cdefs.h> | | 40 | #include <sys/cdefs.h> |
41 | __KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.1 2011/07/30 17:01:04 christos Exp $"); | | 41 | __KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.2 2011/08/01 15:26:31 he Exp $"); |
42 | | | 42 | |
43 | #include <sys/param.h> | | 43 | #include <sys/param.h> |
44 | | | 44 | |
45 | #include <sys/condvar.h> | | 45 | #include <sys/condvar.h> |
46 | #include <sys/cpu.h> | | 46 | #include <sys/cpu.h> |
| | | 47 | #include <sys/evcnt.h> |
47 | #include <sys/kmem.h> | | 48 | #include <sys/kmem.h> |
48 | #include <sys/mutex.h> | | 49 | #include <sys/mutex.h> |
49 | #include <sys/pserialize.h> | | 50 | #include <sys/pserialize.h> |
50 | #include <sys/queue.h> | | 51 | #include <sys/queue.h> |
51 | #include <sys/xcall.h> | | 52 | #include <sys/xcall.h> |
52 | | | 53 | |
53 | struct pserialize { | | 54 | struct pserialize { |
54 | TAILQ_ENTRY(pserialize) psz_chain; | | 55 | TAILQ_ENTRY(pserialize) psz_chain; |
55 | lwp_t * psz_owner; | | 56 | lwp_t * psz_owner; |
56 | kcondvar_t psz_notifier; | | 57 | kcondvar_t psz_notifier; |
57 | kcpuset_t * psz_target; | | 58 | kcpuset_t * psz_target; |
58 | kcpuset_t * psz_pass; | | 59 | kcpuset_t * psz_pass; |
59 | }; | | 60 | }; |
60 | | | 61 | |
61 | static u_int psz_work_todo __cacheline_aligned; | | 62 | static u_int psz_work_todo __cacheline_aligned; |
62 | static kmutex_t psz_lock __cacheline_aligned; | | 63 | static kmutex_t psz_lock __cacheline_aligned; |
63 | static struct evcnt psz_ev_excl __cacheline_aligned; | | 64 | static struct evcnt psz_ev_excl __cacheline_aligned; |
64 | | | 65 | |
65 | /* | | 66 | /* |
66 | * As defined in "Method 1": | | 67 | * As defined in "Method 1": |
67 | * q0: "0 MP checkpoints have occurred". | | 68 | * q0: "0 MP checkpoints have occurred". |
68 | * q1: "1 MP checkpoint has occurred". | | 69 | * q1: "1 MP checkpoint has occurred". |
69 | * q2: "2 MP checkpoints have occurred". | | 70 | * q2: "2 MP checkpoints have occurred". |
70 | */ | | 71 | */ |
71 | static TAILQ_HEAD(, pserialize) psz_queue0 __cacheline_aligned; | | 72 | static TAILQ_HEAD(, pserialize) psz_queue0 __cacheline_aligned; |
72 | static TAILQ_HEAD(, pserialize) psz_queue1 __cacheline_aligned; | | 73 | static TAILQ_HEAD(, pserialize) psz_queue1 __cacheline_aligned; |
73 | static TAILQ_HEAD(, pserialize) psz_queue2 __cacheline_aligned; | | 74 | static TAILQ_HEAD(, pserialize) psz_queue2 __cacheline_aligned; |
74 | | | 75 | |
75 | /* | | 76 | /* |
76 | * pserialize_init: | | 77 | * pserialize_init: |
77 | * | | 78 | * |
78 | * Initialize passive serialization structures. | | 79 | * Initialize passive serialization structures. |
79 | */ | | 80 | */ |
80 | void | | 81 | void |
81 | pserialize_init(void) | | 82 | pserialize_init(void) |
82 | { | | 83 | { |
83 | | | 84 | |
84 | psz_work_todo = 0; | | 85 | psz_work_todo = 0; |
85 | TAILQ_INIT(&psz_queue0); | | 86 | TAILQ_INIT(&psz_queue0); |
86 | TAILQ_INIT(&psz_queue1); | | 87 | TAILQ_INIT(&psz_queue1); |
87 | TAILQ_INIT(&psz_queue2); | | 88 | TAILQ_INIT(&psz_queue2); |
88 | mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED); | | 89 | mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED); |
89 | evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL, | | 90 | evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL, |
90 | "pserialize", "exclusive access"); | | 91 | "pserialize", "exclusive access"); |
91 | } | | 92 | } |
92 | | | 93 | |
93 | /* | | 94 | /* |
94 | * pserialize_create: | | 95 | * pserialize_create: |
95 | * | | 96 | * |
96 | * Create and initialize a passive serialization object. | | 97 | * Create and initialize a passive serialization object. |
97 | */ | | 98 | */ |
98 | pserialize_t | | 99 | pserialize_t |
99 | pserialize_create(void) | | 100 | pserialize_create(void) |
100 | { | | 101 | { |
101 | pserialize_t psz; | | 102 | pserialize_t psz; |
102 | | | 103 | |
103 | psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP); | | 104 | psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP); |
104 | cv_init(&psz->psz_notifier, "psrlz"); | | 105 | cv_init(&psz->psz_notifier, "psrlz"); |
105 | psz->psz_target = kcpuset_create(); | | 106 | psz->psz_target = kcpuset_create(); |
106 | psz->psz_pass = kcpuset_create(); | | 107 | psz->psz_pass = kcpuset_create(); |
107 | psz->psz_owner = NULL; | | 108 | psz->psz_owner = NULL; |
108 | | | 109 | |
109 | return psz; | | 110 | return psz; |
110 | } | | 111 | } |
111 | | | 112 | |
112 | /* | | 113 | /* |
113 | * pserialize_destroy: | | 114 | * pserialize_destroy: |
114 | * | | 115 | * |
115 | * Destroy a passive serialization object. | | 116 | * Destroy a passive serialization object. |
116 | */ | | 117 | */ |
117 | void | | 118 | void |
118 | pserialize_destroy(pserialize_t psz) | | 119 | pserialize_destroy(pserialize_t psz) |
119 | { | | 120 | { |
120 | | | 121 | |
121 | KASSERT(psz->psz_owner == NULL); | | 122 | KASSERT(psz->psz_owner == NULL); |
122 | | | 123 | |
123 | cv_destroy(&psz->psz_notifier); | | 124 | cv_destroy(&psz->psz_notifier); |
124 | kcpuset_destroy(psz->psz_target); | | 125 | kcpuset_destroy(psz->psz_target); |
125 | kcpuset_destroy(psz->psz_pass); | | 126 | kcpuset_destroy(psz->psz_pass); |
126 | kmem_free(psz, sizeof(struct pserialize)); | | 127 | kmem_free(psz, sizeof(struct pserialize)); |
127 | } | | 128 | } |
128 | | | 129 | |
129 | /* | | 130 | /* |
130 | * pserialize_perform: | | 131 | * pserialize_perform: |
131 | * | | 132 | * |
132 | * Perform the write side of passive serialization. The calling | | 133 | * Perform the write side of passive serialization. The calling |
133 | * thread holds an exclusive lock on the data object(s) being updated. | | 134 | * thread holds an exclusive lock on the data object(s) being updated. |
134 | * We wait until every processor in the system has made at least two | | 135 | * We wait until every processor in the system has made at least two |
135 | * passes through cpu_switchto(). The wait is made with the caller's | | 136 | * passes through cpu_switchto(). The wait is made with the caller's |
136 | * update lock held, but is short term. | | 137 | * update lock held, but is short term. |
137 | */ | | 138 | */ |
138 | void | | 139 | void |
139 | pserialize_perform(pserialize_t psz) | | 140 | pserialize_perform(pserialize_t psz) |
140 | { | | 141 | { |
141 | | | 142 | |
142 | KASSERT(!cpu_intr_p()); | | 143 | KASSERT(!cpu_intr_p()); |
143 | KASSERT(!cpu_softintr_p()); | | 144 | KASSERT(!cpu_softintr_p()); |
144 | | | 145 | |
145 | if (__predict_false(panicstr != NULL)) { | | 146 | if (__predict_false(panicstr != NULL)) { |
146 | return; | | 147 | return; |
147 | } | | 148 | } |
148 | KASSERT(psz->psz_owner == NULL); | | 149 | KASSERT(psz->psz_owner == NULL); |
149 | KASSERT(kcpuset_iszero(psz->psz_target)); | | 150 | KASSERT(kcpuset_iszero(psz->psz_target)); |
150 | KASSERT(ncpu > 0); | | 151 | KASSERT(ncpu > 0); |
151 | | | 152 | |
152 | /* | | 153 | /* |
153 | * Set up the object and put it onto the queue. The lock | | 154 | * Set up the object and put it onto the queue. The lock |
154 | * activity here provides the necessary memory barrier to | | 155 | * activity here provides the necessary memory barrier to |
155 | * make the caller's data update completely visible to | | 156 | * make the caller's data update completely visible to |
156 | * other processors. | | 157 | * other processors. |
157 | */ | | 158 | */ |
158 | psz->psz_owner = curlwp; | | 159 | psz->psz_owner = curlwp; |
159 | kcpuset_fill(psz->psz_target); | | 160 | kcpuset_fill(psz->psz_target); |
160 | kcpuset_zero(psz->psz_pass); | | 161 | kcpuset_zero(psz->psz_pass); |
161 | | | 162 | |
162 | mutex_spin_enter(&psz_lock); | | 163 | mutex_spin_enter(&psz_lock); |
163 | TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain); | | 164 | TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain); |
164 | psz_work_todo++; | | 165 | psz_work_todo++; |
165 | mutex_spin_exit(&psz_lock); | | 166 | mutex_spin_exit(&psz_lock); |
166 | | | 167 | |
167 | /* | | 168 | /* |
168 | * Force some context switch activity on every CPU, as the system | | 169 | * Force some context switch activity on every CPU, as the system |
169 | * may not be busy. Note: should pass the point twice. | | 170 | * may not be busy. Note: should pass the point twice. |
170 | */ | | 171 | */ |
171 | xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL); | | 172 | xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL); |
172 | xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL); | | 173 | xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL); |
173 | | | 174 | |
174 | /* | | 175 | /* |
175 | * Wait for all CPUs to cycle through mi_switch() twice. | | 176 | * Wait for all CPUs to cycle through mi_switch() twice. |
176 | * The last one through will remove our update from the | | 177 | * The last one through will remove our update from the |
177 | * queue and awaken us. | | 178 | * queue and awaken us. |
178 | */ | | 179 | */ |
179 | mutex_spin_enter(&psz_lock); | | 180 | mutex_spin_enter(&psz_lock); |
180 | while (!kcpuset_iszero(psz->psz_target)) { | | 181 | while (!kcpuset_iszero(psz->psz_target)) { |
181 | cv_wait(&psz->psz_notifier, &psz_lock); | | 182 | cv_wait(&psz->psz_notifier, &psz_lock); |
182 | } | | 183 | } |
183 | psz_ev_excl.ev_count++; | | 184 | psz_ev_excl.ev_count++; |
184 | mutex_spin_exit(&psz_lock); | | 185 | mutex_spin_exit(&psz_lock); |
185 | | | 186 | |
186 | psz->psz_owner = NULL; | | 187 | psz->psz_owner = NULL; |
187 | } | | 188 | } |
188 | | | 189 | |
189 | int | | 190 | int |
190 | pserialize_read_enter(void) | | 191 | pserialize_read_enter(void) |
191 | { | | 192 | { |
192 | | | 193 | |
193 | KASSERT(!cpu_intr_p()); | | 194 | KASSERT(!cpu_intr_p()); |
194 | return splsoftclock(); | | 195 | return splsoftclock(); |
195 | } | | 196 | } |
196 | | | 197 | |
197 | void | | 198 | void |
198 | pserialize_read_exit(int s) | | 199 | pserialize_read_exit(int s) |
199 | { | | 200 | { |
200 | | | 201 | |
201 | splx(s); | | 202 | splx(s); |
202 | } | | 203 | } |
203 | | | 204 | |
204 | /* | | 205 | /* |
205 | * pserialize_switchpoint: | | 206 | * pserialize_switchpoint: |
206 | * | | 207 | * |
207 | * Monitor system context switch activity. Called from machine | | 208 | * Monitor system context switch activity. Called from machine |
208 | * independent code after mi_switch() returns. | | 209 | * independent code after mi_switch() returns. |
209 | */ | | 210 | */ |
210 | void | | 211 | void |
211 | pserialize_switchpoint(void) | | 212 | pserialize_switchpoint(void) |
212 | { | | 213 | { |
213 | pserialize_t psz, next; | | 214 | pserialize_t psz, next; |
214 | cpuid_t cid; | | 215 | cpuid_t cid; |
215 | | | 216 | |
216 | /* | | 217 | /* |
217 | * If no updates pending, bail out. No need to lock in order to | | 218 | * If no updates pending, bail out. No need to lock in order to |
218 | * test psz_work_todo; the only ill effect of missing an update | | 219 | * test psz_work_todo; the only ill effect of missing an update |
219 | * would be to delay LWPs waiting in pserialize_perform(). That | | 220 | * would be to delay LWPs waiting in pserialize_perform(). That |
220 | * will not happen because updates are on the queue before an | | 221 | * will not happen because updates are on the queue before an |
221 | * xcall is generated (serialization) to tickle every CPU. | | 222 | * xcall is generated (serialization) to tickle every CPU. |
222 | */ | | 223 | */ |
223 | if (__predict_true(psz_work_todo == 0)) { | | 224 | if (__predict_true(psz_work_todo == 0)) { |
224 | return; | | 225 | return; |
225 | } | | 226 | } |
226 | mutex_spin_enter(&psz_lock); | | 227 | mutex_spin_enter(&psz_lock); |
227 | cid = cpu_index(curcpu()); | | 228 | cid = cpu_index(curcpu()); |
228 | | | 229 | |
229 | /* | | 230 | /* |
230 | * At first, scan through the second queue and update each request, | | 231 | * At first, scan through the second queue and update each request, |
231 | * if passed all processors, then transfer to the third queue. | | 232 | * if passed all processors, then transfer to the third queue. |
232 | */ | | 233 | */ |
233 | for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) { | | 234 | for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) { |
234 | next = TAILQ_NEXT(psz, psz_chain); | | 235 | next = TAILQ_NEXT(psz, psz_chain); |
235 | if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { | | 236 | if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { |
236 | kcpuset_set(cid, psz->psz_pass); | | 237 | kcpuset_set(cid, psz->psz_pass); |
237 | continue; | | 238 | continue; |
238 | } | | 239 | } |
239 | kcpuset_zero(psz->psz_pass); | | 240 | kcpuset_zero(psz->psz_pass); |
240 | TAILQ_REMOVE(&psz_queue1, psz, psz_chain); | | 241 | TAILQ_REMOVE(&psz_queue1, psz, psz_chain); |
241 | TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain); | | 242 | TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain); |
242 | } | | 243 | } |
243 | /* | | 244 | /* |
244 | * Scan through the first queue and update each request, | | 245 | * Scan through the first queue and update each request, |
245 | * if passed all processors, then move to the second queue. | | 246 | * if passed all processors, then move to the second queue. |
246 | */ | | 247 | */ |
247 | for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) { | | 248 | for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) { |
248 | next = TAILQ_NEXT(psz, psz_chain); | | 249 | next = TAILQ_NEXT(psz, psz_chain); |
249 | if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { | | 250 | if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { |
250 | kcpuset_set(cid, psz->psz_pass); | | 251 | kcpuset_set(cid, psz->psz_pass); |
251 | continue; | | 252 | continue; |
252 | } | | 253 | } |
253 | kcpuset_zero(psz->psz_pass); | | 254 | kcpuset_zero(psz->psz_pass); |
254 | TAILQ_REMOVE(&psz_queue0, psz, psz_chain); | | 255 | TAILQ_REMOVE(&psz_queue0, psz, psz_chain); |
255 | TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain); | | 256 | TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain); |
256 | } | | 257 | } |
257 | /* | | 258 | /* |
258 | * Process the third queue: entries have been seen twice on every | | 259 | * Process the third queue: entries have been seen twice on every |
259 | * processor, remove from the queue and notify the updating thread. | | 260 | * processor, remove from the queue and notify the updating thread. |
260 | */ | | 261 | */ |
261 | while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) { | | 262 | while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) { |
262 | TAILQ_REMOVE(&psz_queue2, psz, psz_chain); | | 263 | TAILQ_REMOVE(&psz_queue2, psz, psz_chain); |
263 | kcpuset_zero(psz->psz_target); | | 264 | kcpuset_zero(psz->psz_target); |
264 | cv_signal(&psz->psz_notifier); | | 265 | cv_signal(&psz->psz_notifier); |
265 | psz_work_todo--; | | 266 | psz_work_todo--; |
266 | } | | 267 | } |
267 | mutex_spin_exit(&psz_lock); | | 268 | mutex_spin_exit(&psz_lock); |
268 | } | | 269 | } |