| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: kern_turnstile.c,v 1.30 2011/07/27 14:35:34 uebayasi Exp $ */ | | 1 | /* $NetBSD: kern_turnstile.c,v 1.31 2011/12/02 12:31:53 yamt Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe and Andrew Doran. | | 8 | * by Jason R. Thorpe and Andrew Doran. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -50,27 +50,27 @@ | | | @@ -50,27 +50,27 @@ |
50 | * queue. If a thread decides it doesn't need to block after all, then this | | 50 | * queue. If a thread decides it doesn't need to block after all, then this |
51 | * interlock must be released by explicitly aborting the turnstile | | 51 | * interlock must be released by explicitly aborting the turnstile |
52 | * operation. | | 52 | * operation. |
53 | * | | 53 | * |
54 | * When a thread is awakened, it needs to get its turnstile back. If there | | 54 | * When a thread is awakened, it needs to get its turnstile back. If there |
55 | * are still other threads waiting in the active turnstile, the thread | | 55 | * are still other threads waiting in the active turnstile, the thread |
56 | * grabs a free turnstile off the free list. Otherwise, it can take back | | 56 | * grabs a free turnstile off the free list. Otherwise, it can take back |
57 | * the active turnstile from the lock (thus deactivating the turnstile). | | 57 | * the active turnstile from the lock (thus deactivating the turnstile). |
58 | * | | 58 | * |
59 | * Turnstiles are the place to do priority inheritance. | | 59 | * Turnstiles are the place to do priority inheritance. |
60 | */ | | 60 | */ |
61 | | | 61 | |
62 | #include <sys/cdefs.h> | | 62 | #include <sys/cdefs.h> |
63 | __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.30 2011/07/27 14:35:34 uebayasi Exp $"); | | 63 | __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.31 2011/12/02 12:31:53 yamt Exp $"); |
64 | | | 64 | |
65 | #include <sys/param.h> | | 65 | #include <sys/param.h> |
66 | #include <sys/lockdebug.h> | | 66 | #include <sys/lockdebug.h> |
67 | #include <sys/pool.h> | | 67 | #include <sys/pool.h> |
68 | #include <sys/proc.h> | | 68 | #include <sys/proc.h> |
69 | #include <sys/sleepq.h> | | 69 | #include <sys/sleepq.h> |
70 | #include <sys/systm.h> | | 70 | #include <sys/systm.h> |
71 | | | 71 | |
72 | #define TS_HASH_SIZE 64 | | 72 | #define TS_HASH_SIZE 64 |
73 | #define TS_HASH_MASK (TS_HASH_SIZE - 1) | | 73 | #define TS_HASH_MASK (TS_HASH_SIZE - 1) |
74 | #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) | | 74 | #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) |
75 | | | 75 | |
76 | static tschain_t turnstile_tab[TS_HASH_SIZE] __cacheline_aligned; | | 76 | static tschain_t turnstile_tab[TS_HASH_SIZE] __cacheline_aligned; |
| @@ -185,107 +185,51 @@ turnstile_lookup(wchan_t obj) | | | @@ -185,107 +185,51 @@ turnstile_lookup(wchan_t obj) |
185 | * | | 185 | * |
186 | * Abort a turnstile operation. | | 186 | * Abort a turnstile operation. |
187 | */ | | 187 | */ |
188 | void | | 188 | void |
189 | turnstile_exit(wchan_t obj) | | 189 | turnstile_exit(wchan_t obj) |
190 | { | | 190 | { |
191 | tschain_t *tc; | | 191 | tschain_t *tc; |
192 | | | 192 | |
193 | tc = &turnstile_tab[TS_HASH(obj)]; | | 193 | tc = &turnstile_tab[TS_HASH(obj)]; |
194 | mutex_spin_exit(tc->tc_mutex); | | 194 | mutex_spin_exit(tc->tc_mutex); |
195 | } | | 195 | } |
196 | | | 196 | |
197 | /* | | 197 | /* |
198 | * turnstile_block: | | 198 | * turnstile_lendpri: |
| | | 199 | * |
| | | 200 | * Lend our priority to lwps on the blocking chain. |
| | | 201 | * |
199 | * | | 202 | * |
200 | * Enter an object into the turnstile chain and prepare the current | | | |
201 | * LWP for sleep. | | | |
202 | */ | | 203 | */ |
203 | void | | | |
204 | turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) | | | |
205 | { | | | |
206 | lwp_t *l; | | | |
207 | lwp_t *cur; /* cached curlwp */ | | | |
208 | lwp_t *owner; | | | |
209 | turnstile_t *ots; | | | |
210 | tschain_t *tc; | | | |
211 | sleepq_t *sq; | | | |
212 | pri_t prio, obase; | | | |
213 | | | | |
214 | tc = &turnstile_tab[TS_HASH(obj)]; | | | |
215 | l = cur = curlwp; | | | |
216 | | | 204 | |
217 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | | 205 | static void |
218 | KASSERT(mutex_owned(tc->tc_mutex)); | | 206 | turnstile_lendpri(lwp_t *cur) |
219 | KASSERT(l != NULL && l->l_ts != NULL); | | 207 | { |
220 | | | 208 | lwp_t * l = cur; |
221 | if (ts == NULL) { | | 209 | pri_t prio; |
222 | /* | | | |
223 | * We are the first thread to wait for this object; | | | |
224 | * lend our turnstile to it. | | | |
225 | */ | | | |
226 | ts = l->l_ts; | | | |
227 | KASSERT(TS_ALL_WAITERS(ts) == 0); | | | |
228 | KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && | | | |
229 | TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | | | |
230 | ts->ts_obj = obj; | | | |
231 | ts->ts_inheritor = NULL; | | | |
232 | LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); | | | |
233 | } else { | | | |
234 | /* | | | |
235 | * Object already has a turnstile. Put our turnstile | | | |
236 | * onto the free list, and reference the existing | | | |
237 | * turnstile instead. | | | |
238 | */ | | | |
239 | ots = l->l_ts; | | | |
240 | KASSERT(ots->ts_free == NULL); | | | |
241 | ots->ts_free = ts->ts_free; | | | |
242 | ts->ts_free = ots; | | | |
243 | l->l_ts = ts; | | | |
244 | | | | |
245 | KASSERT(ts->ts_obj == obj); | | | |
246 | KASSERT(TS_ALL_WAITERS(ts) != 0); | | | |
247 | KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || | | | |
248 | !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | | | |
249 | } | | | |
250 | | | | |
251 | sq = &ts->ts_sleepq[q]; | | | |
252 | ts->ts_waiters[q]++; | | | |
253 | sleepq_enter(sq, l, tc->tc_mutex); | | | |
254 | LOCKDEBUG_BARRIER(tc->tc_mutex, 1); | | | |
255 | l->l_kpriority = true; | | | |
256 | obase = l->l_kpribase; | | | |
257 | if (obase < PRI_KTHREAD) | | | |
258 | l->l_kpribase = PRI_KTHREAD; | | | |
259 | sleepq_enqueue(sq, obj, "tstile", sobj); | | | |
260 | | | | |
261 | /* | | | |
262 | * Disable preemption across this entire block, as we may drop | | | |
263 | * scheduler locks (allowing preemption), and would prefer not | | | |
264 | * to be interrupted while in a state of flux. | | | |
265 | */ | | | |
266 | KPREEMPT_DISABLE(l); | | | |
267 | | | 210 | |
268 | /* | | 211 | /* |
269 | * Lend our priority to lwps on the blocking chain. | | | |
270 | * | | | |
271 | * NOTE: if you get a panic in this code block, it is likely that | | 212 | * NOTE: if you get a panic in this code block, it is likely that |
272 | * a lock has been destroyed or corrupted while still in use. Try | | 213 | * a lock has been destroyed or corrupted while still in use. Try |
273 | * compiling a kernel with LOCKDEBUG to pinpoint the problem. | | 214 | * compiling a kernel with LOCKDEBUG to pinpoint the problem. |
274 | */ | | 215 | */ |
| | | 216 | |
| | | 217 | LOCKDEBUG_BARRIER(l->l_mutex, 1); |
| | | 218 | KASSERT(l == curlwp); |
275 | prio = lwp_eprio(l); | | 219 | prio = lwp_eprio(l); |
276 | KASSERT(cur == l); | | | |
277 | KASSERT(tc->tc_mutex == cur->l_mutex); | | | |
278 | for (;;) { | | 220 | for (;;) { |
| | | 221 | lwp_t *owner; |
| | | 222 | turnstile_t *ts; |
279 | bool dolock; | | 223 | bool dolock; |
280 | | | 224 | |
281 | if (l->l_wchan == NULL) | | 225 | if (l->l_wchan == NULL) |
282 | break; | | 226 | break; |
283 | | | 227 | |
284 | owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); | | 228 | owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); |
285 | if (owner == NULL) | | 229 | if (owner == NULL) |
286 | break; | | 230 | break; |
287 | | | 231 | |
288 | /* The owner may have changed as we have dropped the tc lock */ | | 232 | /* The owner may have changed as we have dropped the tc lock */ |
289 | if (cur == owner) { | | 233 | if (cur == owner) { |
290 | /* | | 234 | /* |
291 | * we own the lock: stop here, sleepq_block() | | 235 | * we own the lock: stop here, sleepq_block() |
| @@ -327,30 +271,155 @@ turnstile_block(turnstile_t *ts, int q, | | | @@ -327,30 +271,155 @@ turnstile_block(turnstile_t *ts, int q, |
327 | ts->ts_eprio = prio; | | 271 | ts->ts_eprio = prio; |
328 | lwp_lendpri(owner, prio); | | 272 | lwp_lendpri(owner, prio); |
329 | } | | 273 | } |
330 | if (dolock) | | 274 | if (dolock) |
331 | lwp_unlock(l); | | 275 | lwp_unlock(l); |
332 | l = owner; | | 276 | l = owner; |
333 | } | | 277 | } |
334 | LOCKDEBUG_BARRIER(l->l_mutex, 1); | | 278 | LOCKDEBUG_BARRIER(l->l_mutex, 1); |
335 | if (cur->l_mutex != l->l_mutex) { | | 279 | if (cur->l_mutex != l->l_mutex) { |
336 | lwp_unlock(l); | | 280 | lwp_unlock(l); |
337 | lwp_lock(cur); | | 281 | lwp_lock(cur); |
338 | } | | 282 | } |
339 | LOCKDEBUG_BARRIER(cur->l_mutex, 1); | | 283 | LOCKDEBUG_BARRIER(cur->l_mutex, 1); |
| | | 284 | } |
| | | 285 | |
| | | 286 | /* |
| | | 287 | * turnstile_unlendpri: undo turnstile_lendpri |
| | | 288 | */ |
| | | 289 | |
| | | 290 | static void |
| | | 291 | turnstile_unlendpri(turnstile_t *ts) |
| | | 292 | { |
| | | 293 | lwp_t * const l = curlwp; |
| | | 294 | turnstile_t *iter; |
| | | 295 | turnstile_t *next; |
| | | 296 | turnstile_t *prev = NULL; |
| | | 297 | pri_t prio; |
| | | 298 | bool dolock; |
| | | 299 | |
| | | 300 | KASSERT(ts->ts_inheritor != NULL); |
| | | 301 | ts->ts_inheritor = NULL; |
| | | 302 | dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; |
| | | 303 | if (dolock) { |
| | | 304 | lwp_lock(l); |
| | | 305 | } |
| | | 306 | |
| | | 307 | /* |
| | | 308 | * the following loop does two things. |
| | | 309 | * |
| | | 310 | * - remove ts from the list. |
| | | 311 | * |
| | | 312 | * - from the rest of the list, find the highest priority. |
| | | 313 | */ |
| | | 314 | |
| | | 315 | prio = -1; |
| | | 316 | KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); |
| | | 317 | for (iter = SLIST_FIRST(&l->l_pi_lenders); |
| | | 318 | iter != NULL; iter = next) { |
| | | 319 | KASSERT(lwp_eprio(l) >= ts->ts_eprio); |
| | | 320 | next = SLIST_NEXT(iter, ts_pichain); |
| | | 321 | if (iter == ts) { |
| | | 322 | if (prev == NULL) { |
| | | 323 | SLIST_REMOVE_HEAD(&l->l_pi_lenders, |
| | | 324 | ts_pichain); |
| | | 325 | } else { |
| | | 326 | SLIST_REMOVE_AFTER(prev, ts_pichain); |
| | | 327 | } |
| | | 328 | } else if (prio < iter->ts_eprio) { |
| | | 329 | prio = iter->ts_eprio; |
| | | 330 | } |
| | | 331 | prev = iter; |
| | | 332 | } |
| | | 333 | |
| | | 334 | lwp_lendpri(l, prio); |
340 | | | 335 | |
| | | 336 | if (dolock) { |
| | | 337 | lwp_unlock(l); |
| | | 338 | } |
| | | 339 | } |
| | | 340 | |
| | | 341 | /* |
| | | 342 | * turnstile_block: |
| | | 343 | * |
| | | 344 | * Enter an object into the turnstile chain and prepare the current |
| | | 345 | * LWP for sleep. |
| | | 346 | */ |
| | | 347 | void |
| | | 348 | turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) |
| | | 349 | { |
| | | 350 | lwp_t * const l = curlwp; /* cached curlwp */ |
| | | 351 | turnstile_t *ots; |
| | | 352 | tschain_t *tc; |
| | | 353 | sleepq_t *sq; |
| | | 354 | pri_t obase; |
| | | 355 | |
| | | 356 | tc = &turnstile_tab[TS_HASH(obj)]; |
| | | 357 | |
| | | 358 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); |
| | | 359 | KASSERT(mutex_owned(tc->tc_mutex)); |
| | | 360 | KASSERT(l != NULL && l->l_ts != NULL); |
| | | 361 | |
| | | 362 | if (ts == NULL) { |
| | | 363 | /* |
| | | 364 | * We are the first thread to wait for this object; |
| | | 365 | * lend our turnstile to it. |
| | | 366 | */ |
| | | 367 | ts = l->l_ts; |
| | | 368 | KASSERT(TS_ALL_WAITERS(ts) == 0); |
| | | 369 | KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && |
| | | 370 | TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); |
| | | 371 | ts->ts_obj = obj; |
| | | 372 | ts->ts_inheritor = NULL; |
| | | 373 | LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); |
| | | 374 | } else { |
| | | 375 | /* |
| | | 376 | * Object already has a turnstile. Put our turnstile |
| | | 377 | * onto the free list, and reference the existing |
| | | 378 | * turnstile instead. |
| | | 379 | */ |
| | | 380 | ots = l->l_ts; |
| | | 381 | KASSERT(ots->ts_free == NULL); |
| | | 382 | ots->ts_free = ts->ts_free; |
| | | 383 | ts->ts_free = ots; |
| | | 384 | l->l_ts = ts; |
| | | 385 | |
| | | 386 | KASSERT(ts->ts_obj == obj); |
| | | 387 | KASSERT(TS_ALL_WAITERS(ts) != 0); |
| | | 388 | KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || |
| | | 389 | !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); |
| | | 390 | } |
| | | 391 | |
| | | 392 | sq = &ts->ts_sleepq[q]; |
| | | 393 | ts->ts_waiters[q]++; |
| | | 394 | sleepq_enter(sq, l, tc->tc_mutex); |
| | | 395 | LOCKDEBUG_BARRIER(tc->tc_mutex, 1); |
| | | 396 | l->l_kpriority = true; |
| | | 397 | obase = l->l_kpribase; |
| | | 398 | if (obase < PRI_KTHREAD) |
| | | 399 | l->l_kpribase = PRI_KTHREAD; |
| | | 400 | sleepq_enqueue(sq, obj, "tstile", sobj); |
| | | 401 | |
| | | 402 | /* |
| | | 403 | * Disable preemption across this entire block, as we may drop |
| | | 404 | * scheduler locks (allowing preemption), and would prefer not |
| | | 405 | * to be interrupted while in a state of flux. |
| | | 406 | */ |
| | | 407 | KPREEMPT_DISABLE(l); |
| | | 408 | KASSERT(tc->tc_mutex == l->l_mutex); |
| | | 409 | turnstile_lendpri(l); |
341 | sleepq_block(0, false); | | 410 | sleepq_block(0, false); |
342 | cur->l_kpribase = obase; | | 411 | l->l_kpribase = obase; |
343 | KPREEMPT_ENABLE(cur); | | 412 | KPREEMPT_ENABLE(l); |
344 | } | | 413 | } |
345 | | | 414 | |
346 | /* | | 415 | /* |
347 | * turnstile_wakeup: | | 416 | * turnstile_wakeup: |
348 | * | | 417 | * |
349 | * Wake up the specified number of threads that are blocked | | 418 | * Wake up the specified number of threads that are blocked |
350 | * in a turnstile. | | 419 | * in a turnstile. |
351 | */ | | 420 | */ |
352 | void | | 421 | void |
353 | turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) | | 422 | turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) |
354 | { | | 423 | { |
355 | sleepq_t *sq; | | 424 | sleepq_t *sq; |
356 | tschain_t *tc; | | 425 | tschain_t *tc; |
| @@ -359,72 +428,27 @@ turnstile_wakeup(turnstile_t *ts, int q, | | | @@ -359,72 +428,27 @@ turnstile_wakeup(turnstile_t *ts, int q, |
359 | tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; | | 428 | tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; |
360 | sq = &ts->ts_sleepq[q]; | | 429 | sq = &ts->ts_sleepq[q]; |
361 | | | 430 | |
362 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | | 431 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); |
363 | KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); | | 432 | KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); |
364 | KASSERT(mutex_owned(tc->tc_mutex)); | | 433 | KASSERT(mutex_owned(tc->tc_mutex)); |
365 | KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); | | 434 | KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); |
366 | | | 435 | |
367 | /* | | 436 | /* |
368 | * restore inherited priority if necessary. | | 437 | * restore inherited priority if necessary. |
369 | */ | | 438 | */ |
370 | | | 439 | |
371 | if (ts->ts_inheritor != NULL) { | | 440 | if (ts->ts_inheritor != NULL) { |
372 | turnstile_t *iter; | | 441 | turnstile_unlendpri(ts); |
373 | turnstile_t *next; | | | |
374 | turnstile_t *prev = NULL; | | | |
375 | pri_t prio; | | | |
376 | bool dolock; | | | |
377 | | | | |
378 | ts->ts_inheritor = NULL; | | | |
379 | l = curlwp; | | | |
380 | | | | |
381 | dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; | | | |
382 | if (dolock) { | | | |
383 | lwp_lock(l); | | | |
384 | } | | | |
385 | | | | |
386 | /* | | | |
387 | * the following loop does two things. | | | |
388 | * | | | |
389 | * - remove ts from the list. | | | |
390 | * | | | |
391 | * - from the rest of the list, find the highest priority. | | | |
392 | */ | | | |
393 | | | | |
394 | prio = -1; | | | |
395 | KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); | | | |
396 | for (iter = SLIST_FIRST(&l->l_pi_lenders); | | | |
397 | iter != NULL; iter = next) { | | | |
398 | KASSERT(lwp_eprio(l) >= ts->ts_eprio); | | | |
399 | next = SLIST_NEXT(iter, ts_pichain); | | | |
400 | if (iter == ts) { | | | |
401 | if (prev == NULL) { | | | |
402 | SLIST_REMOVE_HEAD(&l->l_pi_lenders, | | | |
403 | ts_pichain); | | | |
404 | } else { | | | |
405 | SLIST_REMOVE_AFTER(prev, ts_pichain); | | | |
406 | } | | | |
407 | } else if (prio < iter->ts_eprio) { | | | |
408 | prio = iter->ts_eprio; | | | |
409 | } | | | |
410 | prev = iter; | | | |
411 | } | | | |
412 | | | | |
413 | lwp_lendpri(l, prio); | | | |
414 | | | | |
415 | if (dolock) { | | | |
416 | lwp_unlock(l); | | | |
417 | } | | | |
418 | } | | 442 | } |
419 | | | 443 | |
420 | if (nl != NULL) { | | 444 | if (nl != NULL) { |
421 | #if defined(DEBUG) || defined(LOCKDEBUG) | | 445 | #if defined(DEBUG) || defined(LOCKDEBUG) |
422 | TAILQ_FOREACH(l, sq, l_sleepchain) { | | 446 | TAILQ_FOREACH(l, sq, l_sleepchain) { |
423 | if (l == nl) | | 447 | if (l == nl) |
424 | break; | | 448 | break; |
425 | } | | 449 | } |
426 | if (l == NULL) | | 450 | if (l == NULL) |
427 | panic("turnstile_wakeup: nl not on sleepq"); | | 451 | panic("turnstile_wakeup: nl not on sleepq"); |
428 | #endif | | 452 | #endif |
429 | turnstile_remove(ts, nl, q); | | 453 | turnstile_remove(ts, nl, q); |
430 | } else { | | 454 | } else { |