Thu Jan 22 20:06:47 2009 UTC ()
Pull up following revision(s) (requested by rmind in ticket #284):
	sys/kern/kern_runq.c: revision 1.24
- Avoid calling sched_catchlwp() if CPUs have different processor-sets.
- sched_takecpu: check for psid earlier (be more strict).
PR/40419.


(snj)
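
In short, the change guards all cross-CPU LWP stealing with a processor-set check: sched_takecpu() now tests l->l_psid against the CPU's spc_psid before taking the idle/cache-hot shortcuts, and sched_idle() and sched_nextlwp() compare spc_psid values before calling sched_catchlwp(). The standalone userland sketch below only models that guard; struct fake_cpu and may_catch_lwp() are illustrative stand-ins, not the kernel's cpu_info or scheduler API.

/*
 * Standalone model of the guard this pull-up adds: before one CPU
 * steals (catches) an LWP from another CPU's runqueue, both CPUs must
 * belong to the same processor-set.  Names here are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_cpu {
	int id;		/* CPU index */
	int psid;	/* processor-set the CPU is assigned to */
};

/* Migration across processor-sets is never allowed. */
static bool
may_catch_lwp(const struct fake_cpu *self, const struct fake_cpu *target)
{
	if (self == target)
		return false;	/* nothing to catch from ourselves */
	return self->psid == target->psid;
}

int
main(void)
{
	struct fake_cpu cpu0 = { 0, 0 }, cpu1 = { 1, 0 }, cpu2 = { 2, 1 };

	/* Same pset: stealing allowed.  Different pset: skipped. */
	printf("cpu0 <- cpu1: %s\n", may_catch_lwp(&cpu0, &cpu1) ? "catch" : "skip");
	printf("cpu0 <- cpu2: %s\n", may_catch_lwp(&cpu0, &cpu2) ? "catch" : "skip");
	return 0;
}
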
diff -r1.22 -r1.22.4.1 src/sys/kern/kern_runq.c

--- src/sys/kern/kern_runq.c 2008/10/07 09:48:27 1.22
+++ src/sys/kern/kern_runq.c 2009/01/22 20:06:47 1.22.4.1
@@ -1,14 +1,14 @@
-/*	$NetBSD: kern_runq.c,v 1.22 2008/10/07 09:48:27 rmind Exp $	*/
+/*	$NetBSD: kern_runq.c,v 1.22.4.1 2009/01/22 20:06:47 snj Exp $	*/
 
 /*
  * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
@@ -17,27 +17,27 @@
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.22 2008/10/07 09:48:27 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.22.4.1 2009/01/22 20:06:47 snj Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bitops.h>
 #include <sys/cpu.h>
 #include <sys/idle.h>
 #include <sys/intr.h>
 #include <sys/kmem.h>
 #include <sys/lwp.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/syscallargs.h>
@@ -64,45 +64,45 @@ __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,
 #define BITMAP_MASK (BITMAP_BITS - 1)
 
 /*
  * Structures, runqueue.
  */
 
 const int schedppq = 1;
 
 typedef struct {
 	TAILQ_HEAD(, lwp) q_head;
 } queue_t;
 
 typedef struct {
-	/* Lock and bitmap */
+	/* Bitmap */
 	uint32_t r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
 	/* Counters */
 	u_int r_count;		/* Count of the threads */
 	u_int r_avgcount;	/* Average count of threads */
 	u_int r_mcount;		/* Count of migratable threads */
 	/* Runqueues */
 	queue_t r_rt_queue[PRI_RT_COUNT];
 	queue_t r_ts_queue[PRI_TS_COUNT];
 	/* Event counters */
 	struct evcnt r_ev_pull;
 	struct evcnt r_ev_push;
 	struct evcnt r_ev_stay;
 	struct evcnt r_ev_localize;
 } runqueue_t;
 
 static void * sched_getrq(runqueue_t *, const pri_t);
 #ifdef MULTIPROCESSOR
-static lwp_t *sched_catchlwp(struct cpu_info *);
+static lwp_t * sched_catchlwp(struct cpu_info *);
 static void sched_balance(void *);
 #endif
 
 /*
  * Preemption control.
  */
 int sched_upreempt_pri = PRI_KERNEL;
 #if defined(__HAVE_PREEMPTION)
 int sched_kpreempt_pri = PRI_USER_RT;
 #else
 int sched_kpreempt_pri = 1000;
 #endif
 
@@ -323,27 +323,27 @@ sched_dequeue(struct lwp *l)
 }
 
 /*
  * Migration and balancing.
  */
 
 #ifdef MULTIPROCESSOR
 
 /* Estimate if LWP is cache-hot */
 static inline bool
 lwp_cache_hot(const struct lwp *l)
 {
 
-	if (l->l_slptime || l->l_rticks == 0)
+	if (__predict_false(l->l_slptime || l->l_rticks == 0))
 		return false;
 
 	return (hardclock_ticks - l->l_rticks <= cacheht_time);
 }
 
 /* Check if LWP can migrate to the chosen CPU */
 static inline bool
 sched_migratable(const struct lwp *l, struct cpu_info *ci)
 {
 	const struct schedstate_percpu *spc = &ci->ci_schedstate;
 	KASSERT(lwp_locked(__UNCONST(l), NULL));
 
 	/* CPU is offline */
@@ -362,51 +362,57 @@ sched_migratable(const struct lwp *l, st
  * Estimate the migration of LWP to the other CPU.
  * Take and return the CPU, if migration is needed.
  */
 struct cpu_info *
 sched_takecpu(struct lwp *l)
 {
 	struct cpu_info *ci, *tci, *first, *next;
 	struct schedstate_percpu *spc;
 	runqueue_t *ci_rq, *ici_rq;
 	pri_t eprio, lpri, pri;
 
 	KASSERT(lwp_locked(l, NULL));
 
+	/* If thread is strictly bound, do not estimate other CPUs */
 	ci = l->l_cpu;
+	if (l->l_pflag & LP_BOUND)
+		return ci;
+
 	spc = &ci->ci_schedstate;
 	ci_rq = spc->spc_sched_info;
 
-	/*
-	 * If thread is strictly bound, do not estimate other CPUs.
-	 * If CPU of this thread is idling - run there.
-	 */
-	if ((l->l_pflag & LP_BOUND) != 0 || ci_rq->r_count == 0) {
-		ci_rq->r_ev_stay.ev_count++;
-		return ci;
-	}
-
-	/* Stay if thread is cache-hot. */
-	eprio = lwp_eprio(l);
-	if (__predict_true(l->l_stat != LSIDL) &&
-	    lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
-		ci_rq->r_ev_stay.ev_count++;
-		return ci;
+	/* Make sure that thread is in appropriate processor-set */
+	if (__predict_true(spc->spc_psid == l->l_psid)) {
+		/* If CPU of this thread is idling - run there */
+		if (ci_rq->r_count == 0) {
+			ci_rq->r_ev_stay.ev_count++;
+			return ci;
+		}
+		/* Stay if thread is cache-hot */
+		eprio = lwp_eprio(l);
+		if (__predict_true(l->l_stat != LSIDL) &&
+		    lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
+			ci_rq->r_ev_stay.ev_count++;
+			return ci;
+		}
+	} else {
+		eprio = lwp_eprio(l);
 	}
 
 	/* Run on current CPU if priority of thread is higher */
 	ci = curcpu();
 	spc = &ci->ci_schedstate;
 	if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
+		ci_rq = spc->spc_sched_info;
 		ci_rq->r_ev_localize.ev_count++;
 		return ci;
 	}
 
 	/*
 	 * Look for the CPU with the lowest priority thread. In case of
 	 * equal priority, choose the CPU with the fewest of threads.
 	 */
 	first = l->l_cpu;
 	ci = first;
 	tci = first;
 	lpri = PRI_COUNT;
 	do {
@@ -431,32 +437,35 @@ sched_takecpu(struct lwp *l)
 	ci_rq = tci->ci_schedstate.spc_sched_info;
 	ci_rq->r_ev_push.ev_count++;
 
 	return tci;
 }
 
 /*
  * Tries to catch an LWP from the runqueue of other CPU.
  */
 static struct lwp *
 sched_catchlwp(struct cpu_info *ci)
 {
 	struct cpu_info *curci = curcpu();
-	struct schedstate_percpu *spc;
+	struct schedstate_percpu *spc, *curspc;
 	TAILQ_HEAD(, lwp) *q_head;
 	runqueue_t *ci_rq;
 	struct lwp *l;
 
+	curspc = &curci->ci_schedstate;
 	spc = &ci->ci_schedstate;
+	KASSERT(curspc->spc_psid == spc->spc_psid);
+
 	ci_rq = spc->spc_sched_info;
 	if (ci_rq->r_mcount < min_catch) {
 		spc_unlock(ci);
 		return NULL;
 	}
 
 	/* Take the highest priority thread */
 	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
 	l = TAILQ_FIRST(q_head);
 
 	for (;;) {
 		/* Check the first and next result from the queue */
 		if (l == NULL)
@@ -465,27 +474,27 @@ sched_catchlwp(struct cpu_info *ci)
 		KASSERT(l->l_flag & LW_INMEM);
 
 		/* Look for threads, whose are allowed to migrate */
 		if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
 		    !sched_migratable(l, curci)) {
 			l = TAILQ_NEXT(l, l_runq);
 			continue;
 		}
 
 		/* Grab the thread, and move to the local run queue */
 		sched_dequeue(l);
 		l->l_cpu = curci;
 		ci_rq->r_ev_pull.ev_count++;
-		lwp_unlock_to(l, curci->ci_schedstate.spc_mutex);
+		lwp_unlock_to(l, curspc->spc_mutex);
 		sched_enqueue(l, false);
 		return l;
 	}
 	spc_unlock(ci);
 
 	return l;
 }
 
 /*
  * Periodical calculations for balancing.
  */
 static void
 sched_balance(void *nocallout)
@@ -598,27 +607,28 @@ sched_idle(void)
 	}
 	spc_unlock(ci);
 
 no_migration:
 	ci_rq = spc->spc_sched_info;
 	if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
 		return;
 	}
 
 	/* Reset the counter, and call the balancer */
 	ci_rq->r_avgcount = 0;
 	sched_balance(ci);
 	tci = worker_ci;
-	if (ci == tci)
+	tspc = &tci->ci_schedstate;
+	if (ci == tci || spc->spc_psid != tspc->spc_psid)
 		return;
 	spc_dlock(ci, tci);
 	(void)sched_catchlwp(tci);
 	spc_unlock(ci);
 }
 
 #else
 
 struct cpu_info *
 sched_takecpu(struct lwp *l)
 {
 
 	return l->l_cpu;
@@ -695,37 +705,40 @@ sched_nextlwp(void)
 	TAILQ_HEAD(, lwp) *q_head;
 	runqueue_t *ci_rq;
 	struct lwp *l;
 
 	/* Return to idle LWP if there is a migrating thread */
 	spc = &ci->ci_schedstate;
 	if (__predict_false(spc->spc_migrating != NULL))
 		return NULL;
 	ci_rq = spc->spc_sched_info;
 
 #ifdef MULTIPROCESSOR
 	/* If runqueue is empty, try to catch some thread from other CPU */
 	if (__predict_false(ci_rq->r_count == 0)) {
+		struct schedstate_percpu *cspc;
 		struct cpu_info *cci;
 
 		/* Offline CPUs should not perform this, however */
 		if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
 			return NULL;
 
 		/* Reset the counter, and call the balancer */
 		ci_rq->r_avgcount = 0;
 		sched_balance(ci);
 		cci = worker_ci;
-		if (ci == cci || !mutex_tryenter(cci->ci_schedstate.spc_mutex))
+		cspc = &cci->ci_schedstate;
+		if (ci == cci || spc->spc_psid != cspc->spc_psid ||
+		    !mutex_tryenter(cci->ci_schedstate.spc_mutex))
 			return NULL;
 		return sched_catchlwp(cci);
 	}
 #else
 	if (__predict_false(ci_rq->r_count == 0))
 		return NULL;
 #endif
 
 	/* Take the highest priority thread */
 	KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
 	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
 	l = TAILQ_FIRST(q_head);
 	KASSERT(l != NULL);