Sun Mar 8 15:00:31 2020 UTC
sched_preempted(): always clear LP_TELEPORT.


(ad)
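
sched_preempted() looks for a better CPU to run an LWP that has just been
preempted or has yielded.  Its fast path hands the LWP to an idle 1st class
SMT sibling in the same core and returns at once; before this revision that
early return bypassed the LP_TELEPORT handling further down, so a vfork()
child marked for teleport could keep the flag set.  Revision 1.63 clears the
flag on that path as well.  Condensed from the changed hunk in the diff below
(a reconstruction for readability, not a verbatim excerpt):

	/* Fast path: send the LWP back to an idle 1st class SMT sibling. */
	tci = ci->ci_sibling[CPUREL_CORE];
	while (tci != ci) {
		const int flags = SPCF_IDLE | SPCF_1STCLASS;
		tspc = &tci->ci_schedstate;
		if ((tspc->spc_flags & flags) == flags &&
		    sched_migratable(l, tci)) {
			l->l_target_cpu = tci;
			l->l_pflag &= ~LP_TELEPORT;	/* added in 1.63 */
			return;
		}
		tci = tci->ci_sibling[CPUREL_CORE];
	}

With this change both places that pick a target CPU for a teleporting LWP,
the fast path above and the sched_bestcpu() branch that follows it, clear
LP_TELEPORT.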

cvs diff -r1.62 -r1.63 src/sys/kern/kern_runq.c

--- src/sys/kern/kern_runq.c 2020/01/25 15:09:54 1.62
+++ src/sys/kern/kern_runq.c 2020/03/08 15:00:31 1.63
@@ -1,1208 +1,1209 @@
1/* $NetBSD: kern_runq.c,v 1.62 2020/01/25 15:09:54 ad Exp $ */ 1/* $NetBSD: kern_runq.c,v 1.63 2020/03/08 15:00:31 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org> 33 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
34 * All rights reserved. 34 * All rights reserved.
35 *  35 *
36 * Redistribution and use in source and binary forms, with or without 36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions 37 * modification, are permitted provided that the following conditions
38 * are met: 38 * are met:
39 * 1. Redistributions of source code must retain the above copyright 39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer. 40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright 41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the 42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution. 43 * documentation and/or other materials provided with the distribution.
44 * 44 *
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE. 55 * SUCH DAMAGE.
56 */ 56 */
57 57
58#include <sys/cdefs.h> 58#include <sys/cdefs.h>
59__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.62 2020/01/25 15:09:54 ad Exp $"); 59__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.63 2020/03/08 15:00:31 ad Exp $");
60 60
61#include "opt_dtrace.h" 61#include "opt_dtrace.h"
62 62
63#include <sys/param.h> 63#include <sys/param.h>
64#include <sys/kernel.h> 64#include <sys/kernel.h>
65#include <sys/bitops.h> 65#include <sys/bitops.h>
66#include <sys/cpu.h> 66#include <sys/cpu.h>
67#include <sys/idle.h> 67#include <sys/idle.h>
68#include <sys/intr.h> 68#include <sys/intr.h>
69#include <sys/kmem.h> 69#include <sys/kmem.h>
70#include <sys/lwp.h> 70#include <sys/lwp.h>
71#include <sys/mutex.h> 71#include <sys/mutex.h>
72#include <sys/proc.h> 72#include <sys/proc.h>
73#include <sys/pset.h> 73#include <sys/pset.h>
74#include <sys/sched.h> 74#include <sys/sched.h>
75#include <sys/syscallargs.h> 75#include <sys/syscallargs.h>
76#include <sys/sysctl.h> 76#include <sys/sysctl.h>
77#include <sys/systm.h> 77#include <sys/systm.h>
78#include <sys/types.h> 78#include <sys/types.h>
79#include <sys/evcnt.h> 79#include <sys/evcnt.h>
80#include <sys/atomic.h> 80#include <sys/atomic.h>
81 81
82/* 82/*
83 * Bits per map. 83 * Bits per map.
84 */ 84 */
85#define BITMAP_BITS (32) 85#define BITMAP_BITS (32)
86#define BITMAP_SHIFT (5) 86#define BITMAP_SHIFT (5)
87#define BITMAP_MSB (0x80000000U) 87#define BITMAP_MSB (0x80000000U)
88#define BITMAP_MASK (BITMAP_BITS - 1) 88#define BITMAP_MASK (BITMAP_BITS - 1)
89 89
90const int schedppq = 1; 90const int schedppq = 1;
91 91
92static void *sched_getrq(struct schedstate_percpu *, const pri_t); 92static void *sched_getrq(struct schedstate_percpu *, const pri_t);
93#ifdef MULTIPROCESSOR 93#ifdef MULTIPROCESSOR
94static lwp_t * sched_catchlwp(struct cpu_info *); 94static lwp_t * sched_catchlwp(struct cpu_info *);
95#endif 95#endif
96 96
97/* 97/*
98 * Preemption control. 98 * Preemption control.
99 */ 99 */
100#ifdef __HAVE_PREEMPTION 100#ifdef __HAVE_PREEMPTION
101# ifdef DEBUG 101# ifdef DEBUG
102int sched_kpreempt_pri = 0; 102int sched_kpreempt_pri = 0;
103# else 103# else
104int sched_kpreempt_pri = PRI_USER_RT; 104int sched_kpreempt_pri = PRI_USER_RT;
105# endif 105# endif
106#else 106#else
107int sched_kpreempt_pri = 1000; 107int sched_kpreempt_pri = 1000;
108#endif 108#endif
109 109
110/* 110/*
111 * Migration and balancing. 111 * Migration and balancing.
112 */ 112 */
113static u_int cacheht_time; /* Cache hotness time */ 113static u_int cacheht_time; /* Cache hotness time */
114static u_int min_catch; /* Minimal LWP count for catching */ 114static u_int min_catch; /* Minimal LWP count for catching */
115static u_int skim_interval; /* Rate limit for stealing LWPs */ 115static u_int skim_interval; /* Rate limit for stealing LWPs */
116 116
117#ifdef KDTRACE_HOOKS 117#ifdef KDTRACE_HOOKS
118struct lwp *curthread; 118struct lwp *curthread;
119#endif 119#endif
120 120
121void 121void
122runq_init(void) 122runq_init(void)
123{ 123{
124 124
125 /* Pulling from remote packages, LWP must not have run for 10ms. */ 125 /* Pulling from remote packages, LWP must not have run for 10ms. */
126 cacheht_time = 10; 126 cacheht_time = 10;
127 127
128 /* Minimal count of LWPs for catching */ 128 /* Minimal count of LWPs for catching */
129 min_catch = 1; 129 min_catch = 1;
130 130
131 /* Steal from other CPUs at most every 10ms. */ 131 /* Steal from other CPUs at most every 10ms. */
132 skim_interval = 10; 132 skim_interval = 10;
133} 133}
134 134
135void 135void
136sched_cpuattach(struct cpu_info *ci) 136sched_cpuattach(struct cpu_info *ci)
137{ 137{
138 struct schedstate_percpu *spc; 138 struct schedstate_percpu *spc;
139 size_t size; 139 size_t size;
140 void *p; 140 void *p;
141 u_int i; 141 u_int i;
142 142
143 spc = &ci->ci_schedstate; 143 spc = &ci->ci_schedstate;
144 spc->spc_nextpkg = ci; 144 spc->spc_nextpkg = ci;
145 145
146 if (spc->spc_lwplock == NULL) { 146 if (spc->spc_lwplock == NULL) {
147 spc->spc_lwplock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); 147 spc->spc_lwplock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
148 } 148 }
149 if (ci == lwp0.l_cpu) { 149 if (ci == lwp0.l_cpu) {
150 /* Initialize the scheduler structure of the primary LWP */ 150 /* Initialize the scheduler structure of the primary LWP */
151 lwp0.l_mutex = spc->spc_lwplock; 151 lwp0.l_mutex = spc->spc_lwplock;
152 } 152 }
153 if (spc->spc_mutex != NULL) { 153 if (spc->spc_mutex != NULL) {
154 /* Already initialized. */ 154 /* Already initialized. */
155 return; 155 return;
156 } 156 }
157 157
158 /* Allocate the run queue */ 158 /* Allocate the run queue */
159 size = roundup2(sizeof(spc->spc_queue[0]) * PRI_COUNT, coherency_unit) + 159 size = roundup2(sizeof(spc->spc_queue[0]) * PRI_COUNT, coherency_unit) +
160 coherency_unit; 160 coherency_unit;
161 p = kmem_alloc(size, KM_SLEEP); 161 p = kmem_alloc(size, KM_SLEEP);
162 spc->spc_queue = (void *)roundup2((uintptr_t)p, coherency_unit); 162 spc->spc_queue = (void *)roundup2((uintptr_t)p, coherency_unit);
163 163
164 /* Initialize run queues */ 164 /* Initialize run queues */
165 spc->spc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); 165 spc->spc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
166 for (i = 0; i < PRI_COUNT; i++) 166 for (i = 0; i < PRI_COUNT; i++)
167 TAILQ_INIT(&spc->spc_queue[i]); 167 TAILQ_INIT(&spc->spc_queue[i]);
168} 168}
169 169
170/* 170/*
171 * Control of the runqueue. 171 * Control of the runqueue.
172 */ 172 */
173static inline void * 173static inline void *
174sched_getrq(struct schedstate_percpu *spc, const pri_t prio) 174sched_getrq(struct schedstate_percpu *spc, const pri_t prio)
175{ 175{
176 176
177 KASSERT(prio < PRI_COUNT); 177 KASSERT(prio < PRI_COUNT);
178 return &spc->spc_queue[prio]; 178 return &spc->spc_queue[prio];
179} 179}
180 180
181/* 181/*
182 * Put an LWP onto a run queue. The LWP must be locked by spc_mutex for 182 * Put an LWP onto a run queue. The LWP must be locked by spc_mutex for
183 * l_cpu. 183 * l_cpu.
184 */ 184 */
185void 185void
186sched_enqueue(struct lwp *l) 186sched_enqueue(struct lwp *l)
187{ 187{
188 struct schedstate_percpu *spc; 188 struct schedstate_percpu *spc;
189 TAILQ_HEAD(, lwp) *q_head; 189 TAILQ_HEAD(, lwp) *q_head;
190 const pri_t eprio = lwp_eprio(l); 190 const pri_t eprio = lwp_eprio(l);
191 struct cpu_info *ci; 191 struct cpu_info *ci;
192 192
193 ci = l->l_cpu; 193 ci = l->l_cpu;
194 spc = &ci->ci_schedstate; 194 spc = &ci->ci_schedstate;
195 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex)); 195 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
196 196
197 /* Enqueue the thread */ 197 /* Enqueue the thread */
198 q_head = sched_getrq(spc, eprio); 198 q_head = sched_getrq(spc, eprio);
199 if (TAILQ_EMPTY(q_head)) { 199 if (TAILQ_EMPTY(q_head)) {
200 u_int i; 200 u_int i;
201 uint32_t q; 201 uint32_t q;
202 202
203 /* Mark bit */ 203 /* Mark bit */
204 i = eprio >> BITMAP_SHIFT; 204 i = eprio >> BITMAP_SHIFT;
205 q = BITMAP_MSB >> (eprio & BITMAP_MASK); 205 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
206 KASSERT((spc->spc_bitmap[i] & q) == 0); 206 KASSERT((spc->spc_bitmap[i] & q) == 0);
207 spc->spc_bitmap[i] |= q; 207 spc->spc_bitmap[i] |= q;
208 } 208 }
209 /* Preempted SCHED_RR and SCHED_FIFO LWPs go to the queue head. */ 209 /* Preempted SCHED_RR and SCHED_FIFO LWPs go to the queue head. */
210 if (l->l_class != SCHED_OTHER && (l->l_pflag & LP_PREEMPTING) != 0) { 210 if (l->l_class != SCHED_OTHER && (l->l_pflag & LP_PREEMPTING) != 0) {
211 TAILQ_INSERT_HEAD(q_head, l, l_runq); 211 TAILQ_INSERT_HEAD(q_head, l, l_runq);
212 } else { 212 } else {
213 TAILQ_INSERT_TAIL(q_head, l, l_runq); 213 TAILQ_INSERT_TAIL(q_head, l, l_runq);
214 } 214 }
215 spc->spc_flags &= ~SPCF_IDLE; 215 spc->spc_flags &= ~SPCF_IDLE;
216 spc->spc_count++; 216 spc->spc_count++;
217 if ((l->l_pflag & LP_BOUND) == 0) 217 if ((l->l_pflag & LP_BOUND) == 0)
218 spc->spc_mcount++; 218 spc->spc_mcount++;
219 219
220 /* 220 /*
221 * Update the value of highest priority in the runqueue, 221 * Update the value of highest priority in the runqueue,
222 * if priority of this thread is higher. 222 * if priority of this thread is higher.
223 */ 223 */
224 if (eprio > spc->spc_maxpriority) 224 if (eprio > spc->spc_maxpriority)
225 spc->spc_maxpriority = eprio; 225 spc->spc_maxpriority = eprio;
226 226
227 sched_newts(l); 227 sched_newts(l);
228} 228}
229 229
230/* 230/*
231 * Remove an LWP from the run queue it's on. The LWP must be in state 231 * Remove an LWP from the run queue it's on. The LWP must be in state
232 * LSRUN. 232 * LSRUN.
233 */ 233 */
234void 234void
235sched_dequeue(struct lwp *l) 235sched_dequeue(struct lwp *l)
236{ 236{
237 TAILQ_HEAD(, lwp) *q_head; 237 TAILQ_HEAD(, lwp) *q_head;
238 struct schedstate_percpu *spc; 238 struct schedstate_percpu *spc;
239 const pri_t eprio = lwp_eprio(l); 239 const pri_t eprio = lwp_eprio(l);
240 240
241 spc = &l->l_cpu->ci_schedstate; 241 spc = &l->l_cpu->ci_schedstate;
242 242
243 KASSERT(lwp_locked(l, spc->spc_mutex)); 243 KASSERT(lwp_locked(l, spc->spc_mutex));
244 KASSERT(eprio <= spc->spc_maxpriority); 244 KASSERT(eprio <= spc->spc_maxpriority);
245 KASSERT(spc->spc_bitmap[eprio >> BITMAP_SHIFT] != 0); 245 KASSERT(spc->spc_bitmap[eprio >> BITMAP_SHIFT] != 0);
246 KASSERT(spc->spc_count > 0); 246 KASSERT(spc->spc_count > 0);
247 247
248 if (spc->spc_migrating == l) 248 if (spc->spc_migrating == l)
249 spc->spc_migrating = NULL; 249 spc->spc_migrating = NULL;
250 250
251 spc->spc_count--; 251 spc->spc_count--;
252 if ((l->l_pflag & LP_BOUND) == 0) 252 if ((l->l_pflag & LP_BOUND) == 0)
253 spc->spc_mcount--; 253 spc->spc_mcount--;
254 254
255 q_head = sched_getrq(spc, eprio); 255 q_head = sched_getrq(spc, eprio);
256 TAILQ_REMOVE(q_head, l, l_runq); 256 TAILQ_REMOVE(q_head, l, l_runq);
257 if (TAILQ_EMPTY(q_head)) { 257 if (TAILQ_EMPTY(q_head)) {
258 u_int i; 258 u_int i;
259 uint32_t q; 259 uint32_t q;
260 260
261 /* Unmark bit */ 261 /* Unmark bit */
262 i = eprio >> BITMAP_SHIFT; 262 i = eprio >> BITMAP_SHIFT;
263 q = BITMAP_MSB >> (eprio & BITMAP_MASK); 263 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
264 KASSERT((spc->spc_bitmap[i] & q) != 0); 264 KASSERT((spc->spc_bitmap[i] & q) != 0);
265 spc->spc_bitmap[i] &= ~q; 265 spc->spc_bitmap[i] &= ~q;
266 266
267 /* 267 /*
268 * Update the value of highest priority in the runqueue, in 268 * Update the value of highest priority in the runqueue, in
269 * case it was the last thread in the queue of highest priority. 269 * case it was the last thread in the queue of highest priority.
270 */ 270 */
271 if (eprio != spc->spc_maxpriority) 271 if (eprio != spc->spc_maxpriority)
272 return; 272 return;
273 273
274 do { 274 do {
275 if (spc->spc_bitmap[i] != 0) { 275 if (spc->spc_bitmap[i] != 0) {
276 q = ffs(spc->spc_bitmap[i]); 276 q = ffs(spc->spc_bitmap[i]);
277 spc->spc_maxpriority = 277 spc->spc_maxpriority =
278 (i << BITMAP_SHIFT) + (BITMAP_BITS - q); 278 (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
279 return; 279 return;
280 } 280 }
281 } while (i--); 281 } while (i--);
282 282
283 /* If not found - set the lowest value */ 283 /* If not found - set the lowest value */
284 spc->spc_maxpriority = 0; 284 spc->spc_maxpriority = 0;
285 } 285 }
286} 286}
287 287
288/* 288/*
289 * Cause a preemption on the given CPU, if the priority "pri" is higher 289 * Cause a preemption on the given CPU, if the priority "pri" is higher
290 * priority than the running LWP. If "unlock" is specified, and ideally it 290 * priority than the running LWP. If "unlock" is specified, and ideally it
291 * will be for concurrency reasons, spc_mutex will be dropped before return. 291 * will be for concurrency reasons, spc_mutex will be dropped before return.
292 */ 292 */
293void 293void
294sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock) 294sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
295{ 295{
296 struct schedstate_percpu *spc; 296 struct schedstate_percpu *spc;
297 u_int o, n, f; 297 u_int o, n, f;
298 lwp_t *l; 298 lwp_t *l;
299 299
300 spc = &ci->ci_schedstate; 300 spc = &ci->ci_schedstate;
301 301
302 KASSERT(mutex_owned(spc->spc_mutex)); 302 KASSERT(mutex_owned(spc->spc_mutex));
303 303
304 /* 304 /*
305 * If the priority level we're evaluating wouldn't cause a new LWP 305 * If the priority level we're evaluating wouldn't cause a new LWP
306 * to be run on the CPU, then we have nothing to do. 306 * to be run on the CPU, then we have nothing to do.
307 */ 307 */
308 if (pri <= spc->spc_curpriority || !mp_online) { 308 if (pri <= spc->spc_curpriority || !mp_online) {
309 if (__predict_true(unlock)) { 309 if (__predict_true(unlock)) {
310 spc_unlock(ci); 310 spc_unlock(ci);
311 } 311 }
312 return; 312 return;
313 } 313 }
314 314
315 /* 315 /*
316 * Figure out what kind of preemption we should do. 316 * Figure out what kind of preemption we should do.
317 */  317 */
318 l = ci->ci_onproc; 318 l = ci->ci_onproc;
319 if ((l->l_flag & LW_IDLE) != 0) { 319 if ((l->l_flag & LW_IDLE) != 0) {
320 f = RESCHED_IDLE | RESCHED_UPREEMPT; 320 f = RESCHED_IDLE | RESCHED_UPREEMPT;
321 } else if (pri >= sched_kpreempt_pri && (l->l_pflag & LP_INTR) == 0) { 321 } else if (pri >= sched_kpreempt_pri && (l->l_pflag & LP_INTR) == 0) {
322 /* We can't currently preempt softints - should be able to. */ 322 /* We can't currently preempt softints - should be able to. */
323#ifdef __HAVE_PREEMPTION 323#ifdef __HAVE_PREEMPTION
324 f = RESCHED_KPREEMPT; 324 f = RESCHED_KPREEMPT;
325#else 325#else
326 /* Leave door open for test: set kpreempt_pri with sysctl. */ 326 /* Leave door open for test: set kpreempt_pri with sysctl. */
327 f = RESCHED_UPREEMPT; 327 f = RESCHED_UPREEMPT;
328#endif 328#endif
329 /* 329 /*
330 * l_dopreempt must be set with the CPU locked to sync with 330 * l_dopreempt must be set with the CPU locked to sync with
331 * mi_switch(). It must also be set with an atomic to sync 331 * mi_switch(). It must also be set with an atomic to sync
332 * with kpreempt(). 332 * with kpreempt().
333 */ 333 */
334 atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE); 334 atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE);
335 } else { 335 } else {
336 f = RESCHED_UPREEMPT; 336 f = RESCHED_UPREEMPT;
337 } 337 }
338 if (ci != curcpu()) { 338 if (ci != curcpu()) {
339 f |= RESCHED_REMOTE; 339 f |= RESCHED_REMOTE;
340 } 340 }
341 341
342 /* 342 /*
343 * Things start as soon as we touch ci_want_resched: x86 for example 343 * Things start as soon as we touch ci_want_resched: x86 for example
344 * has an instruction that monitors the memory cell it's in. We 344 * has an instruction that monitors the memory cell it's in. We
345 * want to drop the schedstate lock in advance, otherwise the remote 345 * want to drop the schedstate lock in advance, otherwise the remote
346 * CPU can awaken and immediately block on the lock. 346 * CPU can awaken and immediately block on the lock.
347 */ 347 */
348 if (__predict_true(unlock)) { 348 if (__predict_true(unlock)) {
349 spc_unlock(ci); 349 spc_unlock(ci);
350 } 350 }
351 351
352 /* 352 /*
353 * The caller will always have a second scheduler lock held: either 353 * The caller will always have a second scheduler lock held: either
354 * the running LWP lock (spc_lwplock), or a sleep queue lock. That 354 * the running LWP lock (spc_lwplock), or a sleep queue lock. That
355 * keeps preemption disabled, which among other things ensures all 355 * keeps preemption disabled, which among other things ensures all
356 * LWPs involved won't be freed while we're here (see lwp_dtor()). 356 * LWPs involved won't be freed while we're here (see lwp_dtor()).
357 */ 357 */
358 KASSERT(kpreempt_disabled()); 358 KASSERT(kpreempt_disabled());
359 359
360 for (o = 0;; o = n) { 360 for (o = 0;; o = n) {
361 n = atomic_cas_uint(&ci->ci_want_resched, o, o | f); 361 n = atomic_cas_uint(&ci->ci_want_resched, o, o | f);
362 if (__predict_true(o == n)) { 362 if (__predict_true(o == n)) {
363 /* 363 /*
364 * We're the first. If we're in process context on 364 * We're the first. If we're in process context on
365 * the same CPU, we can avoid the visit to trap(). 365 * the same CPU, we can avoid the visit to trap().
366 */ 366 */
367 if (l != curlwp || cpu_intr_p()) { 367 if (l != curlwp || cpu_intr_p()) {
368 cpu_need_resched(ci, l, f); 368 cpu_need_resched(ci, l, f);
369 } 369 }
370 break; 370 break;
371 } 371 }
372 if (__predict_true( 372 if (__predict_true(
373 (n & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)) >= 373 (n & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)) >=
374 (f & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)))) { 374 (f & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)))) {
375 /* Already in progress, nothing to do. */ 375 /* Already in progress, nothing to do. */
376 break; 376 break;
377 } 377 }
378 } 378 }
379} 379}
380 380
381/* 381/*
382 * Cause a preemption on the given CPU, if the priority of LWP "l" in state 382 * Cause a preemption on the given CPU, if the priority of LWP "l" in state
383 * LSRUN, is higher priority than the running LWP. If "unlock" is 383 * LSRUN, is higher priority than the running LWP. If "unlock" is
384 * specified, and ideally it will be for concurrency reasons, spc_mutex will 384 * specified, and ideally it will be for concurrency reasons, spc_mutex will
385 * be dropped before return. 385 * be dropped before return.
386 */ 386 */
387void 387void
388sched_resched_lwp(struct lwp *l, bool unlock) 388sched_resched_lwp(struct lwp *l, bool unlock)
389{ 389{
390 struct cpu_info *ci = l->l_cpu; 390 struct cpu_info *ci = l->l_cpu;
391 391
392 KASSERT(lwp_locked(l, ci->ci_schedstate.spc_mutex)); 392 KASSERT(lwp_locked(l, ci->ci_schedstate.spc_mutex));
393 KASSERT(l->l_stat == LSRUN); 393 KASSERT(l->l_stat == LSRUN);
394 394
395 sched_resched_cpu(ci, lwp_eprio(l), unlock); 395 sched_resched_cpu(ci, lwp_eprio(l), unlock);
396} 396}
397 397
398/* 398/*
399 * Migration and balancing. 399 * Migration and balancing.
400 */ 400 */
401 401
402#ifdef MULTIPROCESSOR 402#ifdef MULTIPROCESSOR
403 403
404/* 404/*
405 * Estimate if LWP is cache-hot. 405 * Estimate if LWP is cache-hot.
406 */ 406 */
407static inline bool 407static inline bool
408lwp_cache_hot(const struct lwp *l) 408lwp_cache_hot(const struct lwp *l)
409{ 409{
410 410
411 /* Leave new LWPs in peace, determination has already been made. */ 411 /* Leave new LWPs in peace, determination has already been made. */
412 if (l->l_stat == LSIDL) 412 if (l->l_stat == LSIDL)
413 return true; 413 return true;
414 414
415 if (__predict_false(l->l_slptime != 0 || l->l_rticks == 0)) 415 if (__predict_false(l->l_slptime != 0 || l->l_rticks == 0))
416 return false; 416 return false;
417 417
418 return (hardclock_ticks - l->l_rticks < mstohz(cacheht_time)); 418 return (hardclock_ticks - l->l_rticks < mstohz(cacheht_time));
419} 419}
420 420
421/* 421/*
422 * Check if LWP can migrate to the chosen CPU. 422 * Check if LWP can migrate to the chosen CPU.
423 */ 423 */
424static inline bool 424static inline bool
425sched_migratable(const struct lwp *l, struct cpu_info *ci) 425sched_migratable(const struct lwp *l, struct cpu_info *ci)
426{ 426{
427 const struct schedstate_percpu *spc = &ci->ci_schedstate; 427 const struct schedstate_percpu *spc = &ci->ci_schedstate;
428 KASSERT(lwp_locked(__UNCONST(l), NULL)); 428 KASSERT(lwp_locked(__UNCONST(l), NULL));
429 429
430 /* Is CPU offline? */ 430 /* Is CPU offline? */
431 if (__predict_false(spc->spc_flags & SPCF_OFFLINE)) 431 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
432 return false; 432 return false;
433 433
434 /* Is affinity set? */ 434 /* Is affinity set? */
435 if (__predict_false(l->l_affinity)) 435 if (__predict_false(l->l_affinity))
436 return kcpuset_isset(l->l_affinity, cpu_index(ci)); 436 return kcpuset_isset(l->l_affinity, cpu_index(ci));
437 437
438 /* Is there a processor-set? */ 438 /* Is there a processor-set? */
439 return (spc->spc_psid == l->l_psid); 439 return (spc->spc_psid == l->l_psid);
440} 440}
441 441
442/* 442/*
443 * A small helper to do round robin through CPU packages. 443 * A small helper to do round robin through CPU packages.
444 */ 444 */
445static struct cpu_info * 445static struct cpu_info *
446sched_nextpkg(void) 446sched_nextpkg(void)
447{ 447{
448 struct schedstate_percpu *spc = &curcpu()->ci_schedstate; 448 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
449 449
450 spc->spc_nextpkg =  450 spc->spc_nextpkg =
451 spc->spc_nextpkg->ci_sibling[CPUREL_PACKAGE1ST]; 451 spc->spc_nextpkg->ci_sibling[CPUREL_PACKAGE1ST];
452 452
453 return spc->spc_nextpkg; 453 return spc->spc_nextpkg;
454} 454}
455 455
456/* 456/*
457 * Find a CPU to run LWP "l". Look for the CPU with the lowest priority 457 * Find a CPU to run LWP "l". Look for the CPU with the lowest priority
458 * thread. In case of equal priority, prefer first class CPUs, and amongst 458 * thread. In case of equal priority, prefer first class CPUs, and amongst
459 * the remainder choose the CPU with the fewest runqueue entries. 459 * the remainder choose the CPU with the fewest runqueue entries.
460 * 460 *
461 * Begin the search in the CPU package which "pivot" is a member of. 461 * Begin the search in the CPU package which "pivot" is a member of.
462 */ 462 */
463static struct cpu_info * __noinline  463static struct cpu_info * __noinline
464sched_bestcpu(struct lwp *l, struct cpu_info *pivot) 464sched_bestcpu(struct lwp *l, struct cpu_info *pivot)
465{ 465{
466 struct cpu_info *bestci, *curci, *outer; 466 struct cpu_info *bestci, *curci, *outer;
467 struct schedstate_percpu *bestspc, *curspc; 467 struct schedstate_percpu *bestspc, *curspc;
468 pri_t bestpri, curpri; 468 pri_t bestpri, curpri;
469 469
470 /* 470 /*
471 * If this fails (it shouldn't), run on the given CPU. This also 471 * If this fails (it shouldn't), run on the given CPU. This also
472 * gives us a weak preference for "pivot" to begin with. 472 * gives us a weak preference for "pivot" to begin with.
473 */ 473 */
474 bestci = pivot; 474 bestci = pivot;
475 bestspc = &bestci->ci_schedstate; 475 bestspc = &bestci->ci_schedstate;
476 bestpri = MAX(bestspc->spc_curpriority, bestspc->spc_maxpriority); 476 bestpri = MAX(bestspc->spc_curpriority, bestspc->spc_maxpriority);
477 477
478 /* In the outer loop scroll through all CPU packages. */ 478 /* In the outer loop scroll through all CPU packages. */
479 pivot = pivot->ci_package1st; 479 pivot = pivot->ci_package1st;
480 outer = pivot; 480 outer = pivot;
481 do { 481 do {
482 /* In the inner loop scroll through all CPUs in package. */ 482 /* In the inner loop scroll through all CPUs in package. */
483 curci = outer; 483 curci = outer;
484 do { 484 do {
485 if (!sched_migratable(l, curci)) { 485 if (!sched_migratable(l, curci)) {
486 continue; 486 continue;
487 } 487 }
488 488
489 curspc = &curci->ci_schedstate; 489 curspc = &curci->ci_schedstate;
490 490
491 /* If this CPU is idle and 1st class, we're done. */ 491 /* If this CPU is idle and 1st class, we're done. */
492 if ((curspc->spc_flags & (SPCF_IDLE | SPCF_1STCLASS)) == 492 if ((curspc->spc_flags & (SPCF_IDLE | SPCF_1STCLASS)) ==
493 (SPCF_IDLE | SPCF_1STCLASS)) { 493 (SPCF_IDLE | SPCF_1STCLASS)) {
494 return curci; 494 return curci;
495 } 495 }
496 496
497 curpri = MAX(curspc->spc_curpriority, 497 curpri = MAX(curspc->spc_curpriority,
498 curspc->spc_maxpriority); 498 curspc->spc_maxpriority);
499 499
500 if (curpri > bestpri) { 500 if (curpri > bestpri) {
501 continue; 501 continue;
502 } 502 }
503 if (curpri == bestpri) { 503 if (curpri == bestpri) {
504 /* Prefer first class CPUs over others. */ 504 /* Prefer first class CPUs over others. */
505 if ((curspc->spc_flags & SPCF_1STCLASS) == 0 && 505 if ((curspc->spc_flags & SPCF_1STCLASS) == 0 &&
506 (bestspc->spc_flags & SPCF_1STCLASS) != 0) { 506 (bestspc->spc_flags & SPCF_1STCLASS) != 0) {
507 continue; 507 continue;
508 } 508 }
509 /* 509 /*
510 * Pick the least busy CPU. Make sure this is not 510 * Pick the least busy CPU. Make sure this is not
511 * <=, otherwise it defeats the above preference. 511 * <=, otherwise it defeats the above preference.
512 */ 512 */
513 if (bestspc->spc_count < curspc->spc_count) { 513 if (bestspc->spc_count < curspc->spc_count) {
514 continue; 514 continue;
515 } 515 }
516 } 516 }
517 517
518 bestpri = curpri; 518 bestpri = curpri;
519 bestci = curci; 519 bestci = curci;
520 bestspc = curspc; 520 bestspc = curspc;
521 521
522 } while (curci = curci->ci_sibling[CPUREL_PACKAGE], 522 } while (curci = curci->ci_sibling[CPUREL_PACKAGE],
523 curci != outer); 523 curci != outer);
524 } while (outer = outer->ci_sibling[CPUREL_PACKAGE1ST], 524 } while (outer = outer->ci_sibling[CPUREL_PACKAGE1ST],
525 outer != pivot); 525 outer != pivot);
526 526
527 return bestci; 527 return bestci;
528} 528}
529 529
530/* 530/*
531 * Estimate the migration of LWP to the other CPU. 531 * Estimate the migration of LWP to the other CPU.
532 * Take and return the CPU, if migration is needed. 532 * Take and return the CPU, if migration is needed.
533 */ 533 */
534struct cpu_info * 534struct cpu_info *
535sched_takecpu(struct lwp *l) 535sched_takecpu(struct lwp *l)
536{ 536{
537 struct schedstate_percpu *spc, *tspc; 537 struct schedstate_percpu *spc, *tspc;
538 struct cpu_info *ci, *curci, *tci; 538 struct cpu_info *ci, *curci, *tci;
539 pri_t eprio; 539 pri_t eprio;
540 int flags; 540 int flags;
541 541
542 KASSERT(lwp_locked(l, NULL)); 542 KASSERT(lwp_locked(l, NULL));
543 543
544 /* If thread is strictly bound, do not estimate other CPUs */ 544 /* If thread is strictly bound, do not estimate other CPUs */
545 ci = l->l_cpu; 545 ci = l->l_cpu;
546 if (l->l_pflag & LP_BOUND) 546 if (l->l_pflag & LP_BOUND)
547 return ci; 547 return ci;
548 548
549 spc = &ci->ci_schedstate; 549 spc = &ci->ci_schedstate;
550 eprio = lwp_eprio(l); 550 eprio = lwp_eprio(l);
551 551
552 /* 552 /*
553 * Handle new LWPs. For vfork() with a timeshared child, make it 553 * Handle new LWPs. For vfork() with a timeshared child, make it
554 * run on the same CPU as the parent if no other LWPs in queue.  554 * run on the same CPU as the parent if no other LWPs in queue.
555 * Otherwise scatter far and wide - try for an even distribution 555 * Otherwise scatter far and wide - try for an even distribution
556 * across all CPU packages and CPUs. 556 * across all CPU packages and CPUs.
557 */ 557 */
558 if (l->l_stat == LSIDL) { 558 if (l->l_stat == LSIDL) {
559 if (curlwp->l_vforkwaiting && l->l_class == SCHED_OTHER) { 559 if (curlwp->l_vforkwaiting && l->l_class == SCHED_OTHER) {
560 if (sched_migratable(l, curlwp->l_cpu) && eprio > 560 if (sched_migratable(l, curlwp->l_cpu) && eprio >
561 curlwp->l_cpu->ci_schedstate.spc_maxpriority) { 561 curlwp->l_cpu->ci_schedstate.spc_maxpriority) {
562 return curlwp->l_cpu; 562 return curlwp->l_cpu;
563 } 563 }
564 } else { 564 } else {
565 return sched_bestcpu(l, sched_nextpkg()); 565 return sched_bestcpu(l, sched_nextpkg());
566 } 566 }
567 flags = SPCF_IDLE; 567 flags = SPCF_IDLE;
568 } else { 568 } else {
569 flags = SPCF_IDLE | SPCF_1STCLASS; 569 flags = SPCF_IDLE | SPCF_1STCLASS;
570 } 570 }
571 571
572 /* 572 /*
573 * Try to send the LWP back to the first CPU in the same core if 573 * Try to send the LWP back to the first CPU in the same core if
574 * idle. This keeps LWPs clustered in the run queues of 1st class 574 * idle. This keeps LWPs clustered in the run queues of 1st class
575 * CPUs. This implies stickiness. If we didn't find a home for 575 * CPUs. This implies stickiness. If we didn't find a home for
576 * a vfork() child above, try to use any SMT sibling to help out. 576 * a vfork() child above, try to use any SMT sibling to help out.
577 */ 577 */
578 tci = ci; 578 tci = ci;
579 do { 579 do {
580 tspc = &tci->ci_schedstate; 580 tspc = &tci->ci_schedstate;
581 if ((tspc->spc_flags & flags) == flags && 581 if ((tspc->spc_flags & flags) == flags &&
582 sched_migratable(l, tci)) { 582 sched_migratable(l, tci)) {
583 return tci; 583 return tci;
584 } 584 }
585 tci = tci->ci_sibling[CPUREL_CORE]; 585 tci = tci->ci_sibling[CPUREL_CORE];
586 } while (tci != ci); 586 } while (tci != ci);
587 587
588 /* 588 /*
589 * Otherwise the LWP is "sticky", i.e. generally preferring to stay 589 * Otherwise the LWP is "sticky", i.e. generally preferring to stay
590 * on the same CPU. 590 * on the same CPU.
591 */ 591 */
592 if (sched_migratable(l, ci) && (eprio > spc->spc_curpriority || 592 if (sched_migratable(l, ci) && (eprio > spc->spc_curpriority ||
593 (lwp_cache_hot(l) && l->l_class == SCHED_OTHER))) { 593 (lwp_cache_hot(l) && l->l_class == SCHED_OTHER))) {
594 return ci; 594 return ci;
595 } 595 }
596 596
597 /* 597 /*
598 * If the current CPU core is idle, run there and avoid the 598 * If the current CPU core is idle, run there and avoid the
599 * expensive scan of CPUs below. 599 * expensive scan of CPUs below.
600 */ 600 */
601 curci = curcpu(); 601 curci = curcpu();
602 tci = curci; 602 tci = curci;
603 do { 603 do {
604 tspc = &tci->ci_schedstate; 604 tspc = &tci->ci_schedstate;
605 if ((tspc->spc_flags & flags) == flags && 605 if ((tspc->spc_flags & flags) == flags &&
606 sched_migratable(l, tci)) { 606 sched_migratable(l, tci)) {
607 return tci; 607 return tci;
608 } 608 }
609 tci = tci->ci_sibling[CPUREL_CORE]; 609 tci = tci->ci_sibling[CPUREL_CORE];
610 } while (tci != curci); 610 } while (tci != curci);
611 611
612 /* 612 /*
613 * Didn't find a new home above - happens infrequently. Start the 613 * Didn't find a new home above - happens infrequently. Start the
614 * search in last CPU package that the LWP ran in, but expand to 614 * search in last CPU package that the LWP ran in, but expand to
615 * include the whole system if needed. 615 * include the whole system if needed.
616 */ 616 */
617 return sched_bestcpu(l, l->l_cpu); 617 return sched_bestcpu(l, l->l_cpu);
618} 618}
619 619
620/* 620/*
621 * Tries to catch an LWP from the runqueue of another CPU. 621 * Tries to catch an LWP from the runqueue of another CPU.
622 */ 622 */
623static struct lwp * 623static struct lwp *
624sched_catchlwp(struct cpu_info *ci) 624sched_catchlwp(struct cpu_info *ci)
625{ 625{
626 struct cpu_info *curci = curcpu(); 626 struct cpu_info *curci = curcpu();
627 struct schedstate_percpu *spc, *curspc; 627 struct schedstate_percpu *spc, *curspc;
628 TAILQ_HEAD(, lwp) *q_head; 628 TAILQ_HEAD(, lwp) *q_head;
629 struct lwp *l; 629 struct lwp *l;
630 bool gentle; 630 bool gentle;
631 631
632 curspc = &curci->ci_schedstate; 632 curspc = &curci->ci_schedstate;
633 spc = &ci->ci_schedstate; 633 spc = &ci->ci_schedstate;
634 634
635 /* 635 /*
636 * Be more aggressive if this CPU is first class, and the other 636 * Be more aggressive if this CPU is first class, and the other
637 * is not. 637 * is not.
638 */ 638 */
639 gentle = ((curspc->spc_flags & SPCF_1STCLASS) == 0 || 639 gentle = ((curspc->spc_flags & SPCF_1STCLASS) == 0 ||
640 (spc->spc_flags & SPCF_1STCLASS) != 0); 640 (spc->spc_flags & SPCF_1STCLASS) != 0);
641 641
642 if (spc->spc_mcount < (gentle ? min_catch : 1) || 642 if (spc->spc_mcount < (gentle ? min_catch : 1) ||
643 curspc->spc_psid != spc->spc_psid) { 643 curspc->spc_psid != spc->spc_psid) {
644 spc_unlock(ci); 644 spc_unlock(ci);
645 return NULL; 645 return NULL;
646 } 646 }
647 647
648 /* Take the highest priority thread */ 648 /* Take the highest priority thread */
649 q_head = sched_getrq(spc, spc->spc_maxpriority); 649 q_head = sched_getrq(spc, spc->spc_maxpriority);
650 l = TAILQ_FIRST(q_head); 650 l = TAILQ_FIRST(q_head);
651 651
652 for (;;) { 652 for (;;) {
653 /* Check the first and next result from the queue */ 653 /* Check the first and next result from the queue */
654 if (l == NULL) { 654 if (l == NULL) {
655 break; 655 break;
656 } 656 }
657 KASSERTMSG(l->l_stat == LSRUN, "%s l %p (%s) l_stat %d", 657 KASSERTMSG(l->l_stat == LSRUN, "%s l %p (%s) l_stat %d",
658 ci->ci_data.cpu_name, 658 ci->ci_data.cpu_name,
659 l, (l->l_name ? l->l_name : l->l_proc->p_comm), l->l_stat); 659 l, (l->l_name ? l->l_name : l->l_proc->p_comm), l->l_stat);
660 660
661 /* Look for threads that are allowed to migrate */ 661 /* Look for threads that are allowed to migrate */
662 if ((l->l_pflag & LP_BOUND) || 662 if ((l->l_pflag & LP_BOUND) ||
663 (gentle && lwp_cache_hot(l)) || 663 (gentle && lwp_cache_hot(l)) ||
664 !sched_migratable(l, curci)) { 664 !sched_migratable(l, curci)) {
665 l = TAILQ_NEXT(l, l_runq); 665 l = TAILQ_NEXT(l, l_runq);
666 /* XXX Gap: could walk down priority list. */ 666 /* XXX Gap: could walk down priority list. */
667 continue; 667 continue;
668 } 668 }
669 669
670 /* Grab the thread, and move to the local run queue */ 670 /* Grab the thread, and move to the local run queue */
671 sched_dequeue(l); 671 sched_dequeue(l);
672 l->l_cpu = curci; 672 l->l_cpu = curci;
673 lwp_unlock_to(l, curspc->spc_mutex); 673 lwp_unlock_to(l, curspc->spc_mutex);
674 sched_enqueue(l); 674 sched_enqueue(l);
675 return l; 675 return l;
676 } 676 }
677 spc_unlock(ci); 677 spc_unlock(ci);
678 678
679 return l; 679 return l;
680} 680}
681 681
682/* 682/*
683 * Called from sched_idle() to handle migration. 683 * Called from sched_idle() to handle migration.
684 */ 684 */
685static void 685static void
686sched_idle_migrate(void) 686sched_idle_migrate(void)
687{ 687{
688 struct cpu_info *ci = curcpu(), *tci = NULL; 688 struct cpu_info *ci = curcpu(), *tci = NULL;
689 struct schedstate_percpu *spc, *tspc; 689 struct schedstate_percpu *spc, *tspc;
690 bool dlock = false; 690 bool dlock = false;
691 691
692 spc = &ci->ci_schedstate; 692 spc = &ci->ci_schedstate;
693 spc_lock(ci); 693 spc_lock(ci);
694 for (;;) { 694 for (;;) {
695 struct lwp *l; 695 struct lwp *l;
696 696
697 l = spc->spc_migrating; 697 l = spc->spc_migrating;
698 if (l == NULL) 698 if (l == NULL)
699 break; 699 break;
700 700
701 /* 701 /*
702 * If second attempt, and target CPU has changed, 702 * If second attempt, and target CPU has changed,
703 * drop the old lock. 703 * drop the old lock.
704 */ 704 */
705 if (dlock == true && tci != l->l_target_cpu) { 705 if (dlock == true && tci != l->l_target_cpu) {
706 KASSERT(tci != NULL); 706 KASSERT(tci != NULL);
707 spc_unlock(tci); 707 spc_unlock(tci);
708 dlock = false; 708 dlock = false;
709 } 709 }
710 710
711 /* 711 /*
712 * Nothing to do if destination has changed to the 712 * Nothing to do if destination has changed to the
713 * local CPU, or migration was done by other CPU. 713 * local CPU, or migration was done by other CPU.
714 */ 714 */
715 tci = l->l_target_cpu; 715 tci = l->l_target_cpu;
716 if (tci == NULL || tci == ci) { 716 if (tci == NULL || tci == ci) {
717 spc->spc_migrating = NULL; 717 spc->spc_migrating = NULL;
718 l->l_target_cpu = NULL; 718 l->l_target_cpu = NULL;
719 break; 719 break;
720 } 720 }
721 tspc = &tci->ci_schedstate; 721 tspc = &tci->ci_schedstate;
722 722
723 /* 723 /*
724 * Double-lock the runqueues. 724 * Double-lock the runqueues.
725 * We do that only once. 725 * We do that only once.
726 */ 726 */
727 if (dlock == false) { 727 if (dlock == false) {
728 dlock = true; 728 dlock = true;
729 if (ci < tci) { 729 if (ci < tci) {
730 spc_lock(tci); 730 spc_lock(tci);
731 } else if (!mutex_tryenter(tspc->spc_mutex)) { 731 } else if (!mutex_tryenter(tspc->spc_mutex)) {
732 spc_unlock(ci); 732 spc_unlock(ci);
733 spc_lock(tci); 733 spc_lock(tci);
734 spc_lock(ci); 734 spc_lock(ci);
735 /* Check the situation again.. */ 735 /* Check the situation again.. */
736 continue; 736 continue;
737 } 737 }
738 } 738 }
739 739
740 /* Migrate the thread */ 740 /* Migrate the thread */
741 KASSERT(l->l_stat == LSRUN); 741 KASSERT(l->l_stat == LSRUN);
742 spc->spc_migrating = NULL; 742 spc->spc_migrating = NULL;
743 l->l_target_cpu = NULL; 743 l->l_target_cpu = NULL;
744 sched_dequeue(l); 744 sched_dequeue(l);
745 l->l_cpu = tci; 745 l->l_cpu = tci;
746 lwp_setlock(l, tspc->spc_mutex); 746 lwp_setlock(l, tspc->spc_mutex);
747 sched_enqueue(l); 747 sched_enqueue(l);
748 sched_resched_lwp(l, true); 748 sched_resched_lwp(l, true);
749 /* tci now unlocked */ 749 /* tci now unlocked */
750 spc_unlock(ci); 750 spc_unlock(ci);
751 return; 751 return;
752 } 752 }
753 if (dlock == true) { 753 if (dlock == true) {
754 KASSERT(tci != NULL); 754 KASSERT(tci != NULL);
755 spc_unlock(tci); 755 spc_unlock(tci);
756 } 756 }
757 spc_unlock(ci); 757 spc_unlock(ci);
758} 758}
759 759
760/* 760/*
761 * Try to steal an LWP from "tci". 761 * Try to steal an LWP from "tci".
762 */ 762 */
763static bool 763static bool
764sched_steal(struct cpu_info *ci, struct cpu_info *tci) 764sched_steal(struct cpu_info *ci, struct cpu_info *tci)
765{ 765{
766 struct schedstate_percpu *spc, *tspc; 766 struct schedstate_percpu *spc, *tspc;
767 lwp_t *l; 767 lwp_t *l;
768 768
769 spc = &ci->ci_schedstate; 769 spc = &ci->ci_schedstate;
770 tspc = &tci->ci_schedstate; 770 tspc = &tci->ci_schedstate;
771 if (tspc->spc_mcount != 0 && spc->spc_psid == tspc->spc_psid) { 771 if (tspc->spc_mcount != 0 && spc->spc_psid == tspc->spc_psid) {
772 spc_dlock(ci, tci); 772 spc_dlock(ci, tci);
773 l = sched_catchlwp(tci); 773 l = sched_catchlwp(tci);
774 spc_unlock(ci); 774 spc_unlock(ci);
775 if (l != NULL) { 775 if (l != NULL) {
776 return true; 776 return true;
777 } 777 }
778 } 778 }
779 return false; 779 return false;
780} 780}
781 781
782/* 782/*
783 * Called from each CPU's idle loop. 783 * Called from each CPU's idle loop.
784 */ 784 */
785void 785void
786sched_idle(void) 786sched_idle(void)
787{ 787{
788 struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL; 788 struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL;
789 struct schedstate_percpu *spc, *tspc; 789 struct schedstate_percpu *spc, *tspc;
790 struct lwp *l; 790 struct lwp *l;
791 791
792 spc = &ci->ci_schedstate; 792 spc = &ci->ci_schedstate;
793 793
794 /* 794 /*
795 * Handle LWP migrations off this CPU to another. If there is a 795 * Handle LWP migrations off this CPU to another. If there is a
796 * migration to do then go idle afterwards (we'll wake again soon), 796 * migration to do then go idle afterwards (we'll wake again soon),
797 * as we don't want to instantly steal back the LWP we just moved 797 * as we don't want to instantly steal back the LWP we just moved
798 * out. 798 * out.
799 */ 799 */
800 if (spc->spc_migrating != NULL) { 800 if (spc->spc_migrating != NULL) {
801 sched_idle_migrate(); 801 sched_idle_migrate();
802 return; 802 return;
803 } 803 }
804 804
805 /* If this CPU is offline, or we have an LWP to run, we're done. */ 805 /* If this CPU is offline, or we have an LWP to run, we're done. */
806 if ((spc->spc_flags & SPCF_OFFLINE) != 0 || spc->spc_count != 0) { 806 if ((spc->spc_flags & SPCF_OFFLINE) != 0 || spc->spc_count != 0) {
807 return; 807 return;
808 } 808 }
809 809
810 /* Deal with SMT. */ 810 /* Deal with SMT. */
811 if (ci->ci_nsibling[CPUREL_CORE] > 1) { 811 if (ci->ci_nsibling[CPUREL_CORE] > 1) {
812 /* Try to help our siblings out. */ 812 /* Try to help our siblings out. */
813 tci = ci->ci_sibling[CPUREL_CORE]; 813 tci = ci->ci_sibling[CPUREL_CORE];
814 while (tci != ci) { 814 while (tci != ci) {
815 if (sched_steal(ci, tci)) { 815 if (sched_steal(ci, tci)) {
816 return; 816 return;
817 } 817 }
818 tci = tci->ci_sibling[CPUREL_CORE]; 818 tci = tci->ci_sibling[CPUREL_CORE];
819 } 819 }
820 /* 820 /*
821 * If not the first SMT in the core, and in the default 821 * If not the first SMT in the core, and in the default
822 * processor set, the search ends here. 822 * processor set, the search ends here.
823 */ 823 */
824 if ((spc->spc_flags & SPCF_1STCLASS) == 0 && 824 if ((spc->spc_flags & SPCF_1STCLASS) == 0 &&
825 spc->spc_psid == PS_NONE) { 825 spc->spc_psid == PS_NONE) {
826 return; 826 return;
827 } 827 }
828 } 828 }
829 829
830 /* 830 /*
831 * Find something to run, unless this CPU exceeded the rate limit.  831 * Find something to run, unless this CPU exceeded the rate limit.
832 * Start looking on the current package to maximise L2/L3 cache 832 * Start looking on the current package to maximise L2/L3 cache
833 * locality. Then expand to looking at the rest of the system. 833 * locality. Then expand to looking at the rest of the system.
834 * 834 *
835 * XXX Should probably look at 2nd class CPUs first, but they will 835 * XXX Should probably look at 2nd class CPUs first, but they will
836 * shed jobs via preempt() anyway. 836 * shed jobs via preempt() anyway.
837 */ 837 */
838 if (spc->spc_nextskim > hardclock_ticks) { 838 if (spc->spc_nextskim > hardclock_ticks) {
839 return; 839 return;
840 } 840 }
841 spc->spc_nextskim = hardclock_ticks + mstohz(skim_interval); 841 spc->spc_nextskim = hardclock_ticks + mstohz(skim_interval);
842 842
843 /* In the outer loop scroll through all CPU packages, starting here. */ 843 /* In the outer loop scroll through all CPU packages, starting here. */
844 first = ci->ci_package1st; 844 first = ci->ci_package1st;
845 outer = first; 845 outer = first;
846 do { 846 do {
847 /* In the inner loop scroll through all CPUs in package. */ 847 /* In the inner loop scroll through all CPUs in package. */
848 inner = outer; 848 inner = outer;
849 do { 849 do {
850 /* Don't hit the locks unless needed. */ 850 /* Don't hit the locks unless needed. */
851 tspc = &inner->ci_schedstate; 851 tspc = &inner->ci_schedstate;
852 if (ci == inner || spc->spc_psid != tspc->spc_psid || 852 if (ci == inner || spc->spc_psid != tspc->spc_psid ||
853 tspc->spc_mcount < min_catch) { 853 tspc->spc_mcount < min_catch) {
854 continue; 854 continue;
855 } 855 }
856 spc_dlock(ci, inner); 856 spc_dlock(ci, inner);
857 l = sched_catchlwp(inner); 857 l = sched_catchlwp(inner);
858 spc_unlock(ci); 858 spc_unlock(ci);
859 if (l != NULL) { 859 if (l != NULL) {
860 /* Got it! */ 860 /* Got it! */
861 return; 861 return;
862 } 862 }
863 } while (inner = inner->ci_sibling[CPUREL_PACKAGE], 863 } while (inner = inner->ci_sibling[CPUREL_PACKAGE],
864 inner != outer); 864 inner != outer);
865 } while (outer = outer->ci_sibling[CPUREL_PACKAGE1ST], 865 } while (outer = outer->ci_sibling[CPUREL_PACKAGE1ST],
866 outer != first); 866 outer != first);
867} 867}
868 868
869/* 869/*
870 * Called from mi_switch() when an LWP has been preempted / has yielded.  870 * Called from mi_switch() when an LWP has been preempted / has yielded.
871 * The LWP is presently in the CPU's run queue. Here we look for a better 871 * The LWP is presently in the CPU's run queue. Here we look for a better
872 * CPU to teleport the LWP to; there may not be one. 872 * CPU to teleport the LWP to; there may not be one.
873 */ 873 */
874void 874void
875sched_preempted(struct lwp *l) 875sched_preempted(struct lwp *l)
876{ 876{
877 struct schedstate_percpu *tspc; 877 struct schedstate_percpu *tspc;
878 struct cpu_info *ci, *tci; 878 struct cpu_info *ci, *tci;
879 879
880 ci = l->l_cpu; 880 ci = l->l_cpu;
881 tspc = &ci->ci_schedstate; 881 tspc = &ci->ci_schedstate;
882 882
883 KASSERT(tspc->spc_count >= 1); 883 KASSERT(tspc->spc_count >= 1);
884 884
885 /* 885 /*
886 * Try to select another CPU if: 886 * Try to select another CPU if:
887 * 887 *
888 * - there is no migration pending already 888 * - there is no migration pending already
889 * - and this LWP is running on a 2nd class CPU 889 * - and this LWP is running on a 2nd class CPU
890 * - or this LWP is a child of vfork() that has just done execve() 890 * - or this LWP is a child of vfork() that has just done execve()
891 */ 891 */
892 if (l->l_target_cpu != NULL || 892 if (l->l_target_cpu != NULL ||
893 ((tspc->spc_flags & SPCF_1STCLASS) != 0 && 893 ((tspc->spc_flags & SPCF_1STCLASS) != 0 &&
894 (l->l_pflag & LP_TELEPORT) == 0)) { 894 (l->l_pflag & LP_TELEPORT) == 0)) {
895 return; 895 return;
896 } 896 }
897 897
898 /* 898 /*
899 * Fast path: if the first SMT in the core is idle, send it back 899 * Fast path: if the first SMT in the core is idle, send it back
900 * there, because the cache is shared (cheap) and we want all LWPs 900 * there, because the cache is shared (cheap) and we want all LWPs
901 * to be clustered on 1st class CPUs (either running there or on 901 * to be clustered on 1st class CPUs (either running there or on
902 * their runqueues). 902 * their runqueues).
903 */ 903 */
904 tci = ci->ci_sibling[CPUREL_CORE]; 904 tci = ci->ci_sibling[CPUREL_CORE];
905 while (tci != ci) { 905 while (tci != ci) {
906 const int flags = SPCF_IDLE | SPCF_1STCLASS; 906 const int flags = SPCF_IDLE | SPCF_1STCLASS;
907 tspc = &tci->ci_schedstate; 907 tspc = &tci->ci_schedstate;
908 if ((tspc->spc_flags & flags) == flags && 908 if ((tspc->spc_flags & flags) == flags &&
909 sched_migratable(l, tci)) { 909 sched_migratable(l, tci)) {
910 l->l_target_cpu = tci; 910 l->l_target_cpu = tci;
 911 l->l_pflag &= ~LP_TELEPORT;
911 return; 912 return;
912 } 913 }
913 tci = tci->ci_sibling[CPUREL_CORE]; 914 tci = tci->ci_sibling[CPUREL_CORE];
914 } 915 }
915 916
916 if ((l->l_pflag & LP_TELEPORT) != 0) { 917 if ((l->l_pflag & LP_TELEPORT) != 0) {
917 /* 918 /*
918 * A child of vfork(): now that the parent is released, 919 * A child of vfork(): now that the parent is released,
919 * scatter far and wide, to match the LSIDL distribution 920 * scatter far and wide, to match the LSIDL distribution
920 * done in sched_takecpu(). 921 * done in sched_takecpu().
921 */ 922 */
922 l->l_pflag &= ~LP_TELEPORT; 923 l->l_pflag &= ~LP_TELEPORT;
923 tci = sched_bestcpu(l, sched_nextpkg()); 924 tci = sched_bestcpu(l, sched_nextpkg());
924 if (tci != ci) { 925 if (tci != ci) {
925 l->l_target_cpu = tci; 926 l->l_target_cpu = tci;
926 } 927 }
927 } else { 928 } else {
928 /* 929 /*
929 * Try to find a better CPU to take it, but don't move to 930 * Try to find a better CPU to take it, but don't move to
930 * another 2nd class CPU; there's not much point. 931 * another 2nd class CPU; there's not much point.
931 * 932 *
932 * Search in the current CPU package in order to try and 933 * Search in the current CPU package in order to try and
933 * keep L2/L3 cache locality, but expand to include the 934 * keep L2/L3 cache locality, but expand to include the
934 * whole system if needed. 935 * whole system if needed.
935 */ 936 */
936 tci = sched_bestcpu(l, l->l_cpu); 937 tci = sched_bestcpu(l, l->l_cpu);
937 if (tci != ci && 938 if (tci != ci &&
938 (tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) { 939 (tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) {
939 l->l_target_cpu = tci; 940 l->l_target_cpu = tci;
940 } 941 }
941 } 942 }
942} 943}
943 944
944/* 945/*
945 * Called during execve() by a child of vfork(). Does two things: 946 * Called during execve() by a child of vfork(). Does two things:
946 * 947 *
947 * - If the parent has been awoken and put back on curcpu then give the 948 * - If the parent has been awoken and put back on curcpu then give the
948 * CPU back to the parent. 949 * CPU back to the parent.
949 * 950 *
950 * - If curlwp is not on a 1st class CPU then find somewhere else to run, 951 * - If curlwp is not on a 1st class CPU then find somewhere else to run,
951 * since it dodged the distribution in sched_takecpu() when first set 952 * since it dodged the distribution in sched_takecpu() when first set
952 * runnable. 953 * runnable.
953 */ 954 */
954void 955void
955sched_vforkexec(struct lwp *l, bool samecpu) 956sched_vforkexec(struct lwp *l, bool samecpu)
956{ 957{
957 958
958 KASSERT(l == curlwp); 959 KASSERT(l == curlwp);
959 if ((samecpu && ncpu > 1) || 960 if ((samecpu && ncpu > 1) ||
960 (l->l_cpu->ci_schedstate.spc_flags & SPCF_1STCLASS) == 0) { 961 (l->l_cpu->ci_schedstate.spc_flags & SPCF_1STCLASS) == 0) {
961 l->l_pflag |= LP_TELEPORT; 962 l->l_pflag |= LP_TELEPORT;
962 preempt(); 963 preempt();
963 } 964 }
964} 965}
965 966
966#else 967#else
967 968
968/* 969/*
969 * stubs for !MULTIPROCESSOR 970 * stubs for !MULTIPROCESSOR
970 */ 971 */
971 972
972struct cpu_info * 973struct cpu_info *
973sched_takecpu(struct lwp *l) 974sched_takecpu(struct lwp *l)
974{ 975{
975 976
976 return l->l_cpu; 977 return l->l_cpu;
977} 978}
978 979
979void 980void
980sched_idle(void) 981sched_idle(void)
981{ 982{
982 983
983} 984}
984 985
985void 986void
986sched_preempted(struct lwp *l) 987sched_preempted(struct lwp *l)
987{ 988{
988 989
989} 990}
990 991
991void 992void
992sched_vforkexec(struct lwp *l, bool samecpu) 993sched_vforkexec(struct lwp *l, bool samecpu)
993{ 994{
994 995
995 KASSERT(l == curlwp); 996 KASSERT(l == curlwp);
996} 997}
997 998
998#endif /* MULTIPROCESSOR */ 999#endif /* MULTIPROCESSOR */
999 1000
1000/* 1001/*
1001 * Scheduling statistics and balancing. 1002 * Scheduling statistics and balancing.
1002 */ 1003 */
1003void 1004void
1004sched_lwp_stats(struct lwp *l) 1005sched_lwp_stats(struct lwp *l)
1005{ 1006{
1006 int batch; 1007 int batch;
1007 1008
1008 KASSERT(lwp_locked(l, NULL)); 1009 KASSERT(lwp_locked(l, NULL));
1009 1010
1010 /* Update sleep time */ 1011 /* Update sleep time */
1011 if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP || 1012 if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
1012 l->l_stat == LSSUSPENDED) 1013 l->l_stat == LSSUSPENDED)
1013 l->l_slptime++; 1014 l->l_slptime++;
1014 1015
1015 /* 1016 /*
1016 * Mark the thread as more CPU-bound if the sum of run time exceeds the 1017 * Mark the thread as more CPU-bound if the sum of run time exceeds the
1017 * sum of sleep time. Check whether the thread is CPU-bound for the first time. 1018 * sum of sleep time. Check whether the thread is CPU-bound for the first time.
1018 */ 1019 */
1019 batch = (l->l_rticksum > l->l_slpticksum); 1020 batch = (l->l_rticksum > l->l_slpticksum);
1020 if (batch != 0) { 1021 if (batch != 0) {
1021 if ((l->l_flag & LW_BATCH) == 0) 1022 if ((l->l_flag & LW_BATCH) == 0)
1022 batch = 0; 1023 batch = 0;
1023 l->l_flag |= LW_BATCH; 1024 l->l_flag |= LW_BATCH;
1024 } else 1025 } else
1025 l->l_flag &= ~LW_BATCH; 1026 l->l_flag &= ~LW_BATCH;
1026 1027
1027 /* Reset the time sums */ 1028 /* Reset the time sums */
1028 l->l_slpticksum = 0; 1029 l->l_slpticksum = 0;
1029 l->l_rticksum = 0; 1030 l->l_rticksum = 0;
1030 1031
1031 /* Scheduler-specific hook */ 1032 /* Scheduler-specific hook */
1032 sched_pstats_hook(l, batch); 1033 sched_pstats_hook(l, batch);
1033#ifdef KDTRACE_HOOKS 1034#ifdef KDTRACE_HOOKS
1034 curthread = l; 1035 curthread = l;
1035#endif 1036#endif
1036} 1037}
1037 1038
1038/* 1039/*
1039 * Scheduler mill. 1040 * Scheduler mill.
1040 */ 1041 */
1041struct lwp * 1042struct lwp *
1042sched_nextlwp(void) 1043sched_nextlwp(void)
1043{ 1044{
1044 struct cpu_info *ci = curcpu(); 1045 struct cpu_info *ci = curcpu();
1045 struct schedstate_percpu *spc; 1046 struct schedstate_percpu *spc;
1046 TAILQ_HEAD(, lwp) *q_head; 1047 TAILQ_HEAD(, lwp) *q_head;
1047 struct lwp *l; 1048 struct lwp *l;
1048 1049
1049 /* Update the last run time on switch */ 1050 /* Update the last run time on switch */
1050 l = curlwp; 1051 l = curlwp;
1051 l->l_rticksum += (hardclock_ticks - l->l_rticks); 1052 l->l_rticksum += (hardclock_ticks - l->l_rticks);
1052 1053
1053 /* Return to idle LWP if there is a migrating thread */ 1054 /* Return to idle LWP if there is a migrating thread */
1054 spc = &ci->ci_schedstate; 1055 spc = &ci->ci_schedstate;
1055 if (__predict_false(spc->spc_migrating != NULL)) 1056 if (__predict_false(spc->spc_migrating != NULL))
1056 return NULL; 1057 return NULL;
1057 1058
1058 /* Return to idle LWP if there is no runnable job */ 1059 /* Return to idle LWP if there is no runnable job */
1059 if (__predict_false(spc->spc_count == 0)) 1060 if (__predict_false(spc->spc_count == 0))
1060 return NULL; 1061 return NULL;
1061 1062
1062 /* Take the highest priority thread */ 1063 /* Take the highest priority thread */
1063 KASSERT(spc->spc_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]); 1064 KASSERT(spc->spc_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
1064 q_head = sched_getrq(spc, spc->spc_maxpriority); 1065 q_head = sched_getrq(spc, spc->spc_maxpriority);
1065 l = TAILQ_FIRST(q_head); 1066 l = TAILQ_FIRST(q_head);
1066 KASSERT(l != NULL); 1067 KASSERT(l != NULL);
1067 1068
1068 sched_oncpu(l); 1069 sched_oncpu(l);
1069 l->l_rticks = hardclock_ticks; 1070 l->l_rticks = hardclock_ticks;
1070 1071
1071 return l; 1072 return l;
1072} 1073}
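
sched_nextlwp() above locates the highest-priority non-empty run queue through spc_maxpriority and a per-priority bitmap: one bit per queue, 32 priorities per uint32_t word (the same words the DDB dump routine below prints). The following is a generic sketch of that kind of lookup; the bit-to-priority encoding chosen here is illustrative and not necessarily the one kern_runq.c uses, whose constants are defined earlier in the file.

    /*
     * Generic two-level priority bitmap sketch: bitmap[i] covers
     * priorities [i*32, i*32+31], a set bit marks a non-empty queue.
     */
    #include <stdint.h>

    #define WORD_BITS   32
    #define WORD_SHIFT  5           /* log2(WORD_BITS) */

    /* Return the highest priority whose queue is non-empty, or -1. */
    static int
    highest_ready(const uint32_t *bitmap, int nwords)
    {
            for (int i = nwords - 1; i >= 0; i--) {
                    uint32_t w = bitmap[i];
                    if (w == 0)
                            continue;
                    int b = WORD_BITS - 1;
                    while ((w & (1u << b)) == 0)
                            b--;
                    return (i << WORD_SHIFT) + b;
            }
            return -1;
    }
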
1073 1074
1074/* 1075/*
1075 * sched_curcpu_runnable_p: return true if curcpu() should exit the idle loop. 1076 * sched_curcpu_runnable_p: return true if curcpu() should exit the idle loop.
1076 */ 1077 */
1077 1078
1078bool 1079bool
1079sched_curcpu_runnable_p(void) 1080sched_curcpu_runnable_p(void)
1080{ 1081{
1081 const struct cpu_info *ci; 1082 const struct cpu_info *ci;
1082 const struct schedstate_percpu *spc; 1083 const struct schedstate_percpu *spc;
1083 bool rv; 1084 bool rv;
1084 1085
1085 kpreempt_disable(); 1086 kpreempt_disable();
1086 ci = curcpu(); 1087 ci = curcpu();
1087 spc = &ci->ci_schedstate; 1088 spc = &ci->ci_schedstate;
1088 1089
1089#ifndef __HAVE_FAST_SOFTINTS 1090#ifndef __HAVE_FAST_SOFTINTS
1090 if (ci->ci_data.cpu_softints) { 1091 if (ci->ci_data.cpu_softints) {
1091 kpreempt_enable(); 1092 kpreempt_enable();
1092 return true; 1093 return true;
1093 } 1094 }
1094#endif 1095#endif
1095 1096
1096 rv = (spc->spc_count != 0); 1097 rv = (spc->spc_count != 0);
1097 kpreempt_enable(); 1098 kpreempt_enable();
1098 1099
1099 return rv; 1100 return rv;
1100} 1101}
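
The kpreempt_disable()/kpreempt_enable() bracket above exists so that curcpu() stays stable while the per-CPU counters are sampled; without it a preemption could migrate the caller and leave ci pointing at another CPU's state. The same "pin, sample, unpin" pattern reduced to a self-contained sketch, with pin_thread(), unpin_thread() and current_cpu_state() as hypothetical stand-ins:

    #include <stdbool.h>

    struct cpu_state { unsigned runnable; };

    static void pin_thread(void)   { /* stands in for kpreempt_disable() */ }
    static void unpin_thread(void) { /* stands in for kpreempt_enable()  */ }
    static struct cpu_state *
    current_cpu_state(void)
    {
            static struct cpu_state s;      /* single-CPU stand-in for curcpu() */
            return &s;
    }

    static bool
    cpu_has_work(void)
    {
            pin_thread();                   /* stay on this CPU while reading */
            bool rv = (current_cpu_state()->runnable != 0);
            unpin_thread();
            return rv;
    }
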
1101 1102
1102/* 1103/*
1103 * Sysctl nodes and initialization. 1104 * Sysctl nodes and initialization.
1104 */ 1105 */
1105 1106
1106SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 1107SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
1107{ 1108{
1108 const struct sysctlnode *node = NULL; 1109 const struct sysctlnode *node = NULL;
1109 1110
1110 sysctl_createv(clog, 0, NULL, &node, 1111 sysctl_createv(clog, 0, NULL, &node,
1111 CTLFLAG_PERMANENT, 1112 CTLFLAG_PERMANENT,
1112 CTLTYPE_NODE, "sched", 1113 CTLTYPE_NODE, "sched",
1113 SYSCTL_DESCR("Scheduler options"), 1114 SYSCTL_DESCR("Scheduler options"),
1114 NULL, 0, NULL, 0, 1115 NULL, 0, NULL, 0,
1115 CTL_KERN, CTL_CREATE, CTL_EOL); 1116 CTL_KERN, CTL_CREATE, CTL_EOL);
1116 1117
1117 if (node == NULL) 1118 if (node == NULL)
1118 return; 1119 return;
1119 1120
1120 sysctl_createv(clog, 0, &node, NULL, 1121 sysctl_createv(clog, 0, &node, NULL,
1121 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1122 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1122 CTLTYPE_INT, "cacheht_time", 1123 CTLTYPE_INT, "cacheht_time",
1123 SYSCTL_DESCR("Cache hotness time (in ms)"), 1124 SYSCTL_DESCR("Cache hotness time (in ms)"),
1124 NULL, 0, &cacheht_time, 0, 1125 NULL, 0, &cacheht_time, 0,
1125 CTL_CREATE, CTL_EOL); 1126 CTL_CREATE, CTL_EOL);
1126 sysctl_createv(clog, 0, &node, NULL, 1127 sysctl_createv(clog, 0, &node, NULL,
1127 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1128 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1128 CTLTYPE_INT, "skim_interval", 1129 CTLTYPE_INT, "skim_interval",
1129 SYSCTL_DESCR("Rate limit for stealing from other CPUs (in ms)"), 1130 SYSCTL_DESCR("Rate limit for stealing from other CPUs (in ms)"),
1130 NULL, 0, &skim_interval, 0, 1131 NULL, 0, &skim_interval, 0,
1131 CTL_CREATE, CTL_EOL); 1132 CTL_CREATE, CTL_EOL);
1132 sysctl_createv(clog, 0, &node, NULL, 1133 sysctl_createv(clog, 0, &node, NULL,
1133 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1134 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1134 CTLTYPE_INT, "min_catch", 1135 CTLTYPE_INT, "min_catch",
1135 SYSCTL_DESCR("Minimum count of threads for catching (stealing) from another CPU"), 1136 SYSCTL_DESCR("Minimum count of threads for catching (stealing) from another CPU"),
1136 NULL, 0, &min_catch, 0, 1137 NULL, 0, &min_catch, 0,
1137 CTL_CREATE, CTL_EOL); 1138 CTL_CREATE, CTL_EOL);
1138 sysctl_createv(clog, 0, &node, NULL, 1139 sysctl_createv(clog, 0, &node, NULL,
1139 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1140 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1140 CTLTYPE_INT, "timesoftints", 1141 CTLTYPE_INT, "timesoftints",
1141 SYSCTL_DESCR("Track CPU time for soft interrupts"), 1142 SYSCTL_DESCR("Track CPU time for soft interrupts"),
1142 NULL, 0, &softint_timing, 0, 1143 NULL, 0, &softint_timing, 0,
1143 CTL_CREATE, CTL_EOL); 1144 CTL_CREATE, CTL_EOL);
1144 sysctl_createv(clog, 0, &node, NULL, 1145 sysctl_createv(clog, 0, &node, NULL,
1145 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1146 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1146 CTLTYPE_INT, "kpreempt_pri", 1147 CTLTYPE_INT, "kpreempt_pri",
1147 SYSCTL_DESCR("Minimum priority to trigger kernel preemption"), 1148 SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
1148 NULL, 0, &sched_kpreempt_pri, 0, 1149 NULL, 0, &sched_kpreempt_pri, 0,
1149 CTL_CREATE, CTL_EOL); 1150 CTL_CREATE, CTL_EOL);
1150} 1151}
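
The nodes created above land under kern.sched, so they can be inspected or tuned from userland with sysctl(8) (for example, kern.sched.min_catch) or programmatically via sysctlbyname(3). A minimal userland sketch, assuming the node names exactly as registered by the sysctl_createv() calls above:

    /* Userland sketch: read one of the kern.sched knobs registered above. */
    #include <sys/param.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int val;
            size_t len = sizeof(val);

            if (sysctlbyname("kern.sched.min_catch", &val, &len, NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("kern.sched.min_catch = %d\n", val);
            return 0;
    }
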
1151 1152
1152/* 1153/*
1153 * Debugging. 1154 * Debugging.
1154 */ 1155 */
1155 1156
1156#ifdef DDB 1157#ifdef DDB
1157 1158
1158void 1159void
1159sched_print_runqueue(void (*pr)(const char *, ...)) 1160sched_print_runqueue(void (*pr)(const char *, ...))
1160{ 1161{
1161 struct cpu_info *ci, *tci; 1162 struct cpu_info *ci, *tci;
1162 struct schedstate_percpu *spc; 1163 struct schedstate_percpu *spc;
1163 struct lwp *l; 1164 struct lwp *l;
1164 struct proc *p; 1165 struct proc *p;
1165 CPU_INFO_ITERATOR cii; 1166 CPU_INFO_ITERATOR cii;
1166 1167
1167 for (CPU_INFO_FOREACH(cii, ci)) { 1168 for (CPU_INFO_FOREACH(cii, ci)) {
1168 int i; 1169 int i;
1169 1170
1170 spc = &ci->ci_schedstate; 1171 spc = &ci->ci_schedstate;
1171 1172
1172 (*pr)("Run-queue (CPU = %u):\n", ci->ci_index); 1173 (*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
1173 (*pr)(" pid.lid = %d.%d, r_count = %u, " 1174 (*pr)(" pid.lid = %d.%d, r_count = %u, "
1174 "maxpri = %d, mlwp = %p\n", 1175 "maxpri = %d, mlwp = %p\n",
1175#ifdef MULTIPROCESSOR 1176#ifdef MULTIPROCESSOR
1176 ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid, 1177 ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
1177#else 1178#else
1178 curlwp->l_proc->p_pid, curlwp->l_lid, 1179 curlwp->l_proc->p_pid, curlwp->l_lid,
1179#endif 1180#endif
1180 spc->spc_count, spc->spc_maxpriority, 1181 spc->spc_count, spc->spc_maxpriority,
1181 spc->spc_migrating); 1182 spc->spc_migrating);
1182 i = (PRI_COUNT >> BITMAP_SHIFT) - 1; 1183 i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
1183 do { 1184 do {
1184 uint32_t q; 1185 uint32_t q;
1185 q = spc->spc_bitmap[i]; 1186 q = spc->spc_bitmap[i];
1186 (*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q); 1187 (*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
1187 } while (i--); 1188 } while (i--);
1188 } 1189 }
1189 1190
1190 (*pr)(" %5s %4s %4s %10s %3s %18s %4s %4s %s\n", 1191 (*pr)(" %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
1191 "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS"); 1192 "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");
1192 1193
1193 PROCLIST_FOREACH(p, &allproc) { 1194 PROCLIST_FOREACH(p, &allproc) {
1194 (*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm); 1195 (*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
1195 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1196 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1196 ci = l->l_cpu; 1197 ci = l->l_cpu;
1197 tci = l->l_target_cpu; 1198 tci = l->l_target_cpu;
1198 (*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n", 1199 (*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
1199 (int)l->l_lid, l->l_priority, lwp_eprio(l), 1200 (int)l->l_lid, l->l_priority, lwp_eprio(l),
1200 l->l_flag, l->l_stat == LSRUN ? "RQ" : 1201 l->l_flag, l->l_stat == LSRUN ? "RQ" :
1201 (l->l_stat == LSSLEEP ? "SQ" : "-"), 1202 (l->l_stat == LSSLEEP ? "SQ" : "-"),
1202 l, ci->ci_index, (tci ? tci->ci_index : -1), 1203 l, ci->ci_index, (tci ? tci->ci_index : -1),
1203 (u_int)(hardclock_ticks - l->l_rticks)); 1204 (u_int)(hardclock_ticks - l->l_rticks));
1204 } 1205 }
1205 } 1206 }
1206} 1207}
1207 1208
1208#endif 1209#endif