Sat Sep 15 17:15:01 2012 UTC
In the "interlock" case (where the scheduler lock is used as the condvar
lock), we need to take the CPU interlock before releasing the CPU.
Otherwise other threads can be scheduled onto the CPU before we get the interlock,
leading to e.g. missed condvar wakeups.  This affected only "locks_up.c"
locking (nomen est omen?).

Also, remove various __predicts since they don't have a positive
performance impact in any setup.


(pooka)
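For illustration, a minimal standalone sketch of the ordering hazard, written
with POSIX threads and hypothetical names (it is not rump code and not part of
the commit):

/*
 * Illustrative sketch only: "interlock" and "cv" stand in for the
 * scheduler lock and the condvar, release_cpu() for handing the
 * virtual CPU to the next thread.  The signalling side is assumed to
 * hold the interlock around its pthread_cond_signal().
 */
#include <pthread.h>

static pthread_mutex_t interlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

static void
release_cpu(void)
{

	/* hand the virtual CPU to whoever wants it next */
}

/*
 * Racy ordering: once the CPU is released, another thread can run,
 * take the interlock and signal cv before we block on it, so the
 * wakeup is lost and we sleep forever.
 */
static void
unschedule_and_wait_racy(void)
{

	release_cpu();
	pthread_mutex_lock(&interlock);
	pthread_cond_wait(&cv, &interlock);	/* predicate check omitted */
	pthread_mutex_unlock(&interlock);
}

/*
 * Fixed ordering: take the interlock first, then release the CPU.
 * A thread scheduled onto the CPU must queue behind us on the
 * interlock, so it cannot signal until we are actually waiting.
 */
static void
unschedule_and_wait_fixed(void)
{

	pthread_mutex_lock(&interlock);
	release_cpu();
	pthread_cond_wait(&cv, &interlock);
	pthread_mutex_unlock(&interlock);
}

This is the ordering rump_unschedule_cpu1() now follows when the caller passed
the scheduler lock as the interlock; in the non-interlock case a membar_exit()
before the release remains sufficient.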
diff -r1.28 -r1.29 src/sys/rump/librump/rumpkern/scheduler.c

cvs diff -r1.28 -r1.29 src/sys/rump/librump/rumpkern/scheduler.c

--- src/sys/rump/librump/rumpkern/scheduler.c 2012/06/22 12:45:43 1.28
+++ src/sys/rump/librump/rumpkern/scheduler.c 2012/09/15 17:15:01 1.29
@@ -1,536 +1,542 @@
-/*	$NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $	*/
+/*	$NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $	*/
 
 /*
  * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
 #include <sys/cpu.h>
 #include <sys/kmem.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/queue.h>
 #include <sys/select.h>
 #include <sys/systm.h>
 
 #include <rump/rumpuser.h>
 
 #include "rump_private.h"
 
 static struct cpu_info rump_cpus[MAXCPUS];
 static struct rumpcpu {
 	/* needed in fastpath */
 	struct cpu_info *rcpu_ci;
 	void *rcpu_prevlwp;
 
 	/* needed in slowpath */
 	struct rumpuser_mtx *rcpu_mtx;
 	struct rumpuser_cv *rcpu_cv;
 	int rcpu_wanted;
 
 	/* offset 20 (P=4) or 36 (P=8) here */
 
 	/*
 	 * Some stats. Not really that necessary, but we should
 	 * have room. Note that these overflow quite fast, so need
 	 * to be collected often.
 	 */
 	unsigned int rcpu_fastpath;
 	unsigned int rcpu_slowpath;
 	unsigned int rcpu_migrated;
 
 	/* offset 32 (P=4) or 50 (P=8) */
 
 	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
 } rcpu_storage[MAXCPUS];
 
 struct cpu_info *rump_cpu = &rump_cpus[0];
 kcpuset_t *kcpuset_attached = NULL;
 kcpuset_t *kcpuset_running = NULL;
 int ncpu;
 
 #define RCPULWP_BUSY	((void *)-1)
 #define RCPULWP_WANTED	((void *)-2)
 
 static struct rumpuser_mtx *lwp0mtx;
 static struct rumpuser_cv *lwp0cv;
 static unsigned nextcpu;
 
 kmutex_t unruntime_lock; /* unruntime lwp lock. practically unused */
 
 static bool lwp0isbusy = false;
 
 /*
  * Keep some stats.
  *
  * Keeping track of there is not really critical for speed, unless
  * stats happen to be on a different cache line (CACHE_LINE_SIZE is
  * really just a coarse estimate), so default for the performant case
  * (i.e. no stats).
  */
 #ifdef RUMPSCHED_STATS
 #define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
 #define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
 #define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
 #else
 #define SCHED_FASTPATH(rcpu)
 #define SCHED_SLOWPATH(rcpu)
 #define SCHED_MIGRATED(rcpu)
 #endif
 
 struct cpu_info *
 cpu_lookup(u_int index)
 {
 
 	return &rump_cpus[index];
 }
 
 static inline struct rumpcpu *
 getnextcpu(void)
 {
 	unsigned newcpu;
 
 	newcpu = atomic_inc_uint_nv(&nextcpu);
 	if (__predict_false(ncpu > UINT_MAX/2))
 		atomic_and_uint(&nextcpu, 0);
 	newcpu = newcpu % ncpu;
 
 	return &rcpu_storage[newcpu];
 }
 
 /* this could/should be mi_attach_cpu? */
 void
 rump_cpus_bootstrap(int *nump)
 {
 	struct rumpcpu *rcpu;
 	struct cpu_info *ci;
 	int num = *nump;
 	int i;
 
 	if (num > MAXCPUS) {
 		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
 		    "available (adjusted)\n", num, MAXCPUS);
 		num = MAXCPUS;
 	}
 
 	for (i = 0; i < num; i++) {
 		rcpu = &rcpu_storage[i];
 		ci = &rump_cpus[i];
 		ci->ci_index = i;
 	}
 
 	kcpuset_create(&kcpuset_attached, true);
 	kcpuset_create(&kcpuset_running, true);
 
 	/* attach first cpu for bootstrap */
 	rump_cpu_attach(&rump_cpus[0]);
 	ncpu = 1;
 	*nump = num;
 }
 
 void
 rump_scheduler_init(int numcpu)
 {
 	struct rumpcpu *rcpu;
 	struct cpu_info *ci;
 	int i;
 
 	rumpuser_mutex_init(&lwp0mtx);
 	rumpuser_cv_init(&lwp0cv);
 	for (i = 0; i < numcpu; i++) {
 		rcpu = &rcpu_storage[i];
 		ci = &rump_cpus[i];
 		rcpu->rcpu_ci = ci;
 		ci->ci_schedstate.spc_mutex =
 		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
 		rcpu->rcpu_wanted = 0;
 		rumpuser_cv_init(&rcpu->rcpu_cv);
 		rumpuser_mutex_init(&rcpu->rcpu_mtx);
 	}
 
 	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE);
 }
 
 /*
  * condvar ops using scheduler lock as the rumpuser interlock.
  */
 void
 rump_schedlock_cv_wait(struct rumpuser_cv *cv)
 {
 	struct lwp *l = curlwp;
 	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];
 
 	/* mutex will be taken and released in cpu schedule/unschedule */
 	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
 }
 
 int
 rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
 {
 	struct lwp *l = curlwp;
 	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];
 
 	/* mutex will be taken and released in cpu schedule/unschedule */
 	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
 	    ts->tv_sec, ts->tv_nsec);
 }
 
 static void
 lwp0busy(void)
 {
 
 	/* busy lwp0 */
 	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
 	rumpuser_mutex_enter_nowrap(lwp0mtx);
 	while (lwp0isbusy)
 		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
 	lwp0isbusy = true;
 	rumpuser_mutex_exit(lwp0mtx);
 }
 
 static void
 lwp0rele(void)
 {
 
 	rumpuser_mutex_enter_nowrap(lwp0mtx);
 	KASSERT(lwp0isbusy == true);
 	lwp0isbusy = false;
 	rumpuser_cv_signal(lwp0cv);
 	rumpuser_mutex_exit(lwp0mtx);
 }
 
 /*
  * rump_schedule: ensure that the calling host thread has a valid lwp context.
  * ie. ensure that rumpuser_get_curlwp() != NULL.
  */
 void
 rump_schedule()
 {
 	struct lwp *l;
 
 	/*
 	 * If there is no dedicated lwp, allocate a temp one and
 	 * set it to be free'd upon unschedule(). Use lwp0 context
 	 * for reserving the necessary resources. Don't optimize
 	 * for this case -- anyone who cares about performance will
 	 * start a real thread.
 	 */
 	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
 		rump_schedule_cpu(l);
 		LWP_CACHE_CREDS(l, l->l_proc);
 	} else {
 		lwp0busy();
 
 		/* schedule cpu and use lwp0 */
 		rump_schedule_cpu(&lwp0);
 		rumpuser_set_curlwp(&lwp0);
 
 		/* allocate thread, switch to it, and release lwp0 */
 		l = rump__lwproc_alloclwp(initproc);
 		rump_lwproc_switch(l);
 		lwp0rele();
 
 		/*
 		 * mark new thread dead-on-unschedule. this
 		 * means that we'll be running with l_refcnt == 0.
 		 * relax, it's fine.
 		 */
 		rump_lwproc_releaselwp();
 	}
 }
 
 void
 rump_schedule_cpu(struct lwp *l)
 {
 
 	rump_schedule_cpu_interlock(l, NULL);
 }
 
 /*
  * Schedule a CPU. This optimizes for the case where we schedule
  * the same thread often, and we have nCPU >= nFrequently-Running-Thread
  * (where CPU is virtual rump cpu, not host CPU).
  */
 void
 rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
 {
 	struct rumpcpu *rcpu;
 	void *old;
 	bool domigrate;
 	bool bound = l->l_pflag & LP_BOUND;
 
 	l->l_stat = LSRUN;
 
 	/*
 	 * First, try fastpath: if we were the previous user of the
 	 * CPU, everything is in order cachewise and we can just
 	 * proceed to use it.
 	 *
 	 * If we are a different thread (i.e. CAS fails), we must go
 	 * through a memory barrier to ensure we get a truthful
 	 * view of the world.
 	 */
 
 	KASSERT(l->l_target_cpu != NULL);
 	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
 	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
-		if (__predict_true(interlock == rcpu->rcpu_mtx))
+		if (interlock == rcpu->rcpu_mtx)
 			rumpuser_mutex_exit(rcpu->rcpu_mtx);
 		SCHED_FASTPATH(rcpu);
 		/* jones, you're the man */
 		goto fastlane;
 	}
 
 	/*
 	 * Else, it's the slowpath for us. First, determine if we
 	 * can migrate.
 	 */
 	if (ncpu == 1)
 		domigrate = false;
 	else
 		domigrate = true;
 
 	/* Take lock. This acts as a load barrier too. */
-	if (__predict_true(interlock != rcpu->rcpu_mtx))
+	if (interlock != rcpu->rcpu_mtx)
 		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 
 	for (;;) {
 		SCHED_SLOWPATH(rcpu);
 		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);
 
 		/* CPU is free? */
 		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
 			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
 			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
 				break;
 			}
 		}
 
 		/*
 		 * Do we want to migrate once?
 		 * This may need a slightly better algorithm, or we
 		 * might cache pingpong eternally for non-frequent
 		 * threads.
 		 */
 		if (domigrate && !bound) {
 			domigrate = false;
 			SCHED_MIGRATED(rcpu);
 			rumpuser_mutex_exit(rcpu->rcpu_mtx);
 			rcpu = getnextcpu();
 			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 			continue;
 		}
 
 		/* Want CPU, wait until it's released an retry */
 		rcpu->rcpu_wanted++;
 		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
 		rcpu->rcpu_wanted--;
 	}
 	rumpuser_mutex_exit(rcpu->rcpu_mtx);
 
  fastlane:
 	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
 	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
 	l->l_ncsw++;
 	l->l_stat = LSONPROC;
 
 	rcpu->rcpu_ci->ci_curlwp = l;
 }
 
 void
 rump_unschedule()
 {
 	struct lwp *l = rumpuser_get_curlwp();
 #ifdef DIAGNOSTIC
 	int nlock;
 
 	KERNEL_UNLOCK_ALL(l, &nlock);
 	KASSERT(nlock == 0);
 #endif
 
 	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
 	rump_unschedule_cpu(l);
 	l->l_mutex = &unruntime_lock;
 	l->l_stat = LSSTOP;
 
 	/*
 	 * Check special conditions:
 	 *  1) do we need to free the lwp which just unscheduled?
 	 *     (locking order: lwp0, cpu)
 	 *  2) do we want to clear curlwp for the current host thread
 	 */
 	if (__predict_false(l->l_flag & LW_WEXIT)) {
 		lwp0busy();
 
 		/* Now that we have lwp0, we can schedule a CPU again */
 		rump_schedule_cpu(l);
 
 		/* switch to lwp0. this frees the old thread */
 		KASSERT(l->l_flag & LW_WEXIT);
 		rump_lwproc_switch(&lwp0);
 
 		/* release lwp0 */
 		rump_unschedule_cpu(&lwp0);
 		lwp0.l_mutex = &unruntime_lock;
 		lwp0.l_pflag &= ~LP_RUNNING;
 		lwp0rele();
 		rumpuser_set_curlwp(NULL);
 
 	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
 		rumpuser_set_curlwp(NULL);
 		l->l_flag &= ~LW_RUMP_CLEAR;
 	}
 }
 
 void
 rump_unschedule_cpu(struct lwp *l)
 {
 
 	rump_unschedule_cpu_interlock(l, NULL);
 }
 
 void
 rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
 {
 
 	if ((l->l_pflag & LP_INTR) == 0)
 		rump_softint_run(l->l_cpu);
 	rump_unschedule_cpu1(l, interlock);
 }
 
 void
 rump_unschedule_cpu1(struct lwp *l, void *interlock)
 {
 	struct rumpcpu *rcpu;
 	struct cpu_info *ci;
 	void *old;
 
 	ci = l->l_cpu;
 	ci->ci_curlwp = NULL;
 	rcpu = &rcpu_storage[ci-&rump_cpus[0]];
 
 	KASSERT(rcpu->rcpu_ci == ci);
 
 	/*
 	 * Make sure all stores are seen before the CPU release. This
 	 * is relevant only in the non-fastpath scheduling case, but
 	 * we don't know here if that's going to happen, so need to
 	 * expect the worst.
+	 *
+	 * If the scheduler interlock was requested by the caller, we
+	 * need to obtain it before we release the CPU. Otherwise, we risk a
+	 * race condition where another thread is scheduled onto the
+	 * rump kernel CPU before our current thread can
+	 * grab the interlock.
 	 */
-	membar_exit();
+	if (interlock == rcpu->rcpu_mtx)
+		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
+	else
+		membar_exit();
 
 	/* Release the CPU. */
 	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);
 
 	/* No waiters? No problems. We're outta here. */
 	if (old == RCPULWP_BUSY) {
-		/* Was the scheduler interlock requested? */
-		if (__predict_false(interlock == rcpu->rcpu_mtx))
-			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 		return;
 	}
 
 	KASSERT(old == RCPULWP_WANTED);
 
 	/*
 	 * Ok, things weren't so snappy.
 	 *
 	 * Snailpath: take lock and signal anyone waiting for this CPU.
 	 */
 
-	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
+	if (interlock != rcpu->rcpu_mtx)
+		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 	if (rcpu->rcpu_wanted)
 		rumpuser_cv_broadcast(rcpu->rcpu_cv);
-
-	if (__predict_true(interlock != rcpu->rcpu_mtx))
+	if (interlock != rcpu->rcpu_mtx)
 		rumpuser_mutex_exit(rcpu->rcpu_mtx);
 }
 
 /* Give up and retake CPU (perhaps a different one) */
 void
 yield()
 {
 	struct lwp *l = curlwp;
 	int nlocks;
 
 	KERNEL_UNLOCK_ALL(l, &nlocks);
 	rump_unschedule_cpu(l);
 	rump_schedule_cpu(l);
 	KERNEL_LOCK(nlocks, l);
 }
 
 void
 preempt()
 {
 
 	yield();
 }
 
 bool
 kpreempt(uintptr_t where)
 {
 
 	return false;
 }
 
 /*
  * There is no kernel thread preemption in rump currently. But call
  * the implementing macros anyway in case they grow some side-effects
  * down the road.
  */
 void
 kpreempt_disable(void)
 {
 
 	KPREEMPT_DISABLE(curlwp);
 }
 
 void
 kpreempt_enable(void)
 {
 
 	KPREEMPT_ENABLE(curlwp);
 }
 
 void
 suspendsched(void)
 {
 
 	/*
 	 * Could wait until everyone is out and block further entries,
 	 * but skip that for now.
 	 */
 }
 
 void
 sched_nice(struct proc *p, int level)
 {
 
 	/* nothing to do for now */
 }