| @@ -1,536 +1,542 @@ | | | @@ -1,536 +1,542 @@ |
1 | /* $NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $ */ | | 1 | /* $NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. | | 4 | * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. |
5 | * | | 5 | * |
6 | * Redistribution and use in source and binary forms, with or without | | 6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions | | 7 | * modification, are permitted provided that the following conditions |
8 | * are met: | | 8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright | | 9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. | | 10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright | | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the | | 12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. | | 13 | * documentation and/or other materials provided with the distribution. |
14 | * | | 14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | | 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS |
16 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | | 16 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | | 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
18 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | | 18 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | | 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | * SUCH DAMAGE. | | 25 | * SUCH DAMAGE. |
26 | */ | | 26 | */ |
27 | | | 27 | |
28 | #include <sys/cdefs.h> | | 28 | #include <sys/cdefs.h> |
29 | __KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $"); | | 29 | __KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $"); |
30 | | | 30 | |
31 | #include <sys/param.h> | | 31 | #include <sys/param.h> |
32 | #include <sys/atomic.h> | | 32 | #include <sys/atomic.h> |
33 | #include <sys/cpu.h> | | 33 | #include <sys/cpu.h> |
34 | #include <sys/kmem.h> | | 34 | #include <sys/kmem.h> |
35 | #include <sys/mutex.h> | | 35 | #include <sys/mutex.h> |
36 | #include <sys/namei.h> | | 36 | #include <sys/namei.h> |
37 | #include <sys/queue.h> | | 37 | #include <sys/queue.h> |
38 | #include <sys/select.h> | | 38 | #include <sys/select.h> |
39 | #include <sys/systm.h> | | 39 | #include <sys/systm.h> |
40 | | | 40 | |
41 | #include <rump/rumpuser.h> | | 41 | #include <rump/rumpuser.h> |
42 | | | 42 | |
43 | #include "rump_private.h" | | 43 | #include "rump_private.h" |
44 | | | 44 | |
45 | static struct cpu_info rump_cpus[MAXCPUS]; | | 45 | static struct cpu_info rump_cpus[MAXCPUS]; |
46 | static struct rumpcpu { | | 46 | static struct rumpcpu { |
47 | /* needed in fastpath */ | | 47 | /* needed in fastpath */ |
48 | struct cpu_info *rcpu_ci; | | 48 | struct cpu_info *rcpu_ci; |
49 | void *rcpu_prevlwp; | | 49 | void *rcpu_prevlwp; |
50 | | | 50 | |
51 | /* needed in slowpath */ | | 51 | /* needed in slowpath */ |
52 | struct rumpuser_mtx *rcpu_mtx; | | 52 | struct rumpuser_mtx *rcpu_mtx; |
53 | struct rumpuser_cv *rcpu_cv; | | 53 | struct rumpuser_cv *rcpu_cv; |
54 | int rcpu_wanted; | | 54 | int rcpu_wanted; |
55 | | | 55 | |
56 | /* offset 20 (P=4) or 36 (P=8) here */ | | 56 | /* offset 20 (P=4) or 36 (P=8) here */ |
57 | | | 57 | |
58 | /* | | 58 | /* |
59 | * Some stats. Not really that necessary, but we should | | 59 | * Some stats. Not really that necessary, but we should |
60 | * have room. Note that these overflow quite fast, so need | | 60 | * have room. Note that these overflow quite fast, so need |
61 | * to be collected often. | | 61 | * to be collected often. |
62 | */ | | 62 | */ |
63 | unsigned int rcpu_fastpath; | | 63 | unsigned int rcpu_fastpath; |
64 | unsigned int rcpu_slowpath; | | 64 | unsigned int rcpu_slowpath; |
65 | unsigned int rcpu_migrated; | | 65 | unsigned int rcpu_migrated; |
66 | | | 66 | |
67 | /* offset 32 (P=4) or 50 (P=8) */ | | 67 | /* offset 32 (P=4) or 50 (P=8) */ |
68 | | | 68 | |
69 | int rcpu_align[0] __aligned(CACHE_LINE_SIZE); | | 69 | int rcpu_align[0] __aligned(CACHE_LINE_SIZE); |
70 | } rcpu_storage[MAXCPUS]; | | 70 | } rcpu_storage[MAXCPUS]; |
71 | | | 71 | |
72 | struct cpu_info *rump_cpu = &rump_cpus[0]; | | 72 | struct cpu_info *rump_cpu = &rump_cpus[0]; |
73 | kcpuset_t *kcpuset_attached = NULL; | | 73 | kcpuset_t *kcpuset_attached = NULL; |
74 | kcpuset_t *kcpuset_running = NULL; | | 74 | kcpuset_t *kcpuset_running = NULL; |
75 | int ncpu; | | 75 | int ncpu; |
76 | | | 76 | |
77 | #define RCPULWP_BUSY ((void *)-1) | | 77 | #define RCPULWP_BUSY ((void *)-1) |
78 | #define RCPULWP_WANTED ((void *)-2) | | 78 | #define RCPULWP_WANTED ((void *)-2) |
79 | | | 79 | |
80 | static struct rumpuser_mtx *lwp0mtx; | | 80 | static struct rumpuser_mtx *lwp0mtx; |
81 | static struct rumpuser_cv *lwp0cv; | | 81 | static struct rumpuser_cv *lwp0cv; |
82 | static unsigned nextcpu; | | 82 | static unsigned nextcpu; |
83 | | | 83 | |
84 | kmutex_t unruntime_lock; /* unruntime lwp lock. practically unused */ | | 84 | kmutex_t unruntime_lock; /* unruntime lwp lock. practically unused */ |
85 | | | 85 | |
86 | static bool lwp0isbusy = false; | | 86 | static bool lwp0isbusy = false; |
87 | | | 87 | |
88 | /* | | 88 | /* |
89 | * Keep some stats. | | 89 | * Keep some stats. |
90 | * | | 90 | * |
91 | * Keeping track of them is not really critical for speed, unless | | 91 | * Keeping track of them is not really critical for speed, unless |
92 | * stats happen to be on a different cache line (CACHE_LINE_SIZE is | | 92 | * stats happen to be on a different cache line (CACHE_LINE_SIZE is |
93 | * really just a coarse estimate), so default for the performant case | | 93 | * really just a coarse estimate), so default for the performant case |
94 | * (i.e. no stats). | | 94 | * (i.e. no stats). |
95 | */ | | 95 | */ |
96 | #ifdef RUMPSCHED_STATS | | 96 | #ifdef RUMPSCHED_STATS |
97 | #define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++; | | 97 | #define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++; |
98 | #define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++; | | 98 | #define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++; |
99 | #define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++; | | 99 | #define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++; |
100 | #else | | 100 | #else |
101 | #define SCHED_FASTPATH(rcpu) | | 101 | #define SCHED_FASTPATH(rcpu) |
102 | #define SCHED_SLOWPATH(rcpu) | | 102 | #define SCHED_SLOWPATH(rcpu) |
103 | #define SCHED_MIGRATED(rcpu) | | 103 | #define SCHED_MIGRATED(rcpu) |
104 | #endif | | 104 | #endif |
105 | | | 105 | |
106 | struct cpu_info * | | 106 | struct cpu_info * |
107 | cpu_lookup(u_int index) | | 107 | cpu_lookup(u_int index) |
108 | { | | 108 | { |
109 | | | 109 | |
110 | return &rump_cpus[index]; | | 110 | return &rump_cpus[index]; |
111 | } | | 111 | } |
112 | | | 112 | |
113 | static inline struct rumpcpu * | | 113 | static inline struct rumpcpu * |
114 | getnextcpu(void) | | 114 | getnextcpu(void) |
115 | { | | 115 | { |
116 | unsigned newcpu; | | 116 | unsigned newcpu; |
117 | | | 117 | |
118 | newcpu = atomic_inc_uint_nv(&nextcpu); | | 118 | newcpu = atomic_inc_uint_nv(&nextcpu); |
119 | if (__predict_false(ncpu > UINT_MAX/2)) | | 119 | if (__predict_false(ncpu > UINT_MAX/2)) |
120 | atomic_and_uint(&nextcpu, 0); | | 120 | atomic_and_uint(&nextcpu, 0); |
121 | newcpu = newcpu % ncpu; | | 121 | newcpu = newcpu % ncpu; |
122 | | | 122 | |
123 | return &rcpu_storage[newcpu]; | | 123 | return &rcpu_storage[newcpu]; |
124 | } | | 124 | } |
125 | | | 125 | |
126 | /* this could/should be mi_attach_cpu? */ | | 126 | /* this could/should be mi_attach_cpu? */ |
127 | void | | 127 | void |
128 | rump_cpus_bootstrap(int *nump) | | 128 | rump_cpus_bootstrap(int *nump) |
129 | { | | 129 | { |
130 | struct rumpcpu *rcpu; | | 130 | struct rumpcpu *rcpu; |
131 | struct cpu_info *ci; | | 131 | struct cpu_info *ci; |
132 | int num = *nump; | | 132 | int num = *nump; |
133 | int i; | | 133 | int i; |
134 | | | 134 | |
135 | if (num > MAXCPUS) { | | 135 | if (num > MAXCPUS) { |
136 | aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) " | | 136 | aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) " |
137 | "available (adjusted)\n", num, MAXCPUS); | | 137 | "available (adjusted)\n", num, MAXCPUS); |
138 | num = MAXCPUS; | | 138 | num = MAXCPUS; |
139 | } | | 139 | } |
140 | | | 140 | |
141 | for (i = 0; i < num; i++) { | | 141 | for (i = 0; i < num; i++) { |
142 | rcpu = &rcpu_storage[i]; | | 142 | rcpu = &rcpu_storage[i]; |
143 | ci = &rump_cpus[i]; | | 143 | ci = &rump_cpus[i]; |
144 | ci->ci_index = i; | | 144 | ci->ci_index = i; |
145 | } | | 145 | } |
146 | | | 146 | |
147 | kcpuset_create(&kcpuset_attached, true); | | 147 | kcpuset_create(&kcpuset_attached, true); |
148 | kcpuset_create(&kcpuset_running, true); | | 148 | kcpuset_create(&kcpuset_running, true); |
149 | | | 149 | |
150 | /* attach first cpu for bootstrap */ | | 150 | /* attach first cpu for bootstrap */ |
151 | rump_cpu_attach(&rump_cpus[0]); | | 151 | rump_cpu_attach(&rump_cpus[0]); |
152 | ncpu = 1; | | 152 | ncpu = 1; |
153 | *nump = num; | | 153 | *nump = num; |
154 | } | | 154 | } |
155 | | | 155 | |
156 | void | | 156 | void |
157 | rump_scheduler_init(int numcpu) | | 157 | rump_scheduler_init(int numcpu) |
158 | { | | 158 | { |
159 | struct rumpcpu *rcpu; | | 159 | struct rumpcpu *rcpu; |
160 | struct cpu_info *ci; | | 160 | struct cpu_info *ci; |
161 | int i; | | 161 | int i; |
162 | | | 162 | |
163 | rumpuser_mutex_init(&lwp0mtx); | | 163 | rumpuser_mutex_init(&lwp0mtx); |
164 | rumpuser_cv_init(&lwp0cv); | | 164 | rumpuser_cv_init(&lwp0cv); |
165 | for (i = 0; i < numcpu; i++) { | | 165 | for (i = 0; i < numcpu; i++) { |
166 | rcpu = &rcpu_storage[i]; | | 166 | rcpu = &rcpu_storage[i]; |
167 | ci = &rump_cpus[i]; | | 167 | ci = &rump_cpus[i]; |
168 | rcpu->rcpu_ci = ci; | | 168 | rcpu->rcpu_ci = ci; |
169 | ci->ci_schedstate.spc_mutex = | | 169 | ci->ci_schedstate.spc_mutex = |
170 | mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); | | 170 | mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
171 | ci->ci_schedstate.spc_flags = SPCF_RUNNING; | | 171 | ci->ci_schedstate.spc_flags = SPCF_RUNNING; |
172 | rcpu->rcpu_wanted = 0; | | 172 | rcpu->rcpu_wanted = 0; |
173 | rumpuser_cv_init(&rcpu->rcpu_cv); | | 173 | rumpuser_cv_init(&rcpu->rcpu_cv); |
174 | rumpuser_mutex_init(&rcpu->rcpu_mtx); | | 174 | rumpuser_mutex_init(&rcpu->rcpu_mtx); |
175 | } | | 175 | } |
176 | | | 176 | |
177 | mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE); | | 177 | mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE); |
178 | } | | 178 | } |
179 | | | 179 | |
180 | /* | | 180 | /* |
181 | * condvar ops using scheduler lock as the rumpuser interlock. | | 181 | * condvar ops using scheduler lock as the rumpuser interlock. |
182 | */ | | 182 | */ |
183 | void | | 183 | void |
184 | rump_schedlock_cv_wait(struct rumpuser_cv *cv) | | 184 | rump_schedlock_cv_wait(struct rumpuser_cv *cv) |
185 | { | | 185 | { |
186 | struct lwp *l = curlwp; | | 186 | struct lwp *l = curlwp; |
187 | struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]]; | | 187 | struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]]; |
188 | | | 188 | |
189 | /* mutex will be taken and released in cpu schedule/unschedule */ | | 189 | /* mutex will be taken and released in cpu schedule/unschedule */ |
190 | rumpuser_cv_wait(cv, rcpu->rcpu_mtx); | | 190 | rumpuser_cv_wait(cv, rcpu->rcpu_mtx); |
191 | } | | 191 | } |
192 | | | 192 | |
193 | int | | 193 | int |
194 | rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts) | | 194 | rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts) |
195 | { | | 195 | { |
196 | struct lwp *l = curlwp; | | 196 | struct lwp *l = curlwp; |
197 | struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]]; | | 197 | struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]]; |
198 | | | 198 | |
199 | /* mutex will be taken and released in cpu schedule/unschedule */ | | 199 | /* mutex will be taken and released in cpu schedule/unschedule */ |
200 | return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx, | | 200 | return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx, |
201 | ts->tv_sec, ts->tv_nsec); | | 201 | ts->tv_sec, ts->tv_nsec); |
202 | } | | 202 | } |
203 | | | 203 | |
204 | static void | | 204 | static void |
205 | lwp0busy(void) | | 205 | lwp0busy(void) |
206 | { | | 206 | { |
207 | | | 207 | |
208 | /* busy lwp0 */ | | 208 | /* busy lwp0 */ |
209 | KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC); | | 209 | KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC); |
210 | rumpuser_mutex_enter_nowrap(lwp0mtx); | | 210 | rumpuser_mutex_enter_nowrap(lwp0mtx); |
211 | while (lwp0isbusy) | | 211 | while (lwp0isbusy) |
212 | rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx); | | 212 | rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx); |
213 | lwp0isbusy = true; | | 213 | lwp0isbusy = true; |
214 | rumpuser_mutex_exit(lwp0mtx); | | 214 | rumpuser_mutex_exit(lwp0mtx); |
215 | } | | 215 | } |
216 | | | 216 | |
217 | static void | | 217 | static void |
218 | lwp0rele(void) | | 218 | lwp0rele(void) |
219 | { | | 219 | { |
220 | | | 220 | |
221 | rumpuser_mutex_enter_nowrap(lwp0mtx); | | 221 | rumpuser_mutex_enter_nowrap(lwp0mtx); |
222 | KASSERT(lwp0isbusy == true); | | 222 | KASSERT(lwp0isbusy == true); |
223 | lwp0isbusy = false; | | 223 | lwp0isbusy = false; |
224 | rumpuser_cv_signal(lwp0cv); | | 224 | rumpuser_cv_signal(lwp0cv); |
225 | rumpuser_mutex_exit(lwp0mtx); | | 225 | rumpuser_mutex_exit(lwp0mtx); |
226 | } | | 226 | } |
227 | | | 227 | |
228 | /* | | 228 | /* |
229 | * rump_schedule: ensure that the calling host thread has a valid lwp context. | | 229 | * rump_schedule: ensure that the calling host thread has a valid lwp context. |
230 | * ie. ensure that rumpuser_get_curlwp() != NULL. | | 230 | * ie. ensure that rumpuser_get_curlwp() != NULL. |
231 | */ | | 231 | */ |
232 | void | | 232 | void |
233 | rump_schedule() | | 233 | rump_schedule() |
234 | { | | 234 | { |
235 | struct lwp *l; | | 235 | struct lwp *l; |
236 | | | 236 | |
237 | /* | | 237 | /* |
238 | * If there is no dedicated lwp, allocate a temp one and | | 238 | * If there is no dedicated lwp, allocate a temp one and |
239 | * set it to be free'd upon unschedule(). Use lwp0 context | | 239 | * set it to be free'd upon unschedule(). Use lwp0 context |
240 | * for reserving the necessary resources. Don't optimize | | 240 | * for reserving the necessary resources. Don't optimize |
241 | * for this case -- anyone who cares about performance will | | 241 | * for this case -- anyone who cares about performance will |
242 | * start a real thread. | | 242 | * start a real thread. |
243 | */ | | 243 | */ |
244 | if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) { | | 244 | if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) { |
245 | rump_schedule_cpu(l); | | 245 | rump_schedule_cpu(l); |
246 | LWP_CACHE_CREDS(l, l->l_proc); | | 246 | LWP_CACHE_CREDS(l, l->l_proc); |
247 | } else { | | 247 | } else { |
248 | lwp0busy(); | | 248 | lwp0busy(); |
249 | | | 249 | |
250 | /* schedule cpu and use lwp0 */ | | 250 | /* schedule cpu and use lwp0 */ |
251 | rump_schedule_cpu(&lwp0); | | 251 | rump_schedule_cpu(&lwp0); |
252 | rumpuser_set_curlwp(&lwp0); | | 252 | rumpuser_set_curlwp(&lwp0); |
253 | | | 253 | |
254 | /* allocate thread, switch to it, and release lwp0 */ | | 254 | /* allocate thread, switch to it, and release lwp0 */ |
255 | l = rump__lwproc_alloclwp(initproc); | | 255 | l = rump__lwproc_alloclwp(initproc); |
256 | rump_lwproc_switch(l); | | 256 | rump_lwproc_switch(l); |
257 | lwp0rele(); | | 257 | lwp0rele(); |
258 | | | 258 | |
259 | /* | | 259 | /* |
260 | * mark new thread dead-on-unschedule. this | | 260 | * mark new thread dead-on-unschedule. this |
261 | * means that we'll be running with l_refcnt == 0. | | 261 | * means that we'll be running with l_refcnt == 0. |
262 | * relax, it's fine. | | 262 | * relax, it's fine. |
263 | */ | | 263 | */ |
264 | rump_lwproc_releaselwp(); | | 264 | rump_lwproc_releaselwp(); |
265 | } | | 265 | } |
266 | } | | 266 | } |
267 | | | 267 | |
268 | void | | 268 | void |
269 | rump_schedule_cpu(struct lwp *l) | | 269 | rump_schedule_cpu(struct lwp *l) |
270 | { | | 270 | { |
271 | | | 271 | |
272 | rump_schedule_cpu_interlock(l, NULL); | | 272 | rump_schedule_cpu_interlock(l, NULL); |
273 | } | | 273 | } |
274 | | | 274 | |
275 | /* | | 275 | /* |
276 | * Schedule a CPU. This optimizes for the case where we schedule | | 276 | * Schedule a CPU. This optimizes for the case where we schedule |
277 | * the same thread often, and we have nCPU >= nFrequently-Running-Thread | | 277 | * the same thread often, and we have nCPU >= nFrequently-Running-Thread |
278 | * (where CPU is virtual rump cpu, not host CPU). | | 278 | * (where CPU is virtual rump cpu, not host CPU). |
279 | */ | | 279 | */ |
280 | void | | 280 | void |
281 | rump_schedule_cpu_interlock(struct lwp *l, void *interlock) | | 281 | rump_schedule_cpu_interlock(struct lwp *l, void *interlock) |
282 | { | | 282 | { |
283 | struct rumpcpu *rcpu; | | 283 | struct rumpcpu *rcpu; |
284 | void *old; | | 284 | void *old; |
285 | bool domigrate; | | 285 | bool domigrate; |
286 | bool bound = l->l_pflag & LP_BOUND; | | 286 | bool bound = l->l_pflag & LP_BOUND; |
287 | | | 287 | |
288 | l->l_stat = LSRUN; | | 288 | l->l_stat = LSRUN; |
289 | | | 289 | |
290 | /* | | 290 | /* |
291 | * First, try fastpath: if we were the previous user of the | | 291 | * First, try fastpath: if we were the previous user of the |
292 | * CPU, everything is in order cachewise and we can just | | 292 | * CPU, everything is in order cachewise and we can just |
293 | * proceed to use it. | | 293 | * proceed to use it. |
294 | * | | 294 | * |
295 | * If we are a different thread (i.e. CAS fails), we must go | | 295 | * If we are a different thread (i.e. CAS fails), we must go |
296 | * through a memory barrier to ensure we get a truthful | | 296 | * through a memory barrier to ensure we get a truthful |
297 | * view of the world. | | 297 | * view of the world. |
298 | */ | | 298 | */ |
299 | | | 299 | |
300 | KASSERT(l->l_target_cpu != NULL); | | 300 | KASSERT(l->l_target_cpu != NULL); |
301 | rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]]; | | 301 | rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]]; |
302 | if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) { | | 302 | if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) { |
303 | if (__predict_true(interlock == rcpu->rcpu_mtx)) | | 303 | if (interlock == rcpu->rcpu_mtx) |
304 | rumpuser_mutex_exit(rcpu->rcpu_mtx); | | 304 | rumpuser_mutex_exit(rcpu->rcpu_mtx); |
305 | SCHED_FASTPATH(rcpu); | | 305 | SCHED_FASTPATH(rcpu); |
306 | /* jones, you're the man */ | | 306 | /* jones, you're the man */ |
307 | goto fastlane; | | 307 | goto fastlane; |
308 | } | | 308 | } |
309 | | | 309 | |
310 | /* | | 310 | /* |
311 | * Else, it's the slowpath for us. First, determine if we | | 311 | * Else, it's the slowpath for us. First, determine if we |
312 | * can migrate. | | 312 | * can migrate. |
313 | */ | | 313 | */ |
314 | if (ncpu == 1) | | 314 | if (ncpu == 1) |
315 | domigrate = false; | | 315 | domigrate = false; |
316 | else | | 316 | else |
317 | domigrate = true; | | 317 | domigrate = true; |
318 | | | 318 | |
319 | /* Take lock. This acts as a load barrier too. */ | | 319 | /* Take lock. This acts as a load barrier too. */ |
320 | if (__predict_true(interlock != rcpu->rcpu_mtx)) | | 320 | if (interlock != rcpu->rcpu_mtx) |
321 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); | | 321 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); |
322 | | | 322 | |
323 | for (;;) { | | 323 | for (;;) { |
324 | SCHED_SLOWPATH(rcpu); | | 324 | SCHED_SLOWPATH(rcpu); |
325 | old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED); | | 325 | old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED); |
326 | | | 326 | |
327 | /* CPU is free? */ | | 327 | /* CPU is free? */ |
328 | if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) { | | 328 | if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) { |
329 | if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, | | 329 | if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, |
330 | RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) { | | 330 | RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) { |
331 | break; | | 331 | break; |
332 | } | | 332 | } |
333 | } | | 333 | } |
334 | | | 334 | |
335 | /* | | 335 | /* |
336 | * Do we want to migrate once? | | 336 | * Do we want to migrate once? |
337 | * This may need a slightly better algorithm, or we | | 337 | * This may need a slightly better algorithm, or we |
338 | * might cache pingpong eternally for non-frequent | | 338 | * might cache pingpong eternally for non-frequent |
339 | * threads. | | 339 | * threads. |
340 | */ | | 340 | */ |
341 | if (domigrate && !bound) { | | 341 | if (domigrate && !bound) { |
342 | domigrate = false; | | 342 | domigrate = false; |
343 | SCHED_MIGRATED(rcpu); | | 343 | SCHED_MIGRATED(rcpu); |
344 | rumpuser_mutex_exit(rcpu->rcpu_mtx); | | 344 | rumpuser_mutex_exit(rcpu->rcpu_mtx); |
345 | rcpu = getnextcpu(); | | 345 | rcpu = getnextcpu(); |
346 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); | | 346 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); |
347 | continue; | | 347 | continue; |
348 | } | | 348 | } |
349 | | | 349 | |
350 | /* Want CPU, wait until it's released and retry */ | | 350 | /* Want CPU, wait until it's released and retry */ |
351 | rcpu->rcpu_wanted++; | | 351 | rcpu->rcpu_wanted++; |
352 | rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx); | | 352 | rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx); |
353 | rcpu->rcpu_wanted--; | | 353 | rcpu->rcpu_wanted--; |
354 | } | | 354 | } |
355 | rumpuser_mutex_exit(rcpu->rcpu_mtx); | | 355 | rumpuser_mutex_exit(rcpu->rcpu_mtx); |
356 | | | 356 | |
357 | fastlane: | | 357 | fastlane: |
358 | l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci; | | 358 | l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci; |
359 | l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex; | | 359 | l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex; |
360 | l->l_ncsw++; | | 360 | l->l_ncsw++; |
361 | l->l_stat = LSONPROC; | | 361 | l->l_stat = LSONPROC; |
362 | | | 362 | |
363 | rcpu->rcpu_ci->ci_curlwp = l; | | 363 | rcpu->rcpu_ci->ci_curlwp = l; |
364 | } | | 364 | } |
365 | | | 365 | |
366 | void | | 366 | void |
367 | rump_unschedule() | | 367 | rump_unschedule() |
368 | { | | 368 | { |
369 | struct lwp *l = rumpuser_get_curlwp(); | | 369 | struct lwp *l = rumpuser_get_curlwp(); |
370 | #ifdef DIAGNOSTIC | | 370 | #ifdef DIAGNOSTIC |
371 | int nlock; | | 371 | int nlock; |
372 | | | 372 | |
373 | KERNEL_UNLOCK_ALL(l, &nlock); | | 373 | KERNEL_UNLOCK_ALL(l, &nlock); |
374 | KASSERT(nlock == 0); | | 374 | KASSERT(nlock == 0); |
375 | #endif | | 375 | #endif |
376 | | | 376 | |
377 | KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex); | | 377 | KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex); |
378 | rump_unschedule_cpu(l); | | 378 | rump_unschedule_cpu(l); |
379 | l->l_mutex = &unruntime_lock; | | 379 | l->l_mutex = &unruntime_lock; |
380 | l->l_stat = LSSTOP; | | 380 | l->l_stat = LSSTOP; |
381 | | | 381 | |
382 | /* | | 382 | /* |
383 | * Check special conditions: | | 383 | * Check special conditions: |
384 | * 1) do we need to free the lwp which just unscheduled? | | 384 | * 1) do we need to free the lwp which just unscheduled? |
385 | * (locking order: lwp0, cpu) | | 385 | * (locking order: lwp0, cpu) |
386 | * 2) do we want to clear curlwp for the current host thread | | 386 | * 2) do we want to clear curlwp for the current host thread |
387 | */ | | 387 | */ |
388 | if (__predict_false(l->l_flag & LW_WEXIT)) { | | 388 | if (__predict_false(l->l_flag & LW_WEXIT)) { |
389 | lwp0busy(); | | 389 | lwp0busy(); |
390 | | | 390 | |
391 | /* Now that we have lwp0, we can schedule a CPU again */ | | 391 | /* Now that we have lwp0, we can schedule a CPU again */ |
392 | rump_schedule_cpu(l); | | 392 | rump_schedule_cpu(l); |
393 | | | 393 | |
394 | /* switch to lwp0. this frees the old thread */ | | 394 | /* switch to lwp0. this frees the old thread */ |
395 | KASSERT(l->l_flag & LW_WEXIT); | | 395 | KASSERT(l->l_flag & LW_WEXIT); |
396 | rump_lwproc_switch(&lwp0); | | 396 | rump_lwproc_switch(&lwp0); |
397 | | | 397 | |
398 | /* release lwp0 */ | | 398 | /* release lwp0 */ |
399 | rump_unschedule_cpu(&lwp0); | | 399 | rump_unschedule_cpu(&lwp0); |
400 | lwp0.l_mutex = &unruntime_lock; | | 400 | lwp0.l_mutex = &unruntime_lock; |
401 | lwp0.l_pflag &= ~LP_RUNNING; | | 401 | lwp0.l_pflag &= ~LP_RUNNING; |
402 | lwp0rele(); | | 402 | lwp0rele(); |
403 | rumpuser_set_curlwp(NULL); | | 403 | rumpuser_set_curlwp(NULL); |
404 | | | 404 | |
405 | } else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) { | | 405 | } else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) { |
406 | rumpuser_set_curlwp(NULL); | | 406 | rumpuser_set_curlwp(NULL); |
407 | l->l_flag &= ~LW_RUMP_CLEAR; | | 407 | l->l_flag &= ~LW_RUMP_CLEAR; |
408 | } | | 408 | } |
409 | } | | 409 | } |
410 | | | 410 | |
411 | void | | 411 | void |
412 | rump_unschedule_cpu(struct lwp *l) | | 412 | rump_unschedule_cpu(struct lwp *l) |
413 | { | | 413 | { |
414 | | | 414 | |
415 | rump_unschedule_cpu_interlock(l, NULL); | | 415 | rump_unschedule_cpu_interlock(l, NULL); |
416 | } | | 416 | } |
417 | | | 417 | |
418 | void | | 418 | void |
419 | rump_unschedule_cpu_interlock(struct lwp *l, void *interlock) | | 419 | rump_unschedule_cpu_interlock(struct lwp *l, void *interlock) |
420 | { | | 420 | { |
421 | | | 421 | |
422 | if ((l->l_pflag & LP_INTR) == 0) | | 422 | if ((l->l_pflag & LP_INTR) == 0) |
423 | rump_softint_run(l->l_cpu); | | 423 | rump_softint_run(l->l_cpu); |
424 | rump_unschedule_cpu1(l, interlock); | | 424 | rump_unschedule_cpu1(l, interlock); |
425 | } | | 425 | } |
426 | | | 426 | |
427 | void | | 427 | void |
428 | rump_unschedule_cpu1(struct lwp *l, void *interlock) | | 428 | rump_unschedule_cpu1(struct lwp *l, void *interlock) |
429 | { | | 429 | { |
430 | struct rumpcpu *rcpu; | | 430 | struct rumpcpu *rcpu; |
431 | struct cpu_info *ci; | | 431 | struct cpu_info *ci; |
432 | void *old; | | 432 | void *old; |
433 | | | 433 | |
434 | ci = l->l_cpu; | | 434 | ci = l->l_cpu; |
435 | ci->ci_curlwp = NULL; | | 435 | ci->ci_curlwp = NULL; |
436 | rcpu = &rcpu_storage[ci-&rump_cpus[0]]; | | 436 | rcpu = &rcpu_storage[ci-&rump_cpus[0]]; |
437 | | | 437 | |
438 | KASSERT(rcpu->rcpu_ci == ci); | | 438 | KASSERT(rcpu->rcpu_ci == ci); |
439 | | | 439 | |
440 | /* | | 440 | /* |
441 | * Make sure all stores are seen before the CPU release. This | | 441 | * Make sure all stores are seen before the CPU release. This |
442 | * is relevant only in the non-fastpath scheduling case, but | | 442 | * is relevant only in the non-fastpath scheduling case, but |
443 | * we don't know here if that's going to happen, so need to | | 443 | * we don't know here if that's going to happen, so need to |
444 | * expect the worst. | | 444 | * expect the worst. |
| | | 445 | * |
| | | 446 | * If the scheduler interlock was requested by the caller, we |
| | | 447 | * need to obtain it before we release the CPU. Otherwise, we risk a |
| | | 448 | * race condition where another thread is scheduled onto the |
| | | 449 | * rump kernel CPU before our current thread can |
| | | 450 | * grab the interlock. |
445 | */ | | 451 | */ |
446 | membar_exit(); | | 452 | if (interlock == rcpu->rcpu_mtx) |
| | | 453 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); |
| | | 454 | else |
| | | 455 | membar_exit(); |
447 | | | 456 | |
448 | /* Release the CPU. */ | | 457 | /* Release the CPU. */ |
449 | old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l); | | 458 | old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l); |
450 | | | 459 | |
451 | /* No waiters? No problems. We're outta here. */ | | 460 | /* No waiters? No problems. We're outta here. */ |
452 | if (old == RCPULWP_BUSY) { | | 461 | if (old == RCPULWP_BUSY) { |
453 | /* Was the scheduler interlock requested? */ | | | |
454 | if (__predict_false(interlock == rcpu->rcpu_mtx)) | | | |
455 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); | | | |
456 | return; | | 462 | return; |
457 | } | | 463 | } |
458 | | | 464 | |
459 | KASSERT(old == RCPULWP_WANTED); | | 465 | KASSERT(old == RCPULWP_WANTED); |
460 | | | 466 | |
461 | /* | | 467 | /* |
462 | * Ok, things weren't so snappy. | | 468 | * Ok, things weren't so snappy. |
463 | * | | 469 | * |
464 | * Snailpath: take lock and signal anyone waiting for this CPU. | | 470 | * Snailpath: take lock and signal anyone waiting for this CPU. |
465 | */ | | 471 | */ |
466 | | | 472 | |
467 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); | | 473 | if (interlock != rcpu->rcpu_mtx) |
| | | 474 | rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); |
468 | if (rcpu->rcpu_wanted) | | 475 | if (rcpu->rcpu_wanted) |
469 | rumpuser_cv_broadcast(rcpu->rcpu_cv); | | 476 | rumpuser_cv_broadcast(rcpu->rcpu_cv); |
470 | | | 477 | if (interlock != rcpu->rcpu_mtx) |
471 | if (__predict_true(interlock != rcpu->rcpu_mtx)) | | | |
472 | rumpuser_mutex_exit(rcpu->rcpu_mtx); | | 478 | rumpuser_mutex_exit(rcpu->rcpu_mtx); |
473 | } | | 479 | } |
474 | | | 480 | |
475 | /* Give up and retake CPU (perhaps a different one) */ | | 481 | /* Give up and retake CPU (perhaps a different one) */ |
476 | void | | 482 | void |
477 | yield() | | 483 | yield() |
478 | { | | 484 | { |
479 | struct lwp *l = curlwp; | | 485 | struct lwp *l = curlwp; |
480 | int nlocks; | | 486 | int nlocks; |
481 | | | 487 | |
482 | KERNEL_UNLOCK_ALL(l, &nlocks); | | 488 | KERNEL_UNLOCK_ALL(l, &nlocks); |
483 | rump_unschedule_cpu(l); | | 489 | rump_unschedule_cpu(l); |
484 | rump_schedule_cpu(l); | | 490 | rump_schedule_cpu(l); |
485 | KERNEL_LOCK(nlocks, l); | | 491 | KERNEL_LOCK(nlocks, l); |
486 | } | | 492 | } |
487 | | | 493 | |
488 | void | | 494 | void |
489 | preempt() | | 495 | preempt() |
490 | { | | 496 | { |
491 | | | 497 | |
492 | yield(); | | 498 | yield(); |
493 | } | | 499 | } |
494 | | | 500 | |
495 | bool | | 501 | bool |
496 | kpreempt(uintptr_t where) | | 502 | kpreempt(uintptr_t where) |
497 | { | | 503 | { |
498 | | | 504 | |
499 | return false; | | 505 | return false; |
500 | } | | 506 | } |
501 | | | 507 | |
502 | /* | | 508 | /* |
503 | * There is no kernel thread preemption in rump currently. But call | | 509 | * There is no kernel thread preemption in rump currently. But call |
504 | * the implementing macros anyway in case they grow some side-effects | | 510 | * the implementing macros anyway in case they grow some side-effects |
505 | * down the road. | | 511 | * down the road. |
506 | */ | | 512 | */ |
507 | void | | 513 | void |
508 | kpreempt_disable(void) | | 514 | kpreempt_disable(void) |
509 | { | | 515 | { |
510 | | | 516 | |
511 | KPREEMPT_DISABLE(curlwp); | | 517 | KPREEMPT_DISABLE(curlwp); |
512 | } | | 518 | } |
513 | | | 519 | |
514 | void | | 520 | void |
515 | kpreempt_enable(void) | | 521 | kpreempt_enable(void) |
516 | { | | 522 | { |
517 | | | 523 | |
518 | KPREEMPT_ENABLE(curlwp); | | 524 | KPREEMPT_ENABLE(curlwp); |
519 | } | | 525 | } |
520 | | | 526 | |
521 | void | | 527 | void |
522 | suspendsched(void) | | 528 | suspendsched(void) |
523 | { | | 529 | { |
524 | | | 530 | |
525 | /* | | 531 | /* |
526 | * Could wait until everyone is out and block further entries, | | 532 | * Could wait until everyone is out and block further entries, |
527 | * but skip that for now. | | 533 | * but skip that for now. |
528 | */ | | 534 | */ |
529 | } | | 535 | } |
530 | | | 536 | |
531 | void | | 537 | void |
532 | sched_nice(struct proc *p, int level) | | 538 | sched_nice(struct proc *p, int level) |
533 | { | | 539 | { |
534 | | | 540 | |
535 | /* nothing to do for now */ | | 541 | /* nothing to do for now */ |
536 | } | | 542 | } |
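
For clarity on the reordering in rump_unschedule_cpu1() above: when the caller passes the CPU's own mutex as the scheduler interlock (as the schedlock condvar waits do), the mutex is now acquired before rcpu_prevlwp is swapped to mark the CPU free, and it is returned held to the caller. Taking it only after the release left a window in which another thread could be scheduled onto the freed rump CPU and grab the mutex first. Below is a minimal, hedged sketch of that ordering only, using pthreads and C11 atomics as stand-ins for the rumpuser primitives; every name in it (vcpu, vcpu_release, outgoing, wanted) is invented for illustration and is not part of the code above.

/*
 * Sketch only, not rump/NetBSD code: models the "take the interlock
 * before publishing the CPU as free" ordering from the diff above.
 * It always signals waiters; the real code has separate fast/slow paths.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct vcpu {
	pthread_mutex_t mtx;      /* stand-in for rcpu_mtx */
	pthread_cond_t  cv;       /* stand-in for rcpu_cv */
	_Atomic(void *) prevlwp;  /* stand-in for rcpu_prevlwp */
	int             wanted;   /* stand-in for rcpu_wanted, protected by mtx */
};

/*
 * Release the CPU on behalf of the outgoing thread.  If the caller
 * requested the interlock, take it *before* publishing the CPU as
 * free and return with it held; that closes the window in which
 * another thread could be scheduled onto the CPU and contend for the
 * mutex between the two steps.
 */
static void
vcpu_release(struct vcpu *vc, void *outgoing, bool want_interlock)
{
	if (want_interlock)
		pthread_mutex_lock(&vc->mtx);

	/* Publish the CPU as free; release ordering covers prior stores. */
	atomic_store_explicit(&vc->prevlwp, outgoing, memory_order_release);

	/* Wake anyone queued for this CPU. */
	if (!want_interlock)
		pthread_mutex_lock(&vc->mtx);
	if (vc->wanted > 0)
		pthread_cond_broadcast(&vc->cv);
	if (!want_interlock)
		pthread_mutex_unlock(&vc->mtx);
	/* With want_interlock, the mutex stays held for the caller's cv wait. */
}

int
main(void)
{
	struct vcpu vc = {
		.mtx = PTHREAD_MUTEX_INITIALIZER,
		.cv  = PTHREAD_COND_INITIALIZER,
	};
	int marker;

	vcpu_release(&vc, &marker, true);   /* returns with vc.mtx held */
	pthread_mutex_unlock(&vc.mtx);
	return 0;
}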