Thu Nov 30 14:40:46 2017 UTC
Pull up following revision(s) (requested by ozaki-r in ticket #405):
	sys/sys/pserialize.h: revision 1.2
	sys/kern/kern_lock.c: revision 1.160
	sys/kern/subr_pserialize.c: revision 1.9
	sys/rump/librump/rumpkern/emul.c: revision 1.184
	sys/rump/librump/rumpkern/emul.c: revision 1.185
	sys/rump/librump/rumpkern/rump.c: revision 1.330
Implement debugging feature for pserialize(9)
The debugging feature detects violations of pserialize constraints.
It causes a panic:
- if a context switch happens in a read section, or
- if a sleepable function is called in a read section.
The feature is enabled only if LOCKDEBUG is on.
Discussed on tech-kern@
Add missing inclusion of pserialize.h (fix build)
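
As an illustration of what the new checks catch (a minimal, hypothetical
sketch; the reader function below is not part of this change): with
LOCKDEBUG enabled, calling into sleepable code from inside a pserialize(9)
read section now panics instead of silently racing with
pserialize_perform().

	#include <sys/pserialize.h>
	#include <sys/systm.h>

	static void
	example_read_section(void)	/* hypothetical, for illustration only */
	{
		int s;

		s = pserialize_read_enter();
		/* ... lockless lookup of pserialize-protected data ... */

		/*
		 * Any path that reaches assert_sleepable() here, e.g. via the
		 * ASSERT_SLEEPABLE() macro in a sleepable primitive, now
		 * panics with reason "pserialize".  Likewise, a context
		 * switch taken before pserialize_read_exit() trips the new
		 * KASSERT in pserialize_switchpoint().
		 */
		assert_sleepable();	/* panic: "assert_sleepable: pserialize caller=..." */

		pserialize_read_exit(s);
	}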


(martin)
diff -r1.158 -r1.158.6.1 src/sys/kern/kern_lock.c
diff -r1.8 -r1.8.10.1 src/sys/kern/subr_pserialize.c
diff -r1.181.6.1 -r1.181.6.2 src/sys/rump/librump/rumpkern/emul.c
diff -r1.329 -r1.329.10.1 src/sys/rump/librump/rumpkern/rump.c
diff -r1.1 -r1.1.46.1 src/sys/sys/pserialize.h

cvs diff -r1.158 -r1.158.6.1 src/sys/kern/kern_lock.c

--- src/sys/kern/kern_lock.c 2017/01/26 04:11:56 1.158
+++ src/sys/kern/kern_lock.c 2017/11/30 14:40:46 1.158.6.1
@@ -1,314 +1,318 @@ @@ -1,314 +1,318 @@
1/* $NetBSD: kern_lock.c,v 1.158 2017/01/26 04:11:56 christos Exp $ */ 1/* $NetBSD: kern_lock.c,v 1.158.6.1 2017/11/30 14:40:46 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. 4 * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran. 9 * NASA Ames Research Center, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33#include <sys/cdefs.h> 33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.158 2017/01/26 04:11:56 christos Exp $"); 34__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.158.6.1 2017/11/30 14:40:46 martin Exp $");
35 35
36#include <sys/param.h> 36#include <sys/param.h>
37#include <sys/proc.h> 37#include <sys/proc.h>
38#include <sys/lock.h> 38#include <sys/lock.h>
39#include <sys/systm.h> 39#include <sys/systm.h>
40#include <sys/kernel.h> 40#include <sys/kernel.h>
41#include <sys/lockdebug.h> 41#include <sys/lockdebug.h>
42#include <sys/cpu.h> 42#include <sys/cpu.h>
43#include <sys/syslog.h> 43#include <sys/syslog.h>
44#include <sys/atomic.h> 44#include <sys/atomic.h>
45#include <sys/lwp.h> 45#include <sys/lwp.h>
 46#include <sys/pserialize.h>
46 47
47#include <machine/lock.h> 48#include <machine/lock.h>
48 49
49#include <dev/lockstat.h> 50#include <dev/lockstat.h>
50 51
51#define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0) 52#define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0)
52 53
53bool kernel_lock_dodebug; 54bool kernel_lock_dodebug;
54 55
55__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)] 56__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
56 __cacheline_aligned; 57 __cacheline_aligned;
57 58
58void 59void
59assert_sleepable(void) 60assert_sleepable(void)
60{ 61{
61 const char *reason; 62 const char *reason;
62 uint64_t pctr; 63 uint64_t pctr;
63 bool idle; 64 bool idle;
64 65
65 if (panicstr != NULL) { 66 if (panicstr != NULL) {
66 return; 67 return;
67 } 68 }
68 69
69 LOCKDEBUG_BARRIER(kernel_lock, 1); 70 LOCKDEBUG_BARRIER(kernel_lock, 1);
70 71
71 /* 72 /*
72 * Avoid disabling/re-enabling preemption here since this 73 * Avoid disabling/re-enabling preemption here since this
73 * routine may be called in delicate situations. 74 * routine may be called in delicate situations.
74 */ 75 */
75 do { 76 do {
76 pctr = lwp_pctr(); 77 pctr = lwp_pctr();
77 idle = CURCPU_IDLE_P(); 78 idle = CURCPU_IDLE_P();
78 } while (pctr != lwp_pctr()); 79 } while (pctr != lwp_pctr());
79 80
80 reason = NULL; 81 reason = NULL;
81 if (idle && !cold && 82 if (idle && !cold &&
82 kcpuset_isset(kcpuset_running, cpu_index(curcpu()))) { 83 kcpuset_isset(kcpuset_running, cpu_index(curcpu()))) {
83 reason = "idle"; 84 reason = "idle";
84 } 85 }
85 if (cpu_intr_p()) { 86 if (cpu_intr_p()) {
86 reason = "interrupt"; 87 reason = "interrupt";
87 } 88 }
88 if (cpu_softintr_p()) { 89 if (cpu_softintr_p()) {
89 reason = "softint"; 90 reason = "softint";
90 } 91 }
 92 if (!pserialize_not_in_read_section()) {
 93 reason = "pserialize";
 94 }
91 95
92 if (reason) { 96 if (reason) {
93 panic("%s: %s caller=%p", __func__, reason, 97 panic("%s: %s caller=%p", __func__, reason,
94 (void *)RETURN_ADDRESS); 98 (void *)RETURN_ADDRESS);
95 } 99 }
96} 100}
97 101
98/* 102/*
99 * Functions for manipulating the kernel_lock. We put them here 103 * Functions for manipulating the kernel_lock. We put them here
100 * so that they show up in profiles. 104 * so that they show up in profiles.
101 */ 105 */
102 106
103#define _KERNEL_LOCK_ABORT(msg) \ 107#define _KERNEL_LOCK_ABORT(msg) \
104 LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg) 108 LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)
105 109
106#ifdef LOCKDEBUG 110#ifdef LOCKDEBUG
107#define _KERNEL_LOCK_ASSERT(cond) \ 111#define _KERNEL_LOCK_ASSERT(cond) \
108do { \ 112do { \
109 if (!(cond)) \ 113 if (!(cond)) \
110 _KERNEL_LOCK_ABORT("assertion failed: " #cond); \ 114 _KERNEL_LOCK_ABORT("assertion failed: " #cond); \
111} while (/* CONSTCOND */ 0) 115} while (/* CONSTCOND */ 0)
112#else 116#else
113#define _KERNEL_LOCK_ASSERT(cond) /* nothing */ 117#define _KERNEL_LOCK_ASSERT(cond) /* nothing */
114#endif 118#endif
115 119
116void _kernel_lock_dump(volatile void *); 120void _kernel_lock_dump(volatile void *);
117 121
118lockops_t _kernel_lock_ops = { 122lockops_t _kernel_lock_ops = {
119 "Kernel lock", 123 "Kernel lock",
120 LOCKOPS_SPIN, 124 LOCKOPS_SPIN,
121 _kernel_lock_dump 125 _kernel_lock_dump
122}; 126};
123 127
124/* 128/*
125 * Initialize the kernel lock. 129 * Initialize the kernel lock.
126 */ 130 */
127void 131void
128kernel_lock_init(void) 132kernel_lock_init(void)
129{ 133{
130 134
131 __cpu_simple_lock_init(kernel_lock); 135 __cpu_simple_lock_init(kernel_lock);
132 kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops, 136 kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
133 RETURN_ADDRESS); 137 RETURN_ADDRESS);
134} 138}
135CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t)); 139CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));
136 140
137/* 141/*
138 * Print debugging information about the kernel lock. 142 * Print debugging information about the kernel lock.
139 */ 143 */
140void 144void
141_kernel_lock_dump(volatile void *junk) 145_kernel_lock_dump(volatile void *junk)
142{ 146{
143 struct cpu_info *ci = curcpu(); 147 struct cpu_info *ci = curcpu();
144 148
145 (void)junk; 149 (void)junk;
146 150
147 printf_nolog("curcpu holds : %18d wanted by: %#018lx\n", 151 printf_nolog("curcpu holds : %18d wanted by: %#018lx\n",
148 ci->ci_biglock_count, (long)ci->ci_biglock_wanted); 152 ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
149} 153}
150 154
151/* 155/*
152 * Acquire 'nlocks' holds on the kernel lock. 156 * Acquire 'nlocks' holds on the kernel lock.
153 */ 157 */
154void 158void
155_kernel_lock(int nlocks) 159_kernel_lock(int nlocks)
156{ 160{
157 struct cpu_info *ci; 161 struct cpu_info *ci;
158 LOCKSTAT_TIMER(spintime); 162 LOCKSTAT_TIMER(spintime);
159 LOCKSTAT_FLAG(lsflag); 163 LOCKSTAT_FLAG(lsflag);
160 struct lwp *owant; 164 struct lwp *owant;
161 u_int spins; 165 u_int spins;
162 int s; 166 int s;
163 struct lwp *l = curlwp; 167 struct lwp *l = curlwp;
164 168
165 _KERNEL_LOCK_ASSERT(nlocks > 0); 169 _KERNEL_LOCK_ASSERT(nlocks > 0);
166 170
167 s = splvm(); 171 s = splvm();
168 ci = curcpu(); 172 ci = curcpu();
169 if (ci->ci_biglock_count != 0) { 173 if (ci->ci_biglock_count != 0) {
170 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock)); 174 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
171 ci->ci_biglock_count += nlocks; 175 ci->ci_biglock_count += nlocks;
172 l->l_blcnt += nlocks; 176 l->l_blcnt += nlocks;
173 splx(s); 177 splx(s);
174 return; 178 return;
175 } 179 }
176 180
177 _KERNEL_LOCK_ASSERT(l->l_blcnt == 0); 181 _KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
178 LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS, 182 LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
179 0); 183 0);
180 184
181 if (__cpu_simple_lock_try(kernel_lock)) { 185 if (__cpu_simple_lock_try(kernel_lock)) {
182 ci->ci_biglock_count = nlocks; 186 ci->ci_biglock_count = nlocks;
183 l->l_blcnt = nlocks; 187 l->l_blcnt = nlocks;
184 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL, 188 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
185 RETURN_ADDRESS, 0); 189 RETURN_ADDRESS, 0);
186 splx(s); 190 splx(s);
187 return; 191 return;
188 } 192 }
189 193
190 /* 194 /*
191 * To remove the ordering constraint between adaptive mutexes 195 * To remove the ordering constraint between adaptive mutexes
192 * and kernel_lock we must make it appear as if this thread is 196 * and kernel_lock we must make it appear as if this thread is
193 * blocking. For non-interlocked mutex release, a store fence 197 * blocking. For non-interlocked mutex release, a store fence
194 * is required to ensure that the result of any mutex_exit() 198 * is required to ensure that the result of any mutex_exit()
195 * by the current LWP becomes visible on the bus before the set 199 * by the current LWP becomes visible on the bus before the set
196 * of ci->ci_biglock_wanted becomes visible. 200 * of ci->ci_biglock_wanted becomes visible.
197 */ 201 */
198 membar_producer(); 202 membar_producer();
199 owant = ci->ci_biglock_wanted; 203 owant = ci->ci_biglock_wanted;
200 ci->ci_biglock_wanted = l; 204 ci->ci_biglock_wanted = l;
201 205
202 /* 206 /*
203 * Spin until we acquire the lock. Once we have it, record the 207 * Spin until we acquire the lock. Once we have it, record the
204 * time spent with lockstat. 208 * time spent with lockstat.
205 */ 209 */
206 LOCKSTAT_ENTER(lsflag); 210 LOCKSTAT_ENTER(lsflag);
207 LOCKSTAT_START_TIMER(lsflag, spintime); 211 LOCKSTAT_START_TIMER(lsflag, spintime);
208 212
209 spins = 0; 213 spins = 0;
210 do { 214 do {
211 splx(s); 215 splx(s);
212 while (__SIMPLELOCK_LOCKED_P(kernel_lock)) { 216 while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
213 if (SPINLOCK_SPINOUT(spins)) { 217 if (SPINLOCK_SPINOUT(spins)) {
214 extern int start_init_exec; 218 extern int start_init_exec;
215 if (!start_init_exec) 219 if (!start_init_exec)
216 _KERNEL_LOCK_ABORT("spinout"); 220 _KERNEL_LOCK_ABORT("spinout");
217 } 221 }
218 SPINLOCK_BACKOFF_HOOK; 222 SPINLOCK_BACKOFF_HOOK;
219 SPINLOCK_SPIN_HOOK; 223 SPINLOCK_SPIN_HOOK;
220 } 224 }
221 s = splvm(); 225 s = splvm();
222 } while (!__cpu_simple_lock_try(kernel_lock)); 226 } while (!__cpu_simple_lock_try(kernel_lock));
223 227
224 ci->ci_biglock_count = nlocks; 228 ci->ci_biglock_count = nlocks;
225 l->l_blcnt = nlocks; 229 l->l_blcnt = nlocks;
226 LOCKSTAT_STOP_TIMER(lsflag, spintime); 230 LOCKSTAT_STOP_TIMER(lsflag, spintime);
227 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL, 231 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
228 RETURN_ADDRESS, 0); 232 RETURN_ADDRESS, 0);
229 if (owant == NULL) { 233 if (owant == NULL) {
230 LOCKSTAT_EVENT_RA(lsflag, kernel_lock, 234 LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
231 LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS); 235 LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
232 } 236 }
233 LOCKSTAT_EXIT(lsflag); 237 LOCKSTAT_EXIT(lsflag);
234 splx(s); 238 splx(s);
235 239
236 /* 240 /*
237 * Now that we have kernel_lock, reset ci_biglock_wanted. This 241 * Now that we have kernel_lock, reset ci_biglock_wanted. This
238 * store must be unbuffered (immediately visible on the bus) in 242 * store must be unbuffered (immediately visible on the bus) in
239 * order for non-interlocked mutex release to work correctly. 243 * order for non-interlocked mutex release to work correctly.
240 * It must be visible before a mutex_exit() can execute on this 244 * It must be visible before a mutex_exit() can execute on this
241 * processor. 245 * processor.
242 * 246 *
243 * Note: only where CAS is available in hardware will this be 247 * Note: only where CAS is available in hardware will this be
244 * an unbuffered write, but non-interlocked release cannot be 248 * an unbuffered write, but non-interlocked release cannot be
245 * done on CPUs without CAS in hardware. 249 * done on CPUs without CAS in hardware.
246 */ 250 */
247 (void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant); 251 (void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
248 252
249 /* 253 /*
250 * Issue a memory barrier as we have acquired a lock. This also 254 * Issue a memory barrier as we have acquired a lock. This also
251 * prevents stores from a following mutex_exit() being reordered 255 * prevents stores from a following mutex_exit() being reordered
252 * to occur before our store to ci_biglock_wanted above. 256 * to occur before our store to ci_biglock_wanted above.
253 */ 257 */
254 membar_enter(); 258 membar_enter();
255} 259}
256 260
257/* 261/*
258 * Release 'nlocks' holds on the kernel lock. If 'nlocks' is zero, release 262 * Release 'nlocks' holds on the kernel lock. If 'nlocks' is zero, release
259 * all holds. 263 * all holds.
260 */ 264 */
261void 265void
262_kernel_unlock(int nlocks, int *countp) 266_kernel_unlock(int nlocks, int *countp)
263{ 267{
264 struct cpu_info *ci; 268 struct cpu_info *ci;
265 u_int olocks; 269 u_int olocks;
266 int s; 270 int s;
267 struct lwp *l = curlwp; 271 struct lwp *l = curlwp;
268 272
269 _KERNEL_LOCK_ASSERT(nlocks < 2); 273 _KERNEL_LOCK_ASSERT(nlocks < 2);
270 274
271 olocks = l->l_blcnt; 275 olocks = l->l_blcnt;
272 276
273 if (olocks == 0) { 277 if (olocks == 0) {
274 _KERNEL_LOCK_ASSERT(nlocks <= 0); 278 _KERNEL_LOCK_ASSERT(nlocks <= 0);
275 if (countp != NULL) 279 if (countp != NULL)
276 *countp = 0; 280 *countp = 0;
277 return; 281 return;
278 } 282 }
279 283
280 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock)); 284 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
281 285
282 if (nlocks == 0) 286 if (nlocks == 0)
283 nlocks = olocks; 287 nlocks = olocks;
284 else if (nlocks == -1) { 288 else if (nlocks == -1) {
285 nlocks = 1; 289 nlocks = 1;
286 _KERNEL_LOCK_ASSERT(olocks == 1); 290 _KERNEL_LOCK_ASSERT(olocks == 1);
287 } 291 }
288 s = splvm(); 292 s = splvm();
289 ci = curcpu(); 293 ci = curcpu();
290 _KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt); 294 _KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
291 if (ci->ci_biglock_count == nlocks) { 295 if (ci->ci_biglock_count == nlocks) {
292 LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock, 296 LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
293 RETURN_ADDRESS, 0); 297 RETURN_ADDRESS, 0);
294 ci->ci_biglock_count = 0; 298 ci->ci_biglock_count = 0;
295 __cpu_simple_unlock(kernel_lock); 299 __cpu_simple_unlock(kernel_lock);
296 l->l_blcnt -= nlocks; 300 l->l_blcnt -= nlocks;
297 splx(s); 301 splx(s);
298 if (l->l_dopreempt) 302 if (l->l_dopreempt)
299 kpreempt(0); 303 kpreempt(0);
300 } else { 304 } else {
301 ci->ci_biglock_count -= nlocks; 305 ci->ci_biglock_count -= nlocks;
302 l->l_blcnt -= nlocks; 306 l->l_blcnt -= nlocks;
303 splx(s); 307 splx(s);
304 } 308 }
305 309
306 if (countp != NULL) 310 if (countp != NULL)
307 *countp = olocks; 311 *countp = olocks;
308} 312}
309 313
310bool 314bool
311_kernel_locked_p(void) 315_kernel_locked_p(void)
312{ 316{
313 return __SIMPLELOCK_LOCKED_P(kernel_lock); 317 return __SIMPLELOCK_LOCKED_P(kernel_lock);
314} 318}
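
The branch added to assert_sleepable() above costs nothing on non-LOCKDEBUG
kernels, because pserialize_not_in_read_section() then unconditionally
returns true. To actually exercise the new checks the kernel must be built
with LOCKDEBUG; a sketch of the relevant kernel config fragment (standard
options, nothing specific to this change):

	options 	LOCKDEBUG	# enables the pserialize read-section tracking
	options 	DIAGNOSTIC	# enables KASSERT(9), needed for the check
					# in pserialize_switchpoint() to fire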

cvs diff -r1.8 -r1.8.10.1 src/sys/kern/subr_pserialize.c

--- src/sys/kern/subr_pserialize.c 2015/06/12 19:18:30 1.8
+++ src/sys/kern/subr_pserialize.c 2017/11/30 14:40:46 1.8.10.1
@@ -1,263 +1,355 @@ @@ -1,263 +1,355 @@
1/* $NetBSD: subr_pserialize.c,v 1.8 2015/06/12 19:18:30 dholland Exp $ */ 1/* $NetBSD: subr_pserialize.c,v 1.8.10.1 2017/11/30 14:40:46 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. 4 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29/* 29/*
30 * Passive serialization. 30 * Passive serialization.
31 * 31 *
32 * Implementation accurately matches the lapsed US patent 4809168, therefore 32 * Implementation accurately matches the lapsed US patent 4809168, therefore
33 * code is patent-free in the United States. Your use of this code is at 33 * code is patent-free in the United States. Your use of this code is at
34 * your own risk. 34 * your own risk.
35 *  35 *
36 * Note for NetBSD developers: all changes to this source file must be 36 * Note for NetBSD developers: all changes to this source file must be
37 * approved by the <core>. 37 * approved by the <core>.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.8 2015/06/12 19:18:30 dholland Exp $"); 41__KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.8.10.1 2017/11/30 14:40:46 martin Exp $");
42 42
43#include <sys/param.h> 43#include <sys/param.h>
44 44
45#include <sys/condvar.h> 45#include <sys/condvar.h>
46#include <sys/cpu.h> 46#include <sys/cpu.h>
47#include <sys/evcnt.h> 47#include <sys/evcnt.h>
48#include <sys/kmem.h> 48#include <sys/kmem.h>
49#include <sys/mutex.h> 49#include <sys/mutex.h>
50#include <sys/pserialize.h> 50#include <sys/pserialize.h>
51#include <sys/proc.h> 51#include <sys/proc.h>
52#include <sys/queue.h> 52#include <sys/queue.h>
53#include <sys/xcall.h> 53#include <sys/xcall.h>
54 54
55struct pserialize { 55struct pserialize {
56 TAILQ_ENTRY(pserialize) psz_chain; 56 TAILQ_ENTRY(pserialize) psz_chain;
57 lwp_t * psz_owner; 57 lwp_t * psz_owner;
58 kcpuset_t * psz_target; 58 kcpuset_t * psz_target;
59 kcpuset_t * psz_pass; 59 kcpuset_t * psz_pass;
60}; 60};
61 61
62static u_int psz_work_todo __cacheline_aligned; 62static u_int psz_work_todo __cacheline_aligned;
63static kmutex_t psz_lock __cacheline_aligned; 63static kmutex_t psz_lock __cacheline_aligned;
64static struct evcnt psz_ev_excl __cacheline_aligned; 64static struct evcnt psz_ev_excl __cacheline_aligned;
65 65
66/* 66/*
67 * As defined in "Method 1": 67 * As defined in "Method 1":
68 * q0: "0 MP checkpoints have occured". 68 * q0: "0 MP checkpoints have occured".
69 * q1: "1 MP checkpoint has occured". 69 * q1: "1 MP checkpoint has occured".
70 * q2: "2 MP checkpoints have occured". 70 * q2: "2 MP checkpoints have occured".
71 */ 71 */
72static TAILQ_HEAD(, pserialize) psz_queue0 __cacheline_aligned; 72static TAILQ_HEAD(, pserialize) psz_queue0 __cacheline_aligned;
73static TAILQ_HEAD(, pserialize) psz_queue1 __cacheline_aligned; 73static TAILQ_HEAD(, pserialize) psz_queue1 __cacheline_aligned;
74static TAILQ_HEAD(, pserialize) psz_queue2 __cacheline_aligned; 74static TAILQ_HEAD(, pserialize) psz_queue2 __cacheline_aligned;
75 75
 76#ifdef LOCKDEBUG
 77#include <sys/percpu.h>
 78
 79static percpu_t *psz_debug_nreads __cacheline_aligned;
 80#endif
 81
76/* 82/*
77 * pserialize_init: 83 * pserialize_init:
78 * 84 *
79 * Initialize passive serialization structures. 85 * Initialize passive serialization structures.
80 */ 86 */
81void 87void
82pserialize_init(void) 88pserialize_init(void)
83{ 89{
84 90
85 psz_work_todo = 0; 91 psz_work_todo = 0;
86 TAILQ_INIT(&psz_queue0); 92 TAILQ_INIT(&psz_queue0);
87 TAILQ_INIT(&psz_queue1); 93 TAILQ_INIT(&psz_queue1);
88 TAILQ_INIT(&psz_queue2); 94 TAILQ_INIT(&psz_queue2);
89 mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED); 95 mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED);
90 evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL, 96 evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL,
91 "pserialize", "exclusive access"); 97 "pserialize", "exclusive access");
 98#ifdef LOCKDEBUG
 99 psz_debug_nreads = percpu_alloc(sizeof(uint32_t));
 100#endif
92} 101}
93 102
94/* 103/*
95 * pserialize_create: 104 * pserialize_create:
96 * 105 *
97 * Create and initialize a passive serialization object. 106 * Create and initialize a passive serialization object.
98 */ 107 */
99pserialize_t 108pserialize_t
100pserialize_create(void) 109pserialize_create(void)
101{ 110{
102 pserialize_t psz; 111 pserialize_t psz;
103 112
104 psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP); 113 psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP);
105 kcpuset_create(&psz->psz_target, true); 114 kcpuset_create(&psz->psz_target, true);
106 kcpuset_create(&psz->psz_pass, true); 115 kcpuset_create(&psz->psz_pass, true);
107 psz->psz_owner = NULL; 116 psz->psz_owner = NULL;
108 117
109 return psz; 118 return psz;
110} 119}
111 120
112/* 121/*
113 * pserialize_destroy: 122 * pserialize_destroy:
114 * 123 *
115 * Destroy a passive serialization object. 124 * Destroy a passive serialization object.
116 */ 125 */
117void 126void
118pserialize_destroy(pserialize_t psz) 127pserialize_destroy(pserialize_t psz)
119{ 128{
120 129
121 KASSERT(psz->psz_owner == NULL); 130 KASSERT(psz->psz_owner == NULL);
122 131
123 kcpuset_destroy(psz->psz_target); 132 kcpuset_destroy(psz->psz_target);
124 kcpuset_destroy(psz->psz_pass); 133 kcpuset_destroy(psz->psz_pass);
125 kmem_free(psz, sizeof(struct pserialize)); 134 kmem_free(psz, sizeof(struct pserialize));
126} 135}
127 136
128/* 137/*
129 * pserialize_perform: 138 * pserialize_perform:
130 * 139 *
131 * Perform the write side of passive serialization. The calling 140 * Perform the write side of passive serialization. The calling
132 * thread holds an exclusive lock on the data object(s) being updated. 141 * thread holds an exclusive lock on the data object(s) being updated.
133 * We wait until every processor in the system has made at least two 142 * We wait until every processor in the system has made at least two
134 * passes through cpu_switchto(). The wait is made with the caller's 143 * passes through cpu_switchto(). The wait is made with the caller's
135 * update lock held, but is short term. 144 * update lock held, but is short term.
136 */ 145 */
137void 146void
138pserialize_perform(pserialize_t psz) 147pserialize_perform(pserialize_t psz)
139{ 148{
140 uint64_t xc; 149 uint64_t xc;
141 150
142 KASSERT(!cpu_intr_p()); 151 KASSERT(!cpu_intr_p());
143 KASSERT(!cpu_softintr_p()); 152 KASSERT(!cpu_softintr_p());
144 153
145 if (__predict_false(panicstr != NULL)) { 154 if (__predict_false(panicstr != NULL)) {
146 return; 155 return;
147 } 156 }
148 KASSERT(psz->psz_owner == NULL); 157 KASSERT(psz->psz_owner == NULL);
149 KASSERT(ncpu > 0); 158 KASSERT(ncpu > 0);
150 159
151 /* 160 /*
152 * Set up the object and put it onto the queue. The lock 161 * Set up the object and put it onto the queue. The lock
153 * activity here provides the necessary memory barrier to 162 * activity here provides the necessary memory barrier to
154 * make the caller's data update completely visible to 163 * make the caller's data update completely visible to
155 * other processors. 164 * other processors.
156 */ 165 */
157 psz->psz_owner = curlwp; 166 psz->psz_owner = curlwp;
158 kcpuset_copy(psz->psz_target, kcpuset_running); 167 kcpuset_copy(psz->psz_target, kcpuset_running);
159 kcpuset_zero(psz->psz_pass); 168 kcpuset_zero(psz->psz_pass);
160 169
161 mutex_spin_enter(&psz_lock); 170 mutex_spin_enter(&psz_lock);
162 TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain); 171 TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain);
163 psz_work_todo++; 172 psz_work_todo++;
164 173
165 do { 174 do {
166 mutex_spin_exit(&psz_lock); 175 mutex_spin_exit(&psz_lock);
167 176
168 /* 177 /*
169 * Force some context switch activity on every CPU, as 178 * Force some context switch activity on every CPU, as
170 * the system may not be busy. Pause to not flood. 179 * the system may not be busy. Pause to not flood.
171 */ 180 */
172 xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL); 181 xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL);
173 xc_wait(xc); 182 xc_wait(xc);
174 kpause("psrlz", false, 1, NULL); 183 kpause("psrlz", false, 1, NULL);
175 184
176 mutex_spin_enter(&psz_lock); 185 mutex_spin_enter(&psz_lock);
177 } while (!kcpuset_iszero(psz->psz_target)); 186 } while (!kcpuset_iszero(psz->psz_target));
178 187
179 psz_ev_excl.ev_count++; 188 psz_ev_excl.ev_count++;
180 mutex_spin_exit(&psz_lock); 189 mutex_spin_exit(&psz_lock);
181 190
182 psz->psz_owner = NULL; 191 psz->psz_owner = NULL;
183} 192}
184 193
185int 194int
186pserialize_read_enter(void) 195pserialize_read_enter(void)
187{ 196{
 197 int s;
188 198
189 KASSERT(!cpu_intr_p()); 199 KASSERT(!cpu_intr_p());
190 return splsoftserial(); 200 s = splsoftserial();
 201#ifdef LOCKDEBUG
 202 {
 203 uint32_t *nreads;
 204 nreads = percpu_getref(psz_debug_nreads);
 205 (*nreads)++;
 206 if (*nreads == 0)
 207 panic("nreads overflow");
 208 percpu_putref(psz_debug_nreads);
 209 }
 210#endif
 211 return s;
191} 212}
192 213
193void 214void
194pserialize_read_exit(int s) 215pserialize_read_exit(int s)
195{ 216{
196 217
 218#ifdef LOCKDEBUG
 219 {
 220 uint32_t *nreads;
 221 nreads = percpu_getref(psz_debug_nreads);
 222 (*nreads)--;
 223 if (*nreads == UINT_MAX)
 224 panic("nreads underflow");
 225 percpu_putref(psz_debug_nreads);
 226 }
 227#endif
197 splx(s); 228 splx(s);
198} 229}
199 230
200/* 231/*
201 * pserialize_switchpoint: 232 * pserialize_switchpoint:
202 * 233 *
203 * Monitor system context switch activity. Called from machine 234 * Monitor system context switch activity. Called from machine
204 * independent code after mi_switch() returns. 235 * independent code after mi_switch() returns.
205 */  236 */
206void 237void
207pserialize_switchpoint(void) 238pserialize_switchpoint(void)
208{ 239{
209 pserialize_t psz, next; 240 pserialize_t psz, next;
210 cpuid_t cid; 241 cpuid_t cid;
211 242
 	 243 /* We must ensure we do not come here from inside a read section. */
 244 KASSERT(pserialize_not_in_read_section());
 245
212 /* 246 /*
213 * If no updates pending, bail out. No need to lock in order to 247 * If no updates pending, bail out. No need to lock in order to
214 * test psz_work_todo; the only ill effect of missing an update 248 * test psz_work_todo; the only ill effect of missing an update
215 * would be to delay LWPs waiting in pserialize_perform(). That 249 * would be to delay LWPs waiting in pserialize_perform(). That
216 * will not happen because updates are on the queue before an 250 * will not happen because updates are on the queue before an
217 * xcall is generated (serialization) to tickle every CPU. 251 * xcall is generated (serialization) to tickle every CPU.
218 */ 252 */
219 if (__predict_true(psz_work_todo == 0)) { 253 if (__predict_true(psz_work_todo == 0)) {
220 return; 254 return;
221 } 255 }
222 mutex_spin_enter(&psz_lock); 256 mutex_spin_enter(&psz_lock);
223 cid = cpu_index(curcpu()); 257 cid = cpu_index(curcpu());
224 258
225 /* 259 /*
226 * At first, scan through the second queue and update each request, 260 * At first, scan through the second queue and update each request,
227 * if passed all processors, then transfer to the third queue.  261 * if passed all processors, then transfer to the third queue.
228 */ 262 */
229 for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) { 263 for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) {
230 next = TAILQ_NEXT(psz, psz_chain); 264 next = TAILQ_NEXT(psz, psz_chain);
231 kcpuset_set(psz->psz_pass, cid); 265 kcpuset_set(psz->psz_pass, cid);
232 if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { 266 if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
233 continue; 267 continue;
234 } 268 }
235 kcpuset_zero(psz->psz_pass); 269 kcpuset_zero(psz->psz_pass);
236 TAILQ_REMOVE(&psz_queue1, psz, psz_chain); 270 TAILQ_REMOVE(&psz_queue1, psz, psz_chain);
237 TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain); 271 TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain);
238 } 272 }
239 /* 273 /*
240 * Scan through the first queue and update each request, 274 * Scan through the first queue and update each request,
241 * if passed all processors, then move to the second queue.  275 * if passed all processors, then move to the second queue.
242 */ 276 */
243 for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) { 277 for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) {
244 next = TAILQ_NEXT(psz, psz_chain); 278 next = TAILQ_NEXT(psz, psz_chain);
245 kcpuset_set(psz->psz_pass, cid); 279 kcpuset_set(psz->psz_pass, cid);
246 if (!kcpuset_match(psz->psz_pass, psz->psz_target)) { 280 if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
247 continue; 281 continue;
248 } 282 }
249 kcpuset_zero(psz->psz_pass); 283 kcpuset_zero(psz->psz_pass);
250 TAILQ_REMOVE(&psz_queue0, psz, psz_chain); 284 TAILQ_REMOVE(&psz_queue0, psz, psz_chain);
251 TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain); 285 TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain);
252 } 286 }
253 /* 287 /*
254 * Process the third queue: entries have been seen twice on every 288 * Process the third queue: entries have been seen twice on every
255 * processor, remove from the queue and notify the updating thread. 289 * processor, remove from the queue and notify the updating thread.
256 */ 290 */
257 while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) { 291 while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) {
258 TAILQ_REMOVE(&psz_queue2, psz, psz_chain); 292 TAILQ_REMOVE(&psz_queue2, psz, psz_chain);
259 kcpuset_zero(psz->psz_target); 293 kcpuset_zero(psz->psz_target);
260 psz_work_todo--; 294 psz_work_todo--;
261 } 295 }
262 mutex_spin_exit(&psz_lock); 296 mutex_spin_exit(&psz_lock);
263} 297}
 298
 299/*
 300 * pserialize_in_read_section:
 301 *
 302 * True if the caller is in a pserialize read section. To be used only
 303 * for diagnostic assertions where we want to guarantee the condition like:
 304 *
 305 * KASSERT(pserialize_in_read_section());
 306 */
 307bool
 308pserialize_in_read_section(void)
 309{
 310#ifdef LOCKDEBUG
 311 uint32_t *nreads;
 312 bool in;
 313
 314 /* Not initialized yet */
 315 if (__predict_false(psz_debug_nreads == NULL))
 316 return true;
 317
 318 nreads = percpu_getref(psz_debug_nreads);
 319 in = *nreads != 0;
 320 percpu_putref(psz_debug_nreads);
 321
 322 return in;
 323#else
 324 return true;
 325#endif
 326}
 327
 328/*
 329 * pserialize_not_in_read_section:
 330 *
 331 * True if the caller is not in a pserialize read section. To be used only
 332 * for diagnostic assertions where we want to guarantee the condition like:
 333 *
 334 * KASSERT(pserialize_not_in_read_section());
 335 */
 336bool
 337pserialize_not_in_read_section(void)
 338{
 339#ifdef LOCKDEBUG
 340 uint32_t *nreads;
 341 bool notin;
 342
 343 /* Not initialized yet */
 344 if (__predict_false(psz_debug_nreads == NULL))
 345 return true;
 346
 347 nreads = percpu_getref(psz_debug_nreads);
 348 notin = *nreads == 0;
 349 percpu_putref(psz_debug_nreads);
 350
 351 return notin;
 352#else
 353 return true;
 354#endif
 355}
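
The two new predicates are intended purely for diagnostic assertions; both
deliberately return true before initialization and on non-LOCKDEBUG kernels,
so such assertions never fire spuriously. A hypothetical sketch of the
intended use (the list and its types are illustrative, not part of this
commit); symmetrically, code that must never run inside a read section can
assert pserialize_not_in_read_section(), as assert_sleepable() now does:

	#include <sys/pserialize.h>
	#include <sys/queue.h>
	#include <sys/systm.h>

	struct frobentry {
		LIST_ENTRY(frobentry)	fe_entries;
		int			fe_key;
	};
	static LIST_HEAD(, frobentry)	froblist = LIST_HEAD_INITIALIZER(froblist);

	/* Lookup that may only be called from within a pserialize read section. */
	static struct frobentry *
	frob_lookup(int key)
	{
		struct frobentry *fe;

		KASSERT(pserialize_in_read_section());
		LIST_FOREACH(fe, &froblist, fe_entries) {
			if (fe->fe_key == key)
				return fe;
		}
		return NULL;
	}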

cvs diff -r1.181.6.1 -r1.181.6.2 src/sys/rump/librump/rumpkern/emul.c

--- src/sys/rump/librump/rumpkern/emul.c 2017/06/04 20:35:01 1.181.6.1
+++ src/sys/rump/librump/rumpkern/emul.c 2017/11/30 14:40:46 1.181.6.2
@@ -1,380 +1,393 @@ @@ -1,380 +1,393 @@
1/* $NetBSD: emul.c,v 1.181.6.1 2017/06/04 20:35:01 bouyer Exp $ */ 1/* $NetBSD: emul.c,v 1.181.6.2 2017/11/30 14:40:46 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved. 4 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE. 25 * SUCH DAMAGE.
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: emul.c,v 1.181.6.1 2017/06/04 20:35:01 bouyer Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: emul.c,v 1.181.6.2 2017/11/30 14:40:46 martin Exp $");
30 30
31#include <sys/param.h> 31#include <sys/param.h>
32#include <sys/cprng.h> 32#include <sys/cprng.h>
33#include <sys/filedesc.h> 33#include <sys/filedesc.h>
34#include <sys/fstrans.h> 34#include <sys/fstrans.h>
35#include <sys/kauth.h> 35#include <sys/kauth.h>
36#include <sys/module.h> 36#include <sys/module.h>
37#include <sys/reboot.h> 37#include <sys/reboot.h>
38#include <sys/syscall.h> 38#include <sys/syscall.h>
 39#include <sys/pserialize.h>
39#ifdef LOCKDEBUG 40#ifdef LOCKDEBUG
40#include <sys/sleepq.h> 41#include <sys/sleepq.h>
41#endif 42#endif
42 43
43#include <dev/cons.h> 44#include <dev/cons.h>
44 45
45#include <rump-sys/kern.h> 46#include <rump-sys/kern.h>
46 47
47#include <rump/rumpuser.h> 48#include <rump/rumpuser.h>
48 49
49void (*rump_vfs_fini)(void) = (void *)nullop; 50void (*rump_vfs_fini)(void) = (void *)nullop;
50 51
51/* 52/*
52 * physmem is largely unused (except for nmbcluster calculations), 53 * physmem is largely unused (except for nmbcluster calculations),
53 * so pick a default value which suits ZFS. if an application wants 54 * so pick a default value which suits ZFS. if an application wants
54 * a very small memory footprint, it can still adjust this before 55 * a very small memory footprint, it can still adjust this before
55 * calling rump_init() 56 * calling rump_init()
56 */ 57 */
57#define PHYSMEM 512*256 58#define PHYSMEM 512*256
58psize_t physmem = PHYSMEM; 59psize_t physmem = PHYSMEM;
59int nkmempages = PHYSMEM/2; /* from le chapeau */ 60int nkmempages = PHYSMEM/2; /* from le chapeau */
60#undef PHYSMEM 61#undef PHYSMEM
61 62
62struct vnode *rootvp; 63struct vnode *rootvp;
63dev_t rootdev = NODEV; 64dev_t rootdev = NODEV;
64 65
65const int schedppq = 1; 66const int schedppq = 1;
66bool mp_online = false; 67bool mp_online = false;
67struct timespec boottime; 68struct timespec boottime;
68int cold = 1; 69int cold = 1;
69int boothowto = AB_SILENT; 70int boothowto = AB_SILENT;
70struct tty *constty; 71struct tty *constty;
71 72
72const struct bdevsw *bdevsw0[255]; 73const struct bdevsw *bdevsw0[255];
73const struct bdevsw **bdevsw = bdevsw0; 74const struct bdevsw **bdevsw = bdevsw0;
74const int sys_cdevsws = 255; 75const int sys_cdevsws = 255;
75int max_cdevsws = 255; 76int max_cdevsws = 255;
76 77
77const struct cdevsw *cdevsw0[255]; 78const struct cdevsw *cdevsw0[255];
78const struct cdevsw **cdevsw = cdevsw0; 79const struct cdevsw **cdevsw = cdevsw0;
79const int sys_bdevsws = 255; 80const int sys_bdevsws = 255;
80int max_bdevsws = 255; 81int max_bdevsws = 255;
81 82
82int mem_no = 2; 83int mem_no = 2;
83 84
84device_t booted_device; 85device_t booted_device;
85device_t booted_wedge; 86device_t booted_wedge;
86int booted_partition; 87int booted_partition;
87 88
88/* XXX: unused */ 89/* XXX: unused */
89kmutex_t tty_lock; 90kmutex_t tty_lock;
90krwlock_t exec_lock; 91krwlock_t exec_lock;
91 92
92/* sparc doesn't sport constant page size, pretend we have 4k pages */ 93/* sparc doesn't sport constant page size, pretend we have 4k pages */
93#ifdef __sparc__ 94#ifdef __sparc__
94int nbpg = 4096; 95int nbpg = 4096;
95int pgofset = 4096-1; 96int pgofset = 4096-1;
96int pgshift = 12; 97int pgshift = 12;
97#endif 98#endif
98 99
99/* on sun3 VM_MAX_ADDRESS is a const variable */ 100/* on sun3 VM_MAX_ADDRESS is a const variable */
100/* XXX: should be moved into rump.c and initialize for sun3 and sun3x? */ 101/* XXX: should be moved into rump.c and initialize for sun3 and sun3x? */
101#ifdef sun3 102#ifdef sun3
102const vaddr_t kernbase = KERNBASE3; 103const vaddr_t kernbase = KERNBASE3;
103#endif 104#endif
104 105
105struct loadavg averunnable = { 106struct loadavg averunnable = {
106 { 0 * FSCALE, 107 { 0 * FSCALE,
107 1 * FSCALE, 108 1 * FSCALE,
108 11 * FSCALE, }, 109 11 * FSCALE, },
109 FSCALE, 110 FSCALE,
110}; 111};
111 112
112/* 113/*
113 * Include the autogenerated list of auto-loadable syscalls 114 * Include the autogenerated list of auto-loadable syscalls
114 */ 115 */
115#include <kern/syscalls_autoload.c> 116#include <kern/syscalls_autoload.c>
116 117
117struct emul emul_netbsd = { 118struct emul emul_netbsd = {
118 .e_name = "netbsd-rump", 119 .e_name = "netbsd-rump",
119 .e_sysent = rump_sysent, 120 .e_sysent = rump_sysent,
120#ifndef __HAVE_MINIMAL_EMUL 121#ifndef __HAVE_MINIMAL_EMUL
121 .e_nsysent = SYS_NSYSENT, 122 .e_nsysent = SYS_NSYSENT,
122#endif 123#endif
123 .e_vm_default_addr = uvm_default_mapaddr, 124 .e_vm_default_addr = uvm_default_mapaddr,
124#ifdef __HAVE_SYSCALL_INTERN 125#ifdef __HAVE_SYSCALL_INTERN
125 .e_syscall_intern = syscall_intern, 126 .e_syscall_intern = syscall_intern,
126#endif 127#endif
127 .e_sc_autoload = netbsd_syscalls_autoload, 128 .e_sc_autoload = netbsd_syscalls_autoload,
128}; 129};
129 130
130cprng_strong_t *kern_cprng; 131cprng_strong_t *kern_cprng;
131 132
132/* not used, but need the symbols for pointer comparisons */ 133/* not used, but need the symbols for pointer comparisons */
133syncobj_t mutex_syncobj, rw_syncobj; 134syncobj_t mutex_syncobj, rw_syncobj;
134 135
135int 136int
136kpause(const char *wmesg, bool intr, int timeo, kmutex_t *mtx) 137kpause(const char *wmesg, bool intr, int timeo, kmutex_t *mtx)
137{ 138{
138 extern int hz; 139 extern int hz;
139 int rv __diagused; 140 int rv __diagused;
140 uint64_t sec, nsec; 141 uint64_t sec, nsec;
141 142
142 if (mtx) 143 if (mtx)
143 mutex_exit(mtx); 144 mutex_exit(mtx);
144 145
145 sec = timeo / hz; 146 sec = timeo / hz;
146 nsec = (timeo % hz) * (1000000000 / hz); 147 nsec = (timeo % hz) * (1000000000 / hz);
147 rv = rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, sec, nsec); 148 rv = rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, sec, nsec);
148 KASSERT(rv == 0); 149 KASSERT(rv == 0);
149 150
150 if (mtx) 151 if (mtx)
151 mutex_enter(mtx); 152 mutex_enter(mtx);
152 153
153 return 0; 154 return 0;
154} 155}
155 156
156vaddr_t 157vaddr_t
157calc_cache_size(vsize_t vasz, int pct, int va_pct) 158calc_cache_size(vsize_t vasz, int pct, int va_pct)
158{ 159{
159 paddr_t t; 160 paddr_t t;
160 161
161 t = (paddr_t)physmem * pct / 100 * PAGE_SIZE; 162 t = (paddr_t)physmem * pct / 100 * PAGE_SIZE;
162 if ((vaddr_t)t != t) { 163 if ((vaddr_t)t != t) {
163 panic("%s: needs tweak", __func__); 164 panic("%s: needs tweak", __func__);
164 } 165 }
165 return t; 166 return t;
166} 167}
167 168
 169#define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0)
 170
168void 171void
169assert_sleepable(void) 172assert_sleepable(void)
170{ 173{
 174 const char *reason = NULL;
171 175
172 /* always sleepable, although we should improve this */ 176 /* always sleepable, although we should improve this */
 177
 178 if (!pserialize_not_in_read_section()) {
 179 reason = "pserialize";
 180 }
 181
 182 if (reason) {
 183 panic("%s: %s caller=%p", __func__, reason,
 184 (void *)RETURN_ADDRESS);
 185 }
173} 186}
174 187
175void 188void
176module_init_md(void) 189module_init_md(void)
177{ 190{
178 191
179 /* 192 /*
180 * Nothing for now. However, we should load the librump 193 * Nothing for now. However, we should load the librump
181 * symbol table. 194 * symbol table.
182 */ 195 */
183} 196}
184 197
185/* 198/*
186 * Try to emulate all the MD definitions of DELAY() / delay(). 199 * Try to emulate all the MD definitions of DELAY() / delay().
187 * Would be nice to fix the #defines in MD headers, but this quicker. 200 * Would be nice to fix the #defines in MD headers, but this quicker.
188 * 201 *
189 * XXX: we'd need a rumpuser_clock_sleep_nowrap() here. Since we 202 * XXX: we'd need a rumpuser_clock_sleep_nowrap() here. Since we
190 * don't have it in the current hypercall revision, busyloop. 203 * don't have it in the current hypercall revision, busyloop.
191 * Note that rather than calibrate a loop delay and work with that, 204 * Note that rather than calibrate a loop delay and work with that,
192 * get call gettime (which does not block) in a loop to make sure 205 * get call gettime (which does not block) in a loop to make sure
193 * we didn't get virtual ghosttime. That might be slightly inaccurate 206 * we didn't get virtual ghosttime. That might be slightly inaccurate
194 * for very small delays ... 207 * for very small delays ...
195 * 208 *
196 * The other option would be to run a thread in the hypervisor which 209 * The other option would be to run a thread in the hypervisor which
197 * sleeps for us and we can wait for it using rumpuser_cv_wait_nowrap() 210 * sleeps for us and we can wait for it using rumpuser_cv_wait_nowrap()
198 * Probably too fussy. Better just wait for hypercall rev 18 ;) 211 * Probably too fussy. Better just wait for hypercall rev 18 ;)
199 */ 212 */
200static void 213static void
201rump_delay(unsigned int us) 214rump_delay(unsigned int us)
202{ 215{
203 struct timespec target, tmp; 216 struct timespec target, tmp;
204 uint64_t sec, sec_ini, sec_now; 217 uint64_t sec, sec_ini, sec_now;
205 long nsec, nsec_ini, nsec_now; 218 long nsec, nsec_ini, nsec_now;
206 int loops; 219 int loops;
207 220
208 rumpuser_clock_gettime(RUMPUSER_CLOCK_ABSMONO, &sec_ini, &nsec_ini); 221 rumpuser_clock_gettime(RUMPUSER_CLOCK_ABSMONO, &sec_ini, &nsec_ini);
209 222
210#ifdef __mac68k__ 223#ifdef __mac68k__
211 sec = us / 1000; 224 sec = us / 1000;
212 nsec = (us % 1000) * 1000000; 225 nsec = (us % 1000) * 1000000;
213#else 226#else
214 sec = us / 1000000; 227 sec = us / 1000000;
215 nsec = (us % 1000000) * 1000; 228 nsec = (us % 1000000) * 1000;
216#endif 229#endif
217 230
218 target.tv_sec = sec_ini; 231 target.tv_sec = sec_ini;
219 tmp.tv_sec = sec; 232 tmp.tv_sec = sec;
220 target.tv_nsec = nsec_ini; 233 target.tv_nsec = nsec_ini;
221 tmp.tv_nsec = nsec; 234 tmp.tv_nsec = nsec;
222 timespecadd(&target, &tmp, &target); 235 timespecadd(&target, &tmp, &target);
223 236
224 if (__predict_false(sec != 0)) 237 if (__predict_false(sec != 0))
225 printf("WARNING: over 1s delay\n"); 238 printf("WARNING: over 1s delay\n");
226 239
227 for (loops = 0; loops < 1000*1000*100; loops++) { 240 for (loops = 0; loops < 1000*1000*100; loops++) {
228 struct timespec cur; 241 struct timespec cur;
229 242
230 rumpuser_clock_gettime(RUMPUSER_CLOCK_ABSMONO, 243 rumpuser_clock_gettime(RUMPUSER_CLOCK_ABSMONO,
231 &sec_now, &nsec_now); 244 &sec_now, &nsec_now);
232 cur.tv_sec = sec_now; 245 cur.tv_sec = sec_now;
233 cur.tv_nsec = nsec_now; 246 cur.tv_nsec = nsec_now;
234 if (timespeccmp(&cur, &target, >=)) { 247 if (timespeccmp(&cur, &target, >=)) {
235 return; 248 return;
236 } 249 }
237 } 250 }
238 printf("WARNING: DELAY ESCAPED\n"); 251 printf("WARNING: DELAY ESCAPED\n");
239} 252}
240void (*delay_func)(unsigned int) = rump_delay; 253void (*delay_func)(unsigned int) = rump_delay;
241__strong_alias(delay,rump_delay); 254__strong_alias(delay,rump_delay);
242__strong_alias(_delay,rump_delay); 255__strong_alias(_delay,rump_delay);
243 256
244/* Weak aliases for fstrans to be used unless librumpvfs is present. */ 257/* Weak aliases for fstrans to be used unless librumpvfs is present. */
245 258
246void rump_fstrans_start(struct mount *); 259void rump_fstrans_start(struct mount *);
247void 260void
248rump_fstrans_start(struct mount *mp) 261rump_fstrans_start(struct mount *mp)
249{ 262{
250 263
251} 264}
252__weak_alias(fstrans_start,rump_fstrans_start); 265__weak_alias(fstrans_start,rump_fstrans_start);
253 266
254int rump_fstrans_start_nowait(struct mount *); 267int rump_fstrans_start_nowait(struct mount *);
255int 268int
256rump_fstrans_start_nowait(struct mount *mp) 269rump_fstrans_start_nowait(struct mount *mp)
257{ 270{
258 271
259 return 0; 272 return 0;
260} 273}
261__weak_alias(fstrans_start_nowait,rump_fstrans_start_nowait); 274__weak_alias(fstrans_start_nowait,rump_fstrans_start_nowait);
262 275
263void rump_fstrans_done(struct mount *); 276void rump_fstrans_done(struct mount *);
264void 277void
265rump_fstrans_done(struct mount *mp) 278rump_fstrans_done(struct mount *mp)
266{ 279{
267 280
268} 281}
269__weak_alias(fstrans_done,rump_fstrans_done); 282__weak_alias(fstrans_done,rump_fstrans_done);
270 283
271/* 284/*
272 * Provide weak aliases for tty routines used by printf. 285 * Provide weak aliases for tty routines used by printf.
273 * They will be used unless the rumpkern_tty component is present. 286 * They will be used unless the rumpkern_tty component is present.
274 */ 287 */
275 288
276int rump_ttycheckoutq(struct tty *, int); 289int rump_ttycheckoutq(struct tty *, int);
277int 290int
278rump_ttycheckoutq(struct tty *tp, int wait) 291rump_ttycheckoutq(struct tty *tp, int wait)
279{ 292{
280 293
281 return 1; 294 return 1;
282} 295}
283__weak_alias(ttycheckoutq,rump_ttycheckoutq); 296__weak_alias(ttycheckoutq,rump_ttycheckoutq);
284 297
285int rump_tputchar(int, int, struct tty *); 298int rump_tputchar(int, int, struct tty *);
286int 299int
287rump_tputchar(int c, int flags, struct tty *tp) 300rump_tputchar(int c, int flags, struct tty *tp)
288{ 301{
289 302
290 cnputc(c); 303 cnputc(c);
291 return 0; 304 return 0;
292} 305}
293__weak_alias(tputchar,rump_tputchar); 306__weak_alias(tputchar,rump_tputchar);
294 307
295void 308void
296cnputc(int c) 309cnputc(int c)
297{ 310{
298 311
299 rumpuser_putchar(c); 312 rumpuser_putchar(c);
300} 313}
301 314
302void 315void
303cnflush(void) 316cnflush(void)
304{ 317{
305 318
306 /* done */ 319 /* done */
307} 320}
308 321
309void 322void
310resettodr(void) 323resettodr(void)
311{ 324{
312 325
313 /* setting clocks is not in the jurisdiction of rump kernels */ 326 /* setting clocks is not in the jurisdiction of rump kernels */
314} 327}
315 328
316#ifdef __HAVE_SYSCALL_INTERN 329#ifdef __HAVE_SYSCALL_INTERN
317void 330void
318syscall_intern(struct proc *p) 331syscall_intern(struct proc *p)
319{ 332{
320 333
321 p->p_emuldata = NULL; 334 p->p_emuldata = NULL;
322} 335}
323#endif 336#endif
324 337
325#ifdef LOCKDEBUG 338#ifdef LOCKDEBUG
326void 339void
327turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 340turnstile_print(volatile void *obj, void (*pr)(const char *, ...))
328{ 341{
329 342
330 /* nada */ 343 /* nada */
331} 344}
332#endif 345#endif
333 346
334void 347void
335cpu_reboot(int howto, char *bootstr) 348cpu_reboot(int howto, char *bootstr)
336{ 349{
337 int ruhow = 0; 350 int ruhow = 0;
338 void *finiarg; 351 void *finiarg;
339 352
340 printf("rump kernel halting...\n"); 353 printf("rump kernel halting...\n");
341 354
342 if (!RUMP_LOCALPROC_P(curproc)) 355 if (!RUMP_LOCALPROC_P(curproc))
343 finiarg = RUMP_SPVM2CTL(curproc->p_vmspace); 356 finiarg = RUMP_SPVM2CTL(curproc->p_vmspace);
344 else 357 else
345 finiarg = NULL; 358 finiarg = NULL;
346 359
347 /* dump means we really take the dive here */ 360 /* dump means we really take the dive here */
348 if ((howto & RB_DUMP) || panicstr) { 361 if ((howto & RB_DUMP) || panicstr) {
349 ruhow = RUMPUSER_PANIC; 362 ruhow = RUMPUSER_PANIC;
350 goto out; 363 goto out;
351 } 364 }
352 365
353 /* try to sync */ 366 /* try to sync */
354 if (!((howto & RB_NOSYNC) || panicstr)) { 367 if (!((howto & RB_NOSYNC) || panicstr)) {
355 rump_vfs_fini(); 368 rump_vfs_fini();
356 } 369 }
357 370
358 doshutdownhooks(); 371 doshutdownhooks();
359 372
360 /* your wish is my command */ 373 /* your wish is my command */
361 if (howto & RB_HALT) { 374 if (howto & RB_HALT) {
362 printf("rump kernel halted (with RB_HALT, not exiting)\n"); 375 printf("rump kernel halted (with RB_HALT, not exiting)\n");
363 rump_sysproxy_fini(finiarg); 376 rump_sysproxy_fini(finiarg);
364 for (;;) { 377 for (;;) {
365 rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 10, 0); 378 rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 10, 0);
366 } 379 }
367 } 380 }
368 381
369 /* this function is __dead, we must exit */ 382 /* this function is __dead, we must exit */
370 out: 383 out:
371 rump_sysproxy_fini(finiarg); 384 rump_sysproxy_fini(finiarg);
372 rumpuser_exit(ruhow); 385 rumpuser_exit(ruhow);
373} 386}
374 387
375const char * 388const char *
376cpu_getmodel(void) 389cpu_getmodel(void)
377{ 390{
378 391
379 return "rumpcore (virtual)"; 392 return "rumpcore (virtual)";
380} 393}

cvs diff -r1.329 -r1.329.10.1 src/sys/rump/librump/rumpkern/rump.c

--- src/sys/rump/librump/rumpkern/rump.c 2016/03/08 14:30:48 1.329
+++ src/sys/rump/librump/rumpkern/rump.c 2017/11/30 14:40:46 1.329.10.1
@@ -1,819 +1,819 @@ @@ -1,819 +1,819 @@
1/* $NetBSD: rump.c,v 1.329 2016/03/08 14:30:48 joerg Exp $ */ 1/* $NetBSD: rump.c,v 1.329.10.1 2017/11/30 14:40:46 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved. 4 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE. 25 * SUCH DAMAGE.
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.329 2016/03/08 14:30:48 joerg Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.329.10.1 2017/11/30 14:40:46 martin Exp $");
30 30
31#include <sys/systm.h> 31#include <sys/systm.h>
32#define ELFSIZE ARCH_ELFSIZE 32#define ELFSIZE ARCH_ELFSIZE
33 33
34#include <sys/param.h> 34#include <sys/param.h>
35#include <sys/atomic.h> 35#include <sys/atomic.h>
36#include <sys/buf.h> 36#include <sys/buf.h>
37#include <sys/callout.h> 37#include <sys/callout.h>
38#include <sys/conf.h> 38#include <sys/conf.h>
39#include <sys/cpu.h> 39#include <sys/cpu.h>
40#include <sys/device.h> 40#include <sys/device.h>
41#include <sys/evcnt.h> 41#include <sys/evcnt.h>
42#include <sys/event.h> 42#include <sys/event.h>
43#include <sys/exec_elf.h> 43#include <sys/exec_elf.h>
44#include <sys/filedesc.h> 44#include <sys/filedesc.h>
45#include <sys/iostat.h> 45#include <sys/iostat.h>
46#include <sys/kauth.h> 46#include <sys/kauth.h>
47#include <sys/kcpuset.h> 47#include <sys/kcpuset.h>
48#include <sys/kernel.h> 48#include <sys/kernel.h>
49#include <sys/kmem.h> 49#include <sys/kmem.h>
50#include <sys/kprintf.h> 50#include <sys/kprintf.h>
51#include <sys/kthread.h> 51#include <sys/kthread.h>
52#include <sys/ksyms.h> 52#include <sys/ksyms.h>
53#include <sys/msgbuf.h> 53#include <sys/msgbuf.h>
54#include <sys/module.h> 54#include <sys/module.h>
55#include <sys/namei.h> 55#include <sys/namei.h>
56#include <sys/once.h> 56#include <sys/once.h>
57#include <sys/percpu.h> 57#include <sys/percpu.h>
58#include <sys/pipe.h> 58#include <sys/pipe.h>
59#include <sys/pool.h> 59#include <sys/pool.h>
60#include <sys/pserialize.h> 60#include <sys/pserialize.h>
61#include <sys/queue.h> 61#include <sys/queue.h>
62#include <sys/reboot.h> 62#include <sys/reboot.h>
63#include <sys/resourcevar.h> 63#include <sys/resourcevar.h>
64#include <sys/select.h> 64#include <sys/select.h>
65#include <sys/sysctl.h> 65#include <sys/sysctl.h>
66#include <sys/syscall.h> 66#include <sys/syscall.h>
67#include <sys/syscallvar.h> 67#include <sys/syscallvar.h>
68#include <sys/timetc.h> 68#include <sys/timetc.h>
69#include <sys/tty.h> 69#include <sys/tty.h>
70#include <sys/uidinfo.h> 70#include <sys/uidinfo.h>
71#include <sys/vmem.h> 71#include <sys/vmem.h>
72#include <sys/xcall.h> 72#include <sys/xcall.h>
73#include <sys/cprng.h> 73#include <sys/cprng.h>
74#include <sys/rnd.h> 74#include <sys/rnd.h>
75#include <sys/ktrace.h> 75#include <sys/ktrace.h>
76 76
77#include <rump-sys/kern.h> 77#include <rump-sys/kern.h>
78#include <rump-sys/dev.h> 78#include <rump-sys/dev.h>
79#include <rump-sys/net.h> 79#include <rump-sys/net.h>
80#include <rump-sys/vfs.h> 80#include <rump-sys/vfs.h>
81 81
82#include <rump/rumpuser.h> 82#include <rump/rumpuser.h>
83 83
84#include <secmodel/suser/suser.h> 84#include <secmodel/suser/suser.h>
85 85
86#include <prop/proplib.h> 86#include <prop/proplib.h>
87 87
88#include <uvm/uvm_extern.h> 88#include <uvm/uvm_extern.h>
89#include <uvm/uvm_readahead.h> 89#include <uvm/uvm_readahead.h>
90 90
91char machine[] = MACHINE; 91char machine[] = MACHINE;
92char machine_arch[] = MACHINE_ARCH; 92char machine_arch[] = MACHINE_ARCH;
93 93
94struct proc *initproc; 94struct proc *initproc;
95 95
96struct device rump_rootdev = { 96struct device rump_rootdev = {
97 .dv_class = DV_VIRTUAL 97 .dv_class = DV_VIRTUAL
98}; 98};
99 99
100#ifdef RUMP_WITHOUT_THREADS 100#ifdef RUMP_WITHOUT_THREADS
101int rump_threads = 0; 101int rump_threads = 0;
102#else 102#else
103int rump_threads = 1; 103int rump_threads = 1;
104#endif 104#endif
105 105
106static void rump_component_addlocal(void); 106static void rump_component_addlocal(void);
107static struct lwp *bootlwp; 107static struct lwp *bootlwp;
108 108
109/* 16k should be enough for std rump needs */ 109/* 16k should be enough for std rump needs */
110static char rump_msgbuf[16*1024] __aligned(256); 110static char rump_msgbuf[16*1024] __aligned(256);
111 111
112bool rump_ttycomponent = false; 112bool rump_ttycomponent = false;
113 113
114static void 114static void
115rump_aiodone_worker(struct work *wk, void *dummy) 115rump_aiodone_worker(struct work *wk, void *dummy)
116{ 116{
117 struct buf *bp = (struct buf *)wk; 117 struct buf *bp = (struct buf *)wk;
118 118
119 KASSERT(&bp->b_work == wk); 119 KASSERT(&bp->b_work == wk);
120 bp->b_iodone(bp); 120 bp->b_iodone(bp);
121} 121}
122 122
123static int rump_inited; 123static int rump_inited;
124 124
125void (*rump_vfs_drainbufs)(int) = (void *)nullop; 125void (*rump_vfs_drainbufs)(int) = (void *)nullop;
126int (*rump_vfs_makeonedevnode)(dev_t, const char *, 126int (*rump_vfs_makeonedevnode)(dev_t, const char *,
127 devmajor_t, devminor_t) = (void *)nullop; 127 devmajor_t, devminor_t) = (void *)nullop;
128int (*rump_vfs_makedevnodes)(dev_t, const char *, char, 128int (*rump_vfs_makedevnodes)(dev_t, const char *, char,
129 devmajor_t, devminor_t, int) = (void *)nullop; 129 devmajor_t, devminor_t, int) = (void *)nullop;
130int (*rump_vfs_makesymlink)(const char *, const char *) = (void *)nullop; 130int (*rump_vfs_makesymlink)(const char *, const char *) = (void *)nullop;
131 131
132rump_proc_vfs_init_fn rump_proc_vfs_init = (void *)nullop; 132rump_proc_vfs_init_fn rump_proc_vfs_init = (void *)nullop;
133rump_proc_vfs_release_fn rump_proc_vfs_release = (void *)nullop; 133rump_proc_vfs_release_fn rump_proc_vfs_release = (void *)nullop;
134 134
135static void add_linkedin_modules(const struct modinfo *const *, size_t); 135static void add_linkedin_modules(const struct modinfo *const *, size_t);
136 136
137static pid_t rspo_wrap_getpid(void) { 137static pid_t rspo_wrap_getpid(void) {
138 return rump_sysproxy_hyp_getpid(); 138 return rump_sysproxy_hyp_getpid();
139} 139}
140static int rspo_wrap_syscall(int num, void *arg, long *retval) { 140static int rspo_wrap_syscall(int num, void *arg, long *retval) {
141 return rump_sysproxy_hyp_syscall(num, arg, retval); 141 return rump_sysproxy_hyp_syscall(num, arg, retval);
142} 142}
143static int rspo_wrap_rfork(void *priv, int flag, const char *comm) { 143static int rspo_wrap_rfork(void *priv, int flag, const char *comm) {
144 return rump_sysproxy_hyp_rfork(priv, flag, comm); 144 return rump_sysproxy_hyp_rfork(priv, flag, comm);
145} 145}
146static void rspo_wrap_lwpexit(void) { 146static void rspo_wrap_lwpexit(void) {
147 rump_sysproxy_hyp_lwpexit(); 147 rump_sysproxy_hyp_lwpexit();
148} 148}
149static void rspo_wrap_execnotify(const char *comm) { 149static void rspo_wrap_execnotify(const char *comm) {
150 rump_sysproxy_hyp_execnotify(comm); 150 rump_sysproxy_hyp_execnotify(comm);
151} 151}
152static const struct rumpuser_hyperup hyp = { 152static const struct rumpuser_hyperup hyp = {
153 .hyp_schedule = rump_schedule, 153 .hyp_schedule = rump_schedule,
154 .hyp_unschedule = rump_unschedule, 154 .hyp_unschedule = rump_unschedule,
155 .hyp_backend_unschedule = rump_user_unschedule, 155 .hyp_backend_unschedule = rump_user_unschedule,
156 .hyp_backend_schedule = rump_user_schedule, 156 .hyp_backend_schedule = rump_user_schedule,
157 .hyp_lwproc_switch = rump_lwproc_switch, 157 .hyp_lwproc_switch = rump_lwproc_switch,
158 .hyp_lwproc_release = rump_lwproc_releaselwp, 158 .hyp_lwproc_release = rump_lwproc_releaselwp,
159 .hyp_lwproc_newlwp = rump_lwproc_newlwp, 159 .hyp_lwproc_newlwp = rump_lwproc_newlwp,
160 .hyp_lwproc_curlwp = rump_lwproc_curlwp, 160 .hyp_lwproc_curlwp = rump_lwproc_curlwp,
161 161
162 .hyp_getpid = rspo_wrap_getpid, 162 .hyp_getpid = rspo_wrap_getpid,
163 .hyp_syscall = rspo_wrap_syscall, 163 .hyp_syscall = rspo_wrap_syscall,
164 .hyp_lwproc_rfork = rspo_wrap_rfork, 164 .hyp_lwproc_rfork = rspo_wrap_rfork,
165 .hyp_lwpexit = rspo_wrap_lwpexit, 165 .hyp_lwpexit = rspo_wrap_lwpexit,
166 .hyp_execnotify = rspo_wrap_execnotify, 166 .hyp_execnotify = rspo_wrap_execnotify,
167}; 167};
168struct rump_sysproxy_ops rump_sysproxy_ops = { 168struct rump_sysproxy_ops rump_sysproxy_ops = {
169 .rspo_copyin = (void *)enxio, 169 .rspo_copyin = (void *)enxio,
170 .rspo_copyinstr = (void *)enxio, 170 .rspo_copyinstr = (void *)enxio,
171 .rspo_copyout = (void *)enxio, 171 .rspo_copyout = (void *)enxio,
172 .rspo_copyoutstr = (void *)enxio, 172 .rspo_copyoutstr = (void *)enxio,
173 .rspo_anonmmap = (void *)enxio, 173 .rspo_anonmmap = (void *)enxio,
174 .rspo_raise = (void *)enxio, 174 .rspo_raise = (void *)enxio,
175 .rspo_fini = (void *)enxio, 175 .rspo_fini = (void *)enxio,
176 .rspo_hyp_getpid = (void *)enxio, 176 .rspo_hyp_getpid = (void *)enxio,
177 .rspo_hyp_syscall = (void *)enxio, 177 .rspo_hyp_syscall = (void *)enxio,
178 .rspo_hyp_rfork = (void *)enxio, 178 .rspo_hyp_rfork = (void *)enxio,
179 .rspo_hyp_lwpexit = (void *)enxio, 179 .rspo_hyp_lwpexit = (void *)enxio,
180 .rspo_hyp_execnotify = (void *)enxio, 180 .rspo_hyp_execnotify = (void *)enxio,
181}; 181};
182 182
183int 183int
184rump_daemonize_begin(void) 184rump_daemonize_begin(void)
185{ 185{
186 186
187 if (rump_inited) 187 if (rump_inited)
188 return EALREADY; 188 return EALREADY;
189 189
190 return rumpuser_daemonize_begin(); 190 return rumpuser_daemonize_begin();
191} 191}
192 192
193int 193int
194rump_daemonize_done(int error) 194rump_daemonize_done(int error)
195{ 195{
196 196
197 return rumpuser_daemonize_done(error); 197 return rumpuser_daemonize_done(error);
198} 198}
199 199
200#ifdef RUMP_USE_CTOR 200#ifdef RUMP_USE_CTOR
201 201
202/* sysctl bootstrap handling */ 202/* sysctl bootstrap handling */
203struct sysctl_boot_chain sysctl_boot_chain \ 203struct sysctl_boot_chain sysctl_boot_chain \
204 = LIST_HEAD_INITIALIZER(sysctl_boot_chain); 204 = LIST_HEAD_INITIALIZER(sysctl_boot_chain);
205__link_set_add_text(sysctl_funcs,voidop); /* ensure linkset is non-empty */ 205__link_set_add_text(sysctl_funcs,voidop); /* ensure linkset is non-empty */
206 206
207#else /* RUMP_USE_CTOR */ 207#else /* RUMP_USE_CTOR */
208 208
209RUMP_COMPONENT(RUMP_COMPONENT_POSTINIT) 209RUMP_COMPONENT(RUMP_COMPONENT_POSTINIT)
210{ 210{
211 __link_set_decl(rump_components, struct rump_component); 211 __link_set_decl(rump_components, struct rump_component);
212 212
213 /* 213 /*
214 * Trick compiler into generating references so that statically 214 * Trick compiler into generating references so that statically
215 * linked rump kernels are generated with the link set symbols. 215 * linked rump kernels are generated with the link set symbols.
216 */ 216 */
217 asm("" :: "r"(__start_link_set_rump_components)); 217 asm("" :: "r"(__start_link_set_rump_components));
218 asm("" :: "r"(__stop_link_set_rump_components)); 218 asm("" :: "r"(__stop_link_set_rump_components));
219} 219}
220 220
221#endif /* RUMP_USE_CTOR */ 221#endif /* RUMP_USE_CTOR */
222 222
223int 223int
224rump_init(void) 224rump_init(void)
225{ 225{
226 char buf[256]; 226 char buf[256];
227 struct timespec ts; 227 struct timespec ts;
228 int64_t sec; 228 int64_t sec;
229 long nsec; 229 long nsec;
230 struct lwp *l, *initlwp; 230 struct lwp *l, *initlwp;
231 int i, numcpu; 231 int i, numcpu;
232 232
233 /* not reentrant */ 233 /* not reentrant */
234 if (rump_inited) 234 if (rump_inited)
235 return 0; 235 return 0;
236 else if (rump_inited == -1) 236 else if (rump_inited == -1)
237 panic("rump_init: host process restart required"); 237 panic("rump_init: host process restart required");
238 else 238 else
239 rump_inited = 1; 239 rump_inited = 1;
240 240
241 /* initialize hypervisor */ 241 /* initialize hypervisor */
242 if (rumpuser_init(RUMPUSER_VERSION, &hyp) != 0) { 242 if (rumpuser_init(RUMPUSER_VERSION, &hyp) != 0) {
243 rumpuser_dprintf("rumpuser init failed\n"); 243 rumpuser_dprintf("rumpuser init failed\n");
244 return EINVAL; 244 return EINVAL;
245 } 245 }
246 246
247 /* init minimal lwp/cpu context */ 247 /* init minimal lwp/cpu context */
248 rump_lwproc_init(); 248 rump_lwproc_init();
249 l = &lwp0; 249 l = &lwp0;
250 l->l_cpu = l->l_target_cpu = &rump_bootcpu; 250 l->l_cpu = l->l_target_cpu = &rump_bootcpu;
251 rump_lwproc_curlwp_set(l); 251 rump_lwproc_curlwp_set(l);
252 252
253 /* retrieve env vars which affect the early stage of bootstrap */ 253 /* retrieve env vars which affect the early stage of bootstrap */
254 if (rumpuser_getparam("RUMP_THREADS", buf, sizeof(buf)) == 0) { 254 if (rumpuser_getparam("RUMP_THREADS", buf, sizeof(buf)) == 0) {
255 rump_threads = *buf != '0'; 255 rump_threads = *buf != '0';
256 } 256 }
257 if (rumpuser_getparam("RUMP_VERBOSE", buf, sizeof(buf)) == 0) { 257 if (rumpuser_getparam("RUMP_VERBOSE", buf, sizeof(buf)) == 0) {
258 if (*buf != '0') 258 if (*buf != '0')
259 boothowto = AB_VERBOSE; 259 boothowto = AB_VERBOSE;
260 } 260 }
261 261
262 if (rumpuser_getparam(RUMPUSER_PARAM_NCPU, buf, sizeof(buf)) != 0) 262 if (rumpuser_getparam(RUMPUSER_PARAM_NCPU, buf, sizeof(buf)) != 0)
263 panic("mandatory hypervisor configuration (NCPU) missing"); 263 panic("mandatory hypervisor configuration (NCPU) missing");
264 numcpu = strtoll(buf, NULL, 10); 264 numcpu = strtoll(buf, NULL, 10);
265 if (numcpu < 1) { 265 if (numcpu < 1) {
266 panic("rump kernels are not lightweight enough for \"%d\" CPUs", 266 panic("rump kernels are not lightweight enough for \"%d\" CPUs",
267 numcpu); 267 numcpu);
268 } 268 }
269 269
270 rump_thread_init(); 270 rump_thread_init();
271 rump_cpus_bootstrap(&numcpu); 271 rump_cpus_bootstrap(&numcpu);
272 272
273 rumpuser_clock_gettime(RUMPUSER_CLOCK_RELWALL, &sec, &nsec); 273 rumpuser_clock_gettime(RUMPUSER_CLOCK_RELWALL, &sec, &nsec);
274 boottime.tv_sec = sec; 274 boottime.tv_sec = sec;
275 boottime.tv_nsec = nsec; 275 boottime.tv_nsec = nsec;
276 276
277 initmsgbuf(rump_msgbuf, sizeof(rump_msgbuf)); 277 initmsgbuf(rump_msgbuf, sizeof(rump_msgbuf));
278 aprint_verbose("%s%s", copyright, version); 278 aprint_verbose("%s%s", copyright, version);
279 279
280 rump_intr_init(numcpu); 280 rump_intr_init(numcpu);
281 281
282 rump_tsleep_init(); 282 rump_tsleep_init();
283 283
284 rumpuser_mutex_init(&rump_giantlock, RUMPUSER_MTX_SPIN); 284 rumpuser_mutex_init(&rump_giantlock, RUMPUSER_MTX_SPIN);
285 ksyms_init(); 285 ksyms_init();
286 uvm_init(); 286 uvm_init();
287 evcnt_init(); 287 evcnt_init();
288 288
289 kcpuset_sysinit(); 289 kcpuset_sysinit();
290 once_init(); 290 once_init();
291 kernconfig_lock_init(); 291 kernconfig_lock_init();
292 prop_kern_init(); 292 prop_kern_init();
293 293
294 kmem_init(); 294 kmem_init();
295 295
296 uvm_ra_init(); 296 uvm_ra_init();
297 uao_init(); 297 uao_init();
298 298
299 mutex_obj_init(); 299 mutex_obj_init();
300 rw_obj_init(); 300 rw_obj_init();
301 callout_startup(); 301 callout_startup();
302 302
303 kprintf_init(); 303 kprintf_init();
 304 percpu_init();
304 pserialize_init(); 305 pserialize_init();
305 306
306 kauth_init(); 307 kauth_init();
307 308
308 secmodel_init(); 309 secmodel_init();
309 sysctl_init(); 310 sysctl_init();
310 /* 311 /*
311 * The above call to sysctl_init() only initializes sysctl nodes 312 * The above call to sysctl_init() only initializes sysctl nodes
312 * from link sets. Initialize sysctls in case we used ctors. 313 * from link sets. Initialize sysctls in case we used ctors.
313 */ 314 */
314#ifdef RUMP_USE_CTOR 315#ifdef RUMP_USE_CTOR
315 { 316 {
316 struct sysctl_setup_chain *ssc; 317 struct sysctl_setup_chain *ssc;
317 318
318 while ((ssc = LIST_FIRST(&sysctl_boot_chain)) != NULL) { 319 while ((ssc = LIST_FIRST(&sysctl_boot_chain)) != NULL) {
319 LIST_REMOVE(ssc, ssc_entries); 320 LIST_REMOVE(ssc, ssc_entries);
320 ssc->ssc_func(NULL); 321 ssc->ssc_func(NULL);
321 } 322 }
322 } 323 }
323#endif /* RUMP_USE_CTOR */ 324#endif /* RUMP_USE_CTOR */
324 325
325 rnd_init(); 326 rnd_init();
326 cprng_init(); 327 cprng_init();
327 kern_cprng = cprng_strong_create("kernel", IPL_VM, 328 kern_cprng = cprng_strong_create("kernel", IPL_VM,
328 CPRNG_INIT_ANY|CPRNG_REKEY_ANY); 329 CPRNG_INIT_ANY|CPRNG_REKEY_ANY);
329 330
330 rump_hyperentropy_init(); 331 rump_hyperentropy_init();
331 332
332 procinit(); 333 procinit();
333 proc0_init(); 334 proc0_init();
334 uid_init(); 335 uid_init();
335 chgproccnt(0, 1); 336 chgproccnt(0, 1);
336 337
337 l->l_proc = &proc0; 338 l->l_proc = &proc0;
338 lwp_update_creds(l); 339 lwp_update_creds(l);
339 340
340 lwpinit_specificdata(); 341 lwpinit_specificdata();
341 lwp_initspecific(&lwp0); 342 lwp_initspecific(&lwp0);
342 343
343 loginit(); 344 loginit();
344 345
345 rump_biglock_init(); 346 rump_biglock_init();
346 347
347 rump_scheduler_init(numcpu); 348 rump_scheduler_init(numcpu);
348 /* revert temporary context and schedule a semireal context */ 349 /* revert temporary context and schedule a semireal context */
349 rump_lwproc_curlwp_clear(l); 350 rump_lwproc_curlwp_clear(l);
350 initproc = &proc0; /* borrow proc0 before we get initproc started */ 351 initproc = &proc0; /* borrow proc0 before we get initproc started */
351 rump_schedule(); 352 rump_schedule();
352 bootlwp = curlwp; 353 bootlwp = curlwp;
353 354
354 percpu_init(); 
355 inittimecounter(); 355 inittimecounter();
356 ntp_init(); 356 ntp_init();
357 357
358#ifdef KTRACE 358#ifdef KTRACE
359 ktrinit(); 359 ktrinit();
360#endif 360#endif
361 361
362 ts = boottime; 362 ts = boottime;
363 tc_setclock(&ts); 363 tc_setclock(&ts);
364 364
365 extern krwlock_t exec_lock; 365 extern krwlock_t exec_lock;
366 rw_init(&exec_lock); 366 rw_init(&exec_lock);
367 367
368 /* we are mostly go. do per-cpu subsystem init */ 368 /* we are mostly go. do per-cpu subsystem init */
369 for (i = 0; i < numcpu; i++) { 369 for (i = 0; i < numcpu; i++) {
370 struct cpu_info *ci = cpu_lookup(i); 370 struct cpu_info *ci = cpu_lookup(i);
371 371
372 /* attach non-bootstrap CPUs */ 372 /* attach non-bootstrap CPUs */
373 if (i > 0) { 373 if (i > 0) {
374 rump_cpu_attach(ci); 374 rump_cpu_attach(ci);
375 ncpu++; 375 ncpu++;
376 } 376 }
377 377
378 callout_init_cpu(ci); 378 callout_init_cpu(ci);
379 softint_init(ci); 379 softint_init(ci);
380 xc_init_cpu(ci); 380 xc_init_cpu(ci);
381 pool_cache_cpu_init(ci); 381 pool_cache_cpu_init(ci);
382 selsysinit(ci); 382 selsysinit(ci);
383 percpu_init_cpu(ci); 383 percpu_init_cpu(ci);
384 384
385 TAILQ_INIT(&ci->ci_data.cpu_ld_locks); 385 TAILQ_INIT(&ci->ci_data.cpu_ld_locks);
386 __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock); 386 __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock);
387 387
388 aprint_verbose("cpu%d at thinair0: rump virtual cpu\n", i); 388 aprint_verbose("cpu%d at thinair0: rump virtual cpu\n", i);
389 } 389 }
390 ncpuonline = ncpu; 390 ncpuonline = ncpu;
391 391
392 /* Once all CPUs are detected, initialize the per-CPU cprng_fast. */ 392 /* Once all CPUs are detected, initialize the per-CPU cprng_fast. */
393 cprng_fast_init(); 393 cprng_fast_init();
394 394
395 /* CPUs are up. allow kernel threads to run */ 395 /* CPUs are up. allow kernel threads to run */
396 rump_thread_allow(NULL); 396 rump_thread_allow(NULL);
397 397
398 rnd_init_softint(); 398 rnd_init_softint();
399 399
400 kqueue_init(); 400 kqueue_init();
401 iostat_init(); 401 iostat_init();
402 fd_sys_init(); 402 fd_sys_init();
403 module_init(); 403 module_init();
404 devsw_init(); 404 devsw_init();
405 pipe_init(); 405 pipe_init();
406 resource_init(); 406 resource_init();
407 procinit_sysctl(); 407 procinit_sysctl();
408 time_init(); 408 time_init();
409 time_init2(); 409 time_init2();
410 410
411 /* start page baroness */ 411 /* start page baroness */
412 if (rump_threads) { 412 if (rump_threads) {
413 if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, 413 if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL,
414 uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0) 414 uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0)
415 panic("pagedaemon create failed"); 415 panic("pagedaemon create failed");
416 } else 416 } else
417 uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */ 417 uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
418 418
419 /* process dso's */ 419 /* process dso's */
420 rumpuser_dl_bootstrap(add_linkedin_modules, 420 rumpuser_dl_bootstrap(add_linkedin_modules,
421 rump_kernelfsym_load, rump_component_load); 421 rump_kernelfsym_load, rump_component_load);
422 422
423 rump_component_addlocal(); 423 rump_component_addlocal();
424 rump_component_init(RUMP_COMPONENT_KERN); 424 rump_component_init(RUMP_COMPONENT_KERN);
425 425
426 /* initialize factions, if present */ 426 /* initialize factions, if present */
427 rump_component_init(RUMP__FACTION_VFS); 427 rump_component_init(RUMP__FACTION_VFS);
428 /* pnbuf_cache is used even without vfs */ 428 /* pnbuf_cache is used even without vfs */
429 if (rump_component_count(RUMP__FACTION_VFS) == 0) { 429 if (rump_component_count(RUMP__FACTION_VFS) == 0) {
430 pnbuf_cache = pool_cache_init(MAXPATHLEN, 0, 0, 0, "pnbufpl", 430 pnbuf_cache = pool_cache_init(MAXPATHLEN, 0, 0, 0, "pnbufpl",
431 NULL, IPL_NONE, NULL, NULL, NULL); 431 NULL, IPL_NONE, NULL, NULL, NULL);
432 } 432 }
433 rump_component_init(RUMP__FACTION_NET); 433 rump_component_init(RUMP__FACTION_NET);
434 rump_component_init(RUMP__FACTION_DEV); 434 rump_component_init(RUMP__FACTION_DEV);
435 KASSERT(rump_component_count(RUMP__FACTION_VFS) <= 1 435 KASSERT(rump_component_count(RUMP__FACTION_VFS) <= 1
436 && rump_component_count(RUMP__FACTION_NET) <= 1 436 && rump_component_count(RUMP__FACTION_NET) <= 1
437 && rump_component_count(RUMP__FACTION_DEV) <= 1); 437 && rump_component_count(RUMP__FACTION_DEV) <= 1);
438 438
439 rump_component_init(RUMP_COMPONENT_KERN_VFS); 439 rump_component_init(RUMP_COMPONENT_KERN_VFS);
440 440
441 /* 441 /*
442 * if we initialized the tty component above, the tyttymtx is 442 * if we initialized the tty component above, the tyttymtx is
443 * now initialized. otherwise, we need to initialize it. 443 * now initialized. otherwise, we need to initialize it.
444 */ 444 */
445 if (!rump_ttycomponent) 445 if (!rump_ttycomponent)
446 mutex_init(&tty_lock, MUTEX_DEFAULT, IPL_VM); 446 mutex_init(&tty_lock, MUTEX_DEFAULT, IPL_VM);
447 447
448 cold = 0; 448 cold = 0;
449 449
450 /* aieeeedondest */ 450 /* aieeeedondest */
451 if (rump_threads) { 451 if (rump_threads) {
452 if (workqueue_create(&uvm.aiodone_queue, "aiodoned", 452 if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
453 rump_aiodone_worker, NULL, 0, 0, WQ_MPSAFE)) 453 rump_aiodone_worker, NULL, 0, 0, WQ_MPSAFE))
454 panic("aiodoned"); 454 panic("aiodoned");
455 } 455 }
456 456
457 sysctl_finalize(); 457 sysctl_finalize();
458 458
459 module_init_class(MODULE_CLASS_ANY); 459 module_init_class(MODULE_CLASS_ANY);
460 460
461 if (rumpuser_getparam(RUMPUSER_PARAM_HOSTNAME, 461 if (rumpuser_getparam(RUMPUSER_PARAM_HOSTNAME,
462 hostname, MAXHOSTNAMELEN) != 0) { 462 hostname, MAXHOSTNAMELEN) != 0) {
463 panic("mandatory hypervisor configuration (HOSTNAME) missing"); 463 panic("mandatory hypervisor configuration (HOSTNAME) missing");
464 } 464 }
465 hostnamelen = strlen(hostname); 465 hostnamelen = strlen(hostname);
466 466
467 sigemptyset(&sigcantmask); 467 sigemptyset(&sigcantmask);
468 468
469 if (rump_threads) 469 if (rump_threads)
470 vmem_rehash_start(); 470 vmem_rehash_start();
471 471
472 /* 472 /*
473 * Create init (proc 1), used to attach implicit threads in rump. 473 * Create init (proc 1), used to attach implicit threads in rump.
474 * (note: must be done after vfsinit to get cwdi) 474 * (note: must be done after vfsinit to get cwdi)
475 */ 475 */
476 initlwp = rump__lwproc_alloclwp(NULL); 476 initlwp = rump__lwproc_alloclwp(NULL);
477 mutex_enter(proc_lock); 477 mutex_enter(proc_lock);
478 initproc = proc_find_raw(1); 478 initproc = proc_find_raw(1);
479 mutex_exit(proc_lock); 479 mutex_exit(proc_lock);
480 if (initproc == NULL) 480 if (initproc == NULL)
481 panic("where in the world is initproc?"); 481 panic("where in the world is initproc?");
482 strlcpy(initproc->p_comm, "rumplocal", sizeof(initproc->p_comm)); 482 strlcpy(initproc->p_comm, "rumplocal", sizeof(initproc->p_comm));
483 483
484 rump_component_init(RUMP_COMPONENT_POSTINIT); 484 rump_component_init(RUMP_COMPONENT_POSTINIT);
485 485
486 /* load syscalls */ 486 /* load syscalls */
487 rump_component_init(RUMP_COMPONENT_SYSCALL); 487 rump_component_init(RUMP_COMPONENT_SYSCALL);
488 488
489 /* component inits done */ 489 /* component inits done */
490 bootlwp = NULL; 490 bootlwp = NULL;
491 491
492 /* open 0/1/2 for init */ 492 /* open 0/1/2 for init */
493 KASSERT(rump_lwproc_curlwp() == NULL); 493 KASSERT(rump_lwproc_curlwp() == NULL);
494 rump_lwproc_switch(initlwp); 494 rump_lwproc_switch(initlwp);
495 rump_consdev_init(); 495 rump_consdev_init();
496 rump_lwproc_switch(NULL); 496 rump_lwproc_switch(NULL);
497 497
498 /* release cpu */ 498 /* release cpu */
499 rump_unschedule(); 499 rump_unschedule();
500 500
501 return 0; 501 return 0;
502} 502}
503/* historic compat */ 503/* historic compat */
504__strong_alias(rump__init,rump_init); 504__strong_alias(rump__init,rump_init);
505 505
506static int compcounter[RUMP_COMPONENT_MAX]; 506static int compcounter[RUMP_COMPONENT_MAX];
507static int compinited[RUMP_COMPONENT_MAX]; 507static int compinited[RUMP_COMPONENT_MAX];
508 508
509/* 509/*
510 * Yea, this is O(n^2), but we're only looking at a handful of components. 510 * Yea, this is O(n^2), but we're only looking at a handful of components.
511 * Components are always initialized from the thread that called rump_init(). 511 * Components are always initialized from the thread that called rump_init().
512 */ 512 */
513static LIST_HEAD(, rump_component) rchead = LIST_HEAD_INITIALIZER(rchead); 513static LIST_HEAD(, rump_component) rchead = LIST_HEAD_INITIALIZER(rchead);
514 514
515#ifdef RUMP_USE_CTOR 515#ifdef RUMP_USE_CTOR
516struct modinfo_boot_chain modinfo_boot_chain \ 516struct modinfo_boot_chain modinfo_boot_chain \
517 = LIST_HEAD_INITIALIZER(modinfo_boot_chain); 517 = LIST_HEAD_INITIALIZER(modinfo_boot_chain);
518 518
519static void 519static void
520rump_component_addlocal(void) 520rump_component_addlocal(void)
521{ 521{
522 struct modinfo_chain *mc; 522 struct modinfo_chain *mc;
523  523
524 while ((mc = LIST_FIRST(&modinfo_boot_chain)) != NULL) { 524 while ((mc = LIST_FIRST(&modinfo_boot_chain)) != NULL) {
525 LIST_REMOVE(mc, mc_entries); 525 LIST_REMOVE(mc, mc_entries);
526 module_builtin_add(&mc->mc_info, 1, false); 526 module_builtin_add(&mc->mc_info, 1, false);
527 } 527 }
528} 528}
529 529
530#else /* RUMP_USE_CTOR */ 530#else /* RUMP_USE_CTOR */
531 531
532static void 532static void
533rump_component_addlocal(void) 533rump_component_addlocal(void)
534{ 534{
535 __link_set_decl(rump_components, struct rump_component); 535 __link_set_decl(rump_components, struct rump_component);
536 struct rump_component *const *rc; 536 struct rump_component *const *rc;
537 537
538 __link_set_foreach(rc, rump_components) { 538 __link_set_foreach(rc, rump_components) {
539 rump_component_load(*rc); 539 rump_component_load(*rc);
540 } 540 }
541} 541}
542#endif /* RUMP_USE_CTOR */ 542#endif /* RUMP_USE_CTOR */
543 543
544void 544void
545rump_component_load(const struct rump_component *rc_const) 545rump_component_load(const struct rump_component *rc_const)
546{ 546{
547 struct rump_component *rc, *rc_iter; 547 struct rump_component *rc, *rc_iter;
548 548
549 /* time for rump component loading and unloading has passed */ 549 /* time for rump component loading and unloading has passed */
550 if (!cold) 550 if (!cold)
551 return; 551 return;
552 552
553 /* 553 /*
554 * XXX: this is ok since the "const" was removed from the 554 * XXX: this is ok since the "const" was removed from the
555 * definition of RUMP_COMPONENT(). 555 * definition of RUMP_COMPONENT().
556 * 556 *
557 * However, to preserve the hypercall interface, the const 557 * However, to preserve the hypercall interface, the const
558 * remains here. This can be fixed in the next hypercall revision. 558 * remains here. This can be fixed in the next hypercall revision.
559 */ 559 */
560 rc = __UNCONST(rc_const); 560 rc = __UNCONST(rc_const);
561 561
562 KASSERT(!rump_inited || curlwp == bootlwp); 562 KASSERT(!rump_inited || curlwp == bootlwp);
563 563
564 LIST_FOREACH(rc_iter, &rchead, rc_entries) { 564 LIST_FOREACH(rc_iter, &rchead, rc_entries) {
565 if (rc_iter == rc) 565 if (rc_iter == rc)
566 return; 566 return;
567 } 567 }
568 568
569 LIST_INSERT_HEAD(&rchead, rc, rc_entries); 569 LIST_INSERT_HEAD(&rchead, rc, rc_entries);
570 KASSERT(rc->rc_type < RUMP_COMPONENT_MAX); 570 KASSERT(rc->rc_type < RUMP_COMPONENT_MAX);
571 compcounter[rc->rc_type]++; 571 compcounter[rc->rc_type]++;
572} 572}
573 573
574void 574void
575rump_component_unload(struct rump_component *rc) 575rump_component_unload(struct rump_component *rc)
576{ 576{
577 577
578 /* 578 /*
579 * Checking for cold is enough because rump_init() both 579 * Checking for cold is enough because rump_init() both
580 * flips it and handles component loading. 580 * flips it and handles component loading.
581 */ 581 */
582 if (!cold) 582 if (!cold)
583 return; 583 return;
584 584
585 LIST_REMOVE(rc, rc_entries); 585 LIST_REMOVE(rc, rc_entries);
586} 586}
587 587
588int 588int
589rump_component_count(enum rump_component_type type) 589rump_component_count(enum rump_component_type type)
590{ 590{
591 591
592 KASSERT(curlwp == bootlwp); 592 KASSERT(curlwp == bootlwp);
593 KASSERT(type < RUMP_COMPONENT_MAX); 593 KASSERT(type < RUMP_COMPONENT_MAX);
594 return compcounter[type]; 594 return compcounter[type];
595} 595}
596 596
597void 597void
598rump_component_init(enum rump_component_type type) 598rump_component_init(enum rump_component_type type)
599{ 599{
600 const struct rump_component *rc, *rc_safe; 600 const struct rump_component *rc, *rc_safe;
601 601
602 KASSERT(curlwp == bootlwp); 602 KASSERT(curlwp == bootlwp);
603 KASSERT(!compinited[type]); 603 KASSERT(!compinited[type]);
604 LIST_FOREACH_SAFE(rc, &rchead, rc_entries, rc_safe) { 604 LIST_FOREACH_SAFE(rc, &rchead, rc_entries, rc_safe) {
605 if (rc->rc_type == type) { 605 if (rc->rc_type == type) {
606 rc->rc_init(); 606 rc->rc_init();
607 LIST_REMOVE(rc, rc_entries); 607 LIST_REMOVE(rc, rc_entries);
608 } 608 }
609 } 609 }
610 compinited[type] = 1; 610 compinited[type] = 1;
611} 611}
612 612
613/* 613/*
614 * Initialize a module which has already been loaded and linked 614 * Initialize a module which has already been loaded and linked
615 * with dlopen(). This is fundamentally the same as a builtin module. 615 * with dlopen(). This is fundamentally the same as a builtin module.
616 * 616 *
617 * XXX: this interface does not really work in the RUMP_USE_CTOR case, 617 * XXX: this interface does not really work in the RUMP_USE_CTOR case,
618 * but I'm not sure it's anything to cry about. In feeling blue, 618 * but I'm not sure it's anything to cry about. In feeling blue,
619 * things could somehow be handled via modinfo_boot_chain. 619 * things could somehow be handled via modinfo_boot_chain.
620 */ 620 */
621int 621int
622rump_module_init(const struct modinfo * const *mip, size_t nmodinfo) 622rump_module_init(const struct modinfo * const *mip, size_t nmodinfo)
623{ 623{
624 624
625 return module_builtin_add(mip, nmodinfo, true); 625 return module_builtin_add(mip, nmodinfo, true);
626} 626}
627 627
628/* 628/*
629 * Finish module (flawless victory, fatality!). 629 * Finish module (flawless victory, fatality!).
630 */ 630 */
631int 631int
632rump_module_fini(const struct modinfo *mi) 632rump_module_fini(const struct modinfo *mi)
633{ 633{
634 634
635 return module_builtin_remove(mi, true); 635 return module_builtin_remove(mi, true);
636} 636}
637 637
638/* 638/*
639 * Add loaded and linked module to the builtin list. It will 639 * Add loaded and linked module to the builtin list. It will
640 * later be initialized with module_init_class(). 640 * later be initialized with module_init_class().
641 */ 641 */
642 642
643static void 643static void
644add_linkedin_modules(const struct modinfo * const *mip, size_t nmodinfo) 644add_linkedin_modules(const struct modinfo * const *mip, size_t nmodinfo)
645{ 645{
646 646
647 module_builtin_add(mip, nmodinfo, false); 647 module_builtin_add(mip, nmodinfo, false);
648} 648}
649 649
650int 650int
651rump_kernelfsym_load(void *symtab, uint64_t symsize, 651rump_kernelfsym_load(void *symtab, uint64_t symsize,
652 char *strtab, uint64_t strsize) 652 char *strtab, uint64_t strsize)
653{ 653{
654 static int inited = 0; 654 static int inited = 0;
655 Elf64_Ehdr ehdr; 655 Elf64_Ehdr ehdr;
656 656
657 if (inited) 657 if (inited)
658 return EBUSY; 658 return EBUSY;
659 inited = 1; 659 inited = 1;
660 660
661 /* 661 /*
662 * Use 64bit header since it's bigger. Shouldn't make a 662 * Use 64bit header since it's bigger. Shouldn't make a
663 * difference, since we're passing in all zeroes anyway. 663 * difference, since we're passing in all zeroes anyway.
664 */ 664 */
665 memset(&ehdr, 0, sizeof(ehdr)); 665 memset(&ehdr, 0, sizeof(ehdr));
666 ksyms_addsyms_explicit(&ehdr, symtab, symsize, strtab, strsize); 666 ksyms_addsyms_explicit(&ehdr, symtab, symsize, strtab, strsize);
667 667
668 return 0; 668 return 0;
669} 669}
670 670
671int 671int
672rump_boot_gethowto() 672rump_boot_gethowto()
673{ 673{
674 674
675 return boothowto; 675 return boothowto;
676} 676}
677 677
678void 678void
679rump_boot_sethowto(int howto) 679rump_boot_sethowto(int howto)
680{ 680{
681 681
682 boothowto = howto; 682 boothowto = howto;
683} 683}
684 684
685int 685int
686rump_getversion(void) 686rump_getversion(void)
687{ 687{
688 688
689 return __NetBSD_Version__; 689 return __NetBSD_Version__;
690} 690}
691/* compat */ 691/* compat */
692__strong_alias(rump_pub_getversion,rump_getversion); 692__strong_alias(rump_pub_getversion,rump_getversion);
693 693
694/* 694/*
695 * Note: may be called unscheduled. Not fully safe since no locking 695 * Note: may be called unscheduled. Not fully safe since no locking
696 * of allevents (currently that's not even available). 696 * of allevents (currently that's not even available).
697 */ 697 */
698void 698void
699rump_printevcnts() 699rump_printevcnts()
700{ 700{
701 struct evcnt *ev; 701 struct evcnt *ev;
702 702
703 TAILQ_FOREACH(ev, &allevents, ev_list) 703 TAILQ_FOREACH(ev, &allevents, ev_list)
704 rumpuser_dprintf("%s / %s: %" PRIu64 "\n", 704 rumpuser_dprintf("%s / %s: %" PRIu64 "\n",
705 ev->ev_group, ev->ev_name, ev->ev_count); 705 ev->ev_group, ev->ev_name, ev->ev_count);
706} 706}
707 707
708/* 708/*
709 * If you use this interface ... well ... all bets are off. 709 * If you use this interface ... well ... all bets are off.
710 * The original purpose is for the p2k fs server library to be 710 * The original purpose is for the p2k fs server library to be
711 * able to use the same pid/lid for VOPs as the host kernel. 711 * able to use the same pid/lid for VOPs as the host kernel.
712 */ 712 */
713void 713void
714rump_allbetsareoff_setid(pid_t pid, int lid) 714rump_allbetsareoff_setid(pid_t pid, int lid)
715{ 715{
716 struct lwp *l = curlwp; 716 struct lwp *l = curlwp;
717 struct proc *p = l->l_proc; 717 struct proc *p = l->l_proc;
718 718
719 l->l_lid = lid; 719 l->l_lid = lid;
720 p->p_pid = pid; 720 p->p_pid = pid;
721} 721}
722 722
723#include <sys/pserialize.h> 723#include <sys/pserialize.h>
724 724
725static void 725static void
726ipiemu(void *a1, void *a2) 726ipiemu(void *a1, void *a2)
727{ 727{
728 728
729 xc__highpri_intr(NULL); 729 xc__highpri_intr(NULL);
730 pserialize_switchpoint(); 730 pserialize_switchpoint();
731} 731}
732 732
733void 733void
734rump_xc_highpri(struct cpu_info *ci) 734rump_xc_highpri(struct cpu_info *ci)
735{ 735{
736 736
737 if (ci) 737 if (ci)
738 xc_unicast(0, ipiemu, NULL, NULL, ci); 738 xc_unicast(0, ipiemu, NULL, NULL, ci);
739 else 739 else
740 xc_broadcast(0, ipiemu, NULL, NULL); 740 xc_broadcast(0, ipiemu, NULL, NULL);
741} 741}
742 742
743int 743int
744rump_syscall(int num, void *data, size_t dlen, register_t *retval) 744rump_syscall(int num, void *data, size_t dlen, register_t *retval)
745{ 745{
746 struct proc *p; 746 struct proc *p;
747 struct emul *e; 747 struct emul *e;
748 struct sysent *callp; 748 struct sysent *callp;
749 const int *etrans = NULL; 749 const int *etrans = NULL;
750 int rv; 750 int rv;
751 751
752 rump_schedule(); 752 rump_schedule();
753 p = curproc; 753 p = curproc;
754 e = p->p_emul; 754 e = p->p_emul;
755#ifndef __HAVE_MINIMAL_EMUL 755#ifndef __HAVE_MINIMAL_EMUL
756 KASSERT(num > 0 && num < e->e_nsysent); 756 KASSERT(num > 0 && num < e->e_nsysent);
757#endif 757#endif
758 callp = e->e_sysent + num; 758 callp = e->e_sysent + num;
759 759
760 rv = sy_invoke(callp, curlwp, data, retval, num); 760 rv = sy_invoke(callp, curlwp, data, retval, num);
761 761
762 /* 762 /*
763 * I hope that (!__HAVE_MINIMAL_EMUL || __HAVE_SYSCALL_INTERN) is 763 * I hope that (!__HAVE_MINIMAL_EMUL || __HAVE_SYSCALL_INTERN) is
764 * an invariant ... 764 * an invariant ...
765 */ 765 */
766#if !defined(__HAVE_MINIMAL_EMUL) 766#if !defined(__HAVE_MINIMAL_EMUL)
767 etrans = e->e_errno; 767 etrans = e->e_errno;
768#elif defined(__HAVE_SYSCALL_INTERN) 768#elif defined(__HAVE_SYSCALL_INTERN)
769 etrans = p->p_emuldata; 769 etrans = p->p_emuldata;
770#endif 770#endif
771 771
772 if (etrans) { 772 if (etrans) {
773 rv = etrans[rv]; 773 rv = etrans[rv];
774 /* 774 /*
775 * XXX: small hack since Linux etrans vectors on some 775 * XXX: small hack since Linux etrans vectors on some
776 * archs contain negative errnos, but rump_syscalls 776 * archs contain negative errnos, but rump_syscalls
777 * uses the -1 + errno ABI. Note that these 777 * uses the -1 + errno ABI. Note that these
778 * negative values are always the result of translation, 778 * negative values are always the result of translation,
779 * otherwise the above translation method would not 779 * otherwise the above translation method would not
780 * work very well. 780 * work very well.
781 */ 781 */
782 if (rv < 0) 782 if (rv < 0)
783 rv = -rv; 783 rv = -rv;
784 } 784 }
785 rump_unschedule(); 785 rump_unschedule();
786 786
787 return rv; 787 return rv;
788} 788}
789 789
790void 790void
791rump_syscall_boot_establish(const struct rump_onesyscall *calls, size_t ncall) 791rump_syscall_boot_establish(const struct rump_onesyscall *calls, size_t ncall)
792{ 792{
793 struct sysent *callp; 793 struct sysent *callp;
794 size_t i; 794 size_t i;
795 795
796 for (i = 0; i < ncall; i++) { 796 for (i = 0; i < ncall; i++) {
797 callp = rump_sysent + calls[i].ros_num; 797 callp = rump_sysent + calls[i].ros_num;
798 KASSERT(bootlwp != NULL 798 KASSERT(bootlwp != NULL
799 && callp->sy_call == (sy_call_t *)enosys); 799 && callp->sy_call == (sy_call_t *)enosys);
800 callp->sy_call = calls[i].ros_handler; 800 callp->sy_call = calls[i].ros_handler;
801 } 801 }
802} 802}
803 803
804struct rump_boot_etfs *ebstart; 804struct rump_boot_etfs *ebstart;
805void 805void
806rump_boot_etfs_register(struct rump_boot_etfs *eb) 806rump_boot_etfs_register(struct rump_boot_etfs *eb)
807{ 807{
808 808
809 /* 809 /*
810 * Could use atomics, but, since caller would need to synchronize 810 * Could use atomics, but, since caller would need to synchronize
811 * against calling rump_init() anyway, easier to just specify the 811 * against calling rump_init() anyway, easier to just specify the
812 * interface as "caller serializes". This solve-by-specification 812 * interface as "caller serializes". This solve-by-specification
813 * approach avoids the grey area of using atomics before rump_init() 813 * approach avoids the grey area of using atomics before rump_init()
814 * runs. 814 * runs.
815 */ 815 */
816 eb->_eb_next = ebstart; 816 eb->_eb_next = ebstart;
817 eb->eb_status = -1; 817 eb->eb_status = -1;
818 ebstart = eb; 818 ebstart = eb;
819} 819}
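
The rump.c hunk above does two things: rump_init() now calls percpu_init() immediately before pserialize_init() (new line 304) instead of after the first rump_schedule() (old line 354), and ipiemu() continues to emulate the cross-call IPI by ending in pserialize_switchpoint() on each virtual CPU. The reordering is presumably needed because the diagnostics added by this pullup keep their state in percpu(9) storage, so percpu must be up before pserialize_init() runs. For orientation, here is a minimal sketch of the conventional pserialize(9) updater/reader pattern that this machinery serves; struct example, example_list, example_lock and example_psz are illustrative names, not part of the diff:

	#include <sys/param.h>
	#include <sys/kmem.h>
	#include <sys/mutex.h>
	#include <sys/pserialize.h>
	#include <sys/queue.h>

	struct example {
		LIST_ENTRY(example) e_entries;
		int e_key;
	};

	static LIST_HEAD(, example) example_list =
	    LIST_HEAD_INITIALIZER(example_list);
	static kmutex_t example_lock;		/* serializes updaters */
	static pserialize_t example_psz;	/* from pserialize_create() */

	/* Updater: unpublish, wait for readers to drain, then free. */
	static void
	example_remove(struct example *e)
	{

		mutex_enter(&example_lock);
		LIST_REMOVE(e, e_entries);
		pserialize_perform(example_psz);
		mutex_exit(&example_lock);
		kmem_free(e, sizeof(*e));
	}

	/* Reader: lockless lookup; must not block inside the read section. */
	static bool
	example_lookup(int key)
	{
		struct example *e;
		bool found = false;
		int s;

		s = pserialize_read_enter();
		LIST_FOREACH(e, &example_list, e_entries) {
			if (e->e_key == key) {
				found = true;
				break;
			}
		}
		pserialize_read_exit(s);
		return found;
	}

In this pattern, example_lock keeps updaters serialized while pserialize_perform() waits for every CPU to pass a switchpoint, which is exactly what the xc_broadcast()/ipiemu() path shown above emulates inside a rump kernel.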

cvs diff -r1.1 -r1.1.46.1 src/sys/sys/pserialize.h

--- src/sys/sys/pserialize.h 2011/07/30 17:01:04 1.1
+++ src/sys/sys/pserialize.h 2017/11/30 14:40:46 1.1.46.1
@@ -1,49 +1,51 @@ @@ -1,49 +1,51 @@
1/* $NetBSD: pserialize.h,v 1.1 2011/07/30 17:01:04 christos Exp $ */ 1/* $NetBSD: pserialize.h,v 1.1.46.1 2017/11/30 14:40:46 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. 4 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29#ifndef _SYS_PSERIALIZE_H_ 29#ifndef _SYS_PSERIALIZE_H_
30#define _SYS_PSERIALIZE_H_ 30#define _SYS_PSERIALIZE_H_
31 31
32#ifdef _KERNEL 32#ifdef _KERNEL
33 33
34struct pserialize; 34struct pserialize;
35typedef struct pserialize *pserialize_t; 35typedef struct pserialize *pserialize_t;
36 36
37void pserialize_init(void); 37void pserialize_init(void);
38void pserialize_switchpoint(void); 38void pserialize_switchpoint(void);
39 39
40pserialize_t pserialize_create(void); 40pserialize_t pserialize_create(void);
41void pserialize_destroy(pserialize_t); 41void pserialize_destroy(pserialize_t);
42void pserialize_perform(pserialize_t); 42void pserialize_perform(pserialize_t);
43 43
44int pserialize_read_enter(void); 44int pserialize_read_enter(void);
45void pserialize_read_exit(int); 45void pserialize_read_exit(int);
46 46
 47bool pserialize_in_read_section(void);
 48bool pserialize_not_in_read_section(void);
47#endif /* _KERNEL */ 49#endif /* _KERNEL */
48 50
49#endif /* _SYS_PSERIALIZE_H_ */ 51#endif /* _SYS_PSERIALIZE_H_ */
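
The header diff adds two predicates, pserialize_in_read_section() and pserialize_not_in_read_section(), to the kernel-only section. A purely illustrative sketch of how a subsystem might consume them in assertions follows; example_sleepable_helper() and example_reader_only() are hypothetical callers, not part of this pullup:

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/pserialize.h>

	/* Must not be entered from inside a pserialize read section. */
	static void
	example_sleepable_helper(void)
	{

		KASSERT(pserialize_not_in_read_section());
		/* ... code that may sleep ... */
	}

	/* Only legal while the caller holds a pserialize read section. */
	static void
	example_reader_only(void)
	{

		KASSERT(pserialize_in_read_section());
		/* ... lockless reads of pserialized data ... */
	}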