Sun Mar 8 15:05:18 2020 UTC
Kill off kernel_lock_plug_leak(), and go back to dropping kernel_lock in
exit1(), since there seems little hope of finding the leaking code any
time soon.  The leak can still be caught with LOCKDEBUG.


(ad)
diff -r1.284 -r1.285 src/sys/kern/kern_exit.c
diff -r1.169 -r1.170 src/sys/kern/kern_lock.c
diff -r1.61 -r1.62 src/sys/kern/kern_softint.c
diff -r1.88 -r1.89 src/sys/sys/lock.h

cvs diff -r1.284 -r1.285 src/sys/kern/kern_exit.c
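
For orientation, the kern_exit.c change amounts to the following before/after
sketch of the top of exit1() (condensed from the hunk in the diff below, which
is authoritative; this is not a drop-in patch):

	/* 1.284 (before): call the temporary leak-plugging hook, then
	 * assert that no kernel_lock holds remain. */
	/* XXX Temporary. */
	kernel_lock_plug_leak();

	/* Verify that we hold no locks other than p->p_lock. */
	LOCKDEBUG_BARRIER(p->p_lock, 0);
	KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked");

	/* 1.285 (after): simply drop any kernel_lock holds on the way out. */
	/* Verify that we hold no locks other than p->p_lock. */
	LOCKDEBUG_BARRIER(p->p_lock, 0);

	/* XXX Temporary: something is leaking kernel_lock. */
	KERNEL_UNLOCK_ALL(l, NULL);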

--- src/sys/kern/kern_exit.c 2020/02/22 21:07:46 1.284
+++ src/sys/kern/kern_exit.c 2020/03/08 15:05:18 1.285
@@ -1,1213 +1,1213 @@
1/* $NetBSD: kern_exit.c,v 1.284 2020/02/22 21:07:46 ad Exp $ */ 1/* $NetBSD: kern_exit.c,v 1.285 2020/03/08 15:05:18 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran. 9 * NASA Ames Research Center, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Copyright (c) 1982, 1986, 1989, 1991, 1993 34 * Copyright (c) 1982, 1986, 1989, 1991, 1993
35 * The Regents of the University of California. All rights reserved. 35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc. 36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed 37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph 38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc. 40 * the permission of UNIX System Laboratories, Inc.
41 * 41 *
42 * Redistribution and use in source and binary forms, with or without 42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions 43 * modification, are permitted provided that the following conditions
44 * are met: 44 * are met:
45 * 1. Redistributions of source code must retain the above copyright 45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer. 46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright 47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the 48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution. 49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors 50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software 51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission. 52 * without specific prior written permission.
53 * 53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE. 64 * SUCH DAMAGE.
65 * 65 *
66 * @(#)kern_exit.c 8.10 (Berkeley) 2/23/95 66 * @(#)kern_exit.c 8.10 (Berkeley) 2/23/95
67 */ 67 */
68 68
69#include <sys/cdefs.h> 69#include <sys/cdefs.h>
70__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.284 2020/02/22 21:07:46 ad Exp $"); 70__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.285 2020/03/08 15:05:18 ad Exp $");
71 71
72#include "opt_ktrace.h" 72#include "opt_ktrace.h"
73#include "opt_dtrace.h" 73#include "opt_dtrace.h"
74#include "opt_sysv.h" 74#include "opt_sysv.h"
75 75
76#include <sys/param.h> 76#include <sys/param.h>
77#include <sys/systm.h> 77#include <sys/systm.h>
78#include <sys/ioctl.h> 78#include <sys/ioctl.h>
79#include <sys/tty.h> 79#include <sys/tty.h>
80#include <sys/time.h> 80#include <sys/time.h>
81#include <sys/resource.h> 81#include <sys/resource.h>
82#include <sys/kernel.h> 82#include <sys/kernel.h>
83#include <sys/proc.h> 83#include <sys/proc.h>
84#include <sys/buf.h> 84#include <sys/buf.h>
85#include <sys/wait.h> 85#include <sys/wait.h>
86#include <sys/file.h> 86#include <sys/file.h>
87#include <sys/fstrans.h> 87#include <sys/fstrans.h>
88#include <sys/vnode.h> 88#include <sys/vnode.h>
89#include <sys/syslog.h> 89#include <sys/syslog.h>
90#include <sys/pool.h> 90#include <sys/pool.h>
91#include <sys/uidinfo.h> 91#include <sys/uidinfo.h>
92#include <sys/ptrace.h> 92#include <sys/ptrace.h>
93#include <sys/acct.h> 93#include <sys/acct.h>
94#include <sys/filedesc.h> 94#include <sys/filedesc.h>
95#include <sys/ras.h> 95#include <sys/ras.h>
96#include <sys/signalvar.h> 96#include <sys/signalvar.h>
97#include <sys/sched.h> 97#include <sys/sched.h>
98#include <sys/mount.h> 98#include <sys/mount.h>
99#include <sys/syscallargs.h> 99#include <sys/syscallargs.h>
100#include <sys/kauth.h> 100#include <sys/kauth.h>
101#include <sys/sleepq.h> 101#include <sys/sleepq.h>
102#include <sys/lock.h> 102#include <sys/lock.h>
103#include <sys/lockdebug.h> 103#include <sys/lockdebug.h>
104#include <sys/ktrace.h> 104#include <sys/ktrace.h>
105#include <sys/cpu.h> 105#include <sys/cpu.h>
106#include <sys/lwpctl.h> 106#include <sys/lwpctl.h>
107#include <sys/atomic.h> 107#include <sys/atomic.h>
108#include <sys/sdt.h> 108#include <sys/sdt.h>
109#include <sys/psref.h> 109#include <sys/psref.h>
110 110
111#include <uvm/uvm_extern.h> 111#include <uvm/uvm_extern.h>
112 112
113#ifdef DEBUG_EXIT 113#ifdef DEBUG_EXIT
114int debug_exit = 0; 114int debug_exit = 0;
115#define DPRINTF(x) if (debug_exit) printf x 115#define DPRINTF(x) if (debug_exit) printf x
116#else 116#else
117#define DPRINTF(x) 117#define DPRINTF(x)
118#endif 118#endif
119 119
120static int find_stopped_child(struct proc *, idtype_t, id_t, int, 120static int find_stopped_child(struct proc *, idtype_t, id_t, int,
121 struct proc **, struct wrusage *, siginfo_t *); 121 struct proc **, struct wrusage *, siginfo_t *);
122static void proc_free(struct proc *, struct wrusage *); 122static void proc_free(struct proc *, struct wrusage *);
123 123
124/* 124/*
125 * DTrace SDT provider definitions 125 * DTrace SDT provider definitions
126 */ 126 */
127SDT_PROVIDER_DECLARE(proc); 127SDT_PROVIDER_DECLARE(proc);
128SDT_PROBE_DEFINE1(proc, kernel, , exit, "int"); 128SDT_PROBE_DEFINE1(proc, kernel, , exit, "int");
129 129
130/* 130/*
131 * Fill in the appropriate signal information, and signal the parent. 131 * Fill in the appropriate signal information, and signal the parent.
132 */ 132 */
133/* XXX noclone works around a gcc 4.5 bug on arm */ 133/* XXX noclone works around a gcc 4.5 bug on arm */
134static void __noclone 134static void __noclone
135exit_psignal(struct proc *p, struct proc *pp, ksiginfo_t *ksi) 135exit_psignal(struct proc *p, struct proc *pp, ksiginfo_t *ksi)
136{ 136{
137 137
138 KSI_INIT(ksi); 138 KSI_INIT(ksi);
139 if ((ksi->ksi_signo = P_EXITSIG(p)) == SIGCHLD) { 139 if ((ksi->ksi_signo = P_EXITSIG(p)) == SIGCHLD) {
140 if (p->p_xsig) { 140 if (p->p_xsig) {
141 if (p->p_sflag & PS_COREDUMP) 141 if (p->p_sflag & PS_COREDUMP)
142 ksi->ksi_code = CLD_DUMPED; 142 ksi->ksi_code = CLD_DUMPED;
143 else 143 else
144 ksi->ksi_code = CLD_KILLED; 144 ksi->ksi_code = CLD_KILLED;
145 ksi->ksi_status = p->p_xsig; 145 ksi->ksi_status = p->p_xsig;
146 } else { 146 } else {
147 ksi->ksi_code = CLD_EXITED; 147 ksi->ksi_code = CLD_EXITED;
148 ksi->ksi_status = p->p_xexit; 148 ksi->ksi_status = p->p_xexit;
149 } 149 }
150 } else { 150 } else {
151 ksi->ksi_code = SI_USER; 151 ksi->ksi_code = SI_USER;
152 ksi->ksi_status = p->p_xsig; 152 ksi->ksi_status = p->p_xsig;
153 } 153 }
154 /* 154 /*
155 * We fill those in, even for non-SIGCHLD. 155 * We fill those in, even for non-SIGCHLD.
156 * It's safe to access p->p_cred unlocked here. 156 * It's safe to access p->p_cred unlocked here.
157 */ 157 */
158 ksi->ksi_pid = p->p_pid; 158 ksi->ksi_pid = p->p_pid;
159 ksi->ksi_uid = kauth_cred_geteuid(p->p_cred); 159 ksi->ksi_uid = kauth_cred_geteuid(p->p_cred);
160 /* XXX: is this still valid? */ 160 /* XXX: is this still valid? */
161 ksi->ksi_utime = p->p_stats->p_ru.ru_utime.tv_sec; 161 ksi->ksi_utime = p->p_stats->p_ru.ru_utime.tv_sec;
162 ksi->ksi_stime = p->p_stats->p_ru.ru_stime.tv_sec; 162 ksi->ksi_stime = p->p_stats->p_ru.ru_stime.tv_sec;
163} 163}
164 164
165/* 165/*
166 * exit -- 166 * exit --
167 * Death of process. 167 * Death of process.
168 */ 168 */
169int 169int
170sys_exit(struct lwp *l, const struct sys_exit_args *uap, register_t *retval) 170sys_exit(struct lwp *l, const struct sys_exit_args *uap, register_t *retval)
171{ 171{
172 /* { 172 /* {
173 syscallarg(int) rval; 173 syscallarg(int) rval;
174 } */ 174 } */
175 struct proc *p = l->l_proc; 175 struct proc *p = l->l_proc;
176 176
177 /* Don't call exit1() multiple times in the same process. */ 177 /* Don't call exit1() multiple times in the same process. */
178 mutex_enter(p->p_lock); 178 mutex_enter(p->p_lock);
179 if (p->p_sflag & PS_WEXIT) { 179 if (p->p_sflag & PS_WEXIT) {
180 mutex_exit(p->p_lock); 180 mutex_exit(p->p_lock);
181 lwp_exit(l); 181 lwp_exit(l);
182 } 182 }
183 183
184 /* exit1() will release the mutex. */ 184 /* exit1() will release the mutex. */
185 exit1(l, SCARG(uap, rval), 0); 185 exit1(l, SCARG(uap, rval), 0);
186 /* NOTREACHED */ 186 /* NOTREACHED */
187 return (0); 187 return (0);
188} 188}
189 189
190/* 190/*
191 * Exit: deallocate address space and other resources, change proc state 191 * Exit: deallocate address space and other resources, change proc state
192 * to zombie, and unlink proc from allproc and parent's lists. Save exit 192 * to zombie, and unlink proc from allproc and parent's lists. Save exit
193 * status and rusage for wait(). Check for child processes and orphan them. 193 * status and rusage for wait(). Check for child processes and orphan them.
194 * 194 *
195 * Must be called with p->p_lock held. Does not return. 195 * Must be called with p->p_lock held. Does not return.
196 */ 196 */
197void 197void
198exit1(struct lwp *l, int exitcode, int signo) 198exit1(struct lwp *l, int exitcode, int signo)
199{ 199{
200 struct proc *p, *child, *next_child, *old_parent, *new_parent; 200 struct proc *p, *child, *next_child, *old_parent, *new_parent;
201 struct pgrp *pgrp; 201 struct pgrp *pgrp;
202 ksiginfo_t ksi; 202 ksiginfo_t ksi;
203 ksiginfoq_t kq; 203 ksiginfoq_t kq;
204 int wakeinit; 204 int wakeinit;
205 struct lwp *l2 __diagused; 205 struct lwp *l2 __diagused;
206 206
207 p = l->l_proc; 207 p = l->l_proc;
208 208
209 /* XXX Temporary. */ 
210 kernel_lock_plug_leak(); 
211 
212 /* Verify that we hold no locks other than p->p_lock. */ 209 /* Verify that we hold no locks other than p->p_lock. */
213 LOCKDEBUG_BARRIER(p->p_lock, 0); 210 LOCKDEBUG_BARRIER(p->p_lock, 0);
214 KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); 211
 212 /* XXX Temporary: something is leaking kernel_lock. */
 213 KERNEL_UNLOCK_ALL(l, NULL);
 214
215 KASSERT(mutex_owned(p->p_lock)); 215 KASSERT(mutex_owned(p->p_lock));
216 KASSERT(p->p_vmspace != NULL); 216 KASSERT(p->p_vmspace != NULL);
217 217
218 if (__predict_false(p == initproc)) { 218 if (__predict_false(p == initproc)) {
219 panic("init died (signal %d, exit %d)", signo, exitcode); 219 panic("init died (signal %d, exit %d)", signo, exitcode);
220 } 220 }
221 221
222 p->p_sflag |= PS_WEXIT; 222 p->p_sflag |= PS_WEXIT;
223 223
224 /* 224 /*
225 * Force all other LWPs to exit before we do. Only then can we 225 * Force all other LWPs to exit before we do. Only then can we
226 * begin to tear down the rest of the process state. 226 * begin to tear down the rest of the process state.
227 */ 227 */
228 if (p->p_nlwps > 1) { 228 if (p->p_nlwps > 1) {
229 exit_lwps(l); 229 exit_lwps(l);
230 } 230 }
231 231
232 ksiginfo_queue_init(&kq); 232 ksiginfo_queue_init(&kq);
233 233
234 /* 234 /*
235 * If we have been asked to stop on exit, do so now. 235 * If we have been asked to stop on exit, do so now.
236 */ 236 */
237 if (__predict_false(p->p_sflag & PS_STOPEXIT)) { 237 if (__predict_false(p->p_sflag & PS_STOPEXIT)) {
238 KERNEL_UNLOCK_ALL(l, &l->l_biglocks); 238 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
239 sigclearall(p, &contsigmask, &kq); 239 sigclearall(p, &contsigmask, &kq);
240 240
241 if (!mutex_tryenter(proc_lock)) { 241 if (!mutex_tryenter(proc_lock)) {
242 mutex_exit(p->p_lock); 242 mutex_exit(p->p_lock);
243 mutex_enter(proc_lock); 243 mutex_enter(proc_lock);
244 mutex_enter(p->p_lock); 244 mutex_enter(p->p_lock);
245 } 245 }
246 p->p_waited = 0; 246 p->p_waited = 0;
247 p->p_pptr->p_nstopchild++; 247 p->p_pptr->p_nstopchild++;
248 p->p_stat = SSTOP; 248 p->p_stat = SSTOP;
249 mutex_exit(proc_lock); 249 mutex_exit(proc_lock);
250 lwp_lock(l); 250 lwp_lock(l);
251 p->p_nrlwps--; 251 p->p_nrlwps--;
252 l->l_stat = LSSTOP; 252 l->l_stat = LSSTOP;
253 lwp_unlock(l); 253 lwp_unlock(l);
254 mutex_exit(p->p_lock); 254 mutex_exit(p->p_lock);
255 lwp_lock(l); 255 lwp_lock(l);
256 spc_lock(l->l_cpu); 256 spc_lock(l->l_cpu);
257 mi_switch(l); 257 mi_switch(l);
258 mutex_enter(p->p_lock); 258 mutex_enter(p->p_lock);
259 } 259 }
260 260
261 /* 261 /*
262 * Bin any remaining signals and mark the process as dying so it will 262 * Bin any remaining signals and mark the process as dying so it will
263 * not be found for, e.g. signals. 263 * not be found for, e.g. signals.
264 */ 264 */
265 sigfillset(&p->p_sigctx.ps_sigignore); 265 sigfillset(&p->p_sigctx.ps_sigignore);
266 sigclearall(p, NULL, &kq); 266 sigclearall(p, NULL, &kq);
267 p->p_stat = SDYING; 267 p->p_stat = SDYING;
268 mutex_exit(p->p_lock); 268 mutex_exit(p->p_lock);
269 ksiginfo_queue_drain(&kq); 269 ksiginfo_queue_drain(&kq);
270 270
271 /* Destroy any lwpctl info. */ 271 /* Destroy any lwpctl info. */
272 if (p->p_lwpctl != NULL) 272 if (p->p_lwpctl != NULL)
273 lwp_ctl_exit(); 273 lwp_ctl_exit();
274 274
275 /* 275 /*
276 * Drain all remaining references that procfs, ptrace and others may 276 * Drain all remaining references that procfs, ptrace and others may
277 * have on the process. 277 * have on the process.
278 */ 278 */
279 rw_enter(&p->p_reflock, RW_WRITER); 279 rw_enter(&p->p_reflock, RW_WRITER);
280 280
281 DPRINTF(("%s: %d.%d exiting.\n", __func__, p->p_pid, l->l_lid)); 281 DPRINTF(("%s: %d.%d exiting.\n", __func__, p->p_pid, l->l_lid));
282 282
283 timers_free(p, TIMERS_ALL); 283 timers_free(p, TIMERS_ALL);
284#if defined(__HAVE_RAS) 284#if defined(__HAVE_RAS)
285 ras_purgeall(); 285 ras_purgeall();
286#endif 286#endif
287 287
288 /* 288 /*
289 * Close open files, release open-file table and free signal 289 * Close open files, release open-file table and free signal
290 * actions. This may block! 290 * actions. This may block!
291 */ 291 */
292 fd_free(); 292 fd_free();
293 cwdfree(p->p_cwdi); 293 cwdfree(p->p_cwdi);
294 p->p_cwdi = NULL; 294 p->p_cwdi = NULL;
295 doexithooks(p); 295 doexithooks(p);
296 sigactsfree(p->p_sigacts); 296 sigactsfree(p->p_sigacts);
297 297
298 /* 298 /*
299 * Write out accounting data. 299 * Write out accounting data.
300 */ 300 */
301 (void)acct_process(l); 301 (void)acct_process(l);
302 302
303#ifdef KTRACE 303#ifdef KTRACE
304 /* 304 /*
305 * Release trace file. 305 * Release trace file.
306 */ 306 */
307 if (p->p_tracep != NULL) { 307 if (p->p_tracep != NULL) {
308 mutex_enter(&ktrace_lock); 308 mutex_enter(&ktrace_lock);
309 ktrderef(p); 309 ktrderef(p);
310 mutex_exit(&ktrace_lock); 310 mutex_exit(&ktrace_lock);
311 } 311 }
312#endif 312#endif
313 313
314 p->p_xexit = exitcode; 314 p->p_xexit = exitcode;
315 p->p_xsig = signo; 315 p->p_xsig = signo;
316 316
317 /* 317 /*
318 * If emulation has process exit hook, call it now. 318 * If emulation has process exit hook, call it now.
319 * Set the exit status now so that the exit hook has 319 * Set the exit status now so that the exit hook has
320 * an opportunity to tweak it (COMPAT_LINUX requires 320 * an opportunity to tweak it (COMPAT_LINUX requires
321 * this for thread group emulation) 321 * this for thread group emulation)
322 */ 322 */
323 if (p->p_emul->e_proc_exit) 323 if (p->p_emul->e_proc_exit)
324 (*p->p_emul->e_proc_exit)(p); 324 (*p->p_emul->e_proc_exit)(p);
325 325
326 /* 326 /*
327 * Free the VM resources we're still holding on to. 327 * Free the VM resources we're still holding on to.
328 * We must do this from a valid thread because doing 328 * We must do this from a valid thread because doing
329 * so may block. This frees vmspace, which we don't 329 * so may block. This frees vmspace, which we don't
330 * need anymore. The only remaining lwp is the one 330 * need anymore. The only remaining lwp is the one
331 * we run at this moment, nothing runs in userland 331 * we run at this moment, nothing runs in userland
332 * anymore. 332 * anymore.
333 */ 333 */
334 ruspace(p); /* Update our vm resource use */ 334 ruspace(p); /* Update our vm resource use */
335 uvm_proc_exit(p); 335 uvm_proc_exit(p);
336 336
337 /* 337 /*
338 * Stop profiling. 338 * Stop profiling.
339 */ 339 */
340 if (__predict_false((p->p_stflag & PST_PROFIL) != 0)) { 340 if (__predict_false((p->p_stflag & PST_PROFIL) != 0)) {
341 mutex_spin_enter(&p->p_stmutex); 341 mutex_spin_enter(&p->p_stmutex);
342 stopprofclock(p); 342 stopprofclock(p);
343 mutex_spin_exit(&p->p_stmutex); 343 mutex_spin_exit(&p->p_stmutex);
344 } 344 }
345 345
346 /* 346 /*
347 * If parent is waiting for us to exit or exec, PL_PPWAIT is set; we 347 * If parent is waiting for us to exit or exec, PL_PPWAIT is set; we
348 * wake up the parent early to avoid deadlock. We can do this once 348 * wake up the parent early to avoid deadlock. We can do this once
349 * the VM resources are released. 349 * the VM resources are released.
350 */ 350 */
351 mutex_enter(proc_lock); 351 mutex_enter(proc_lock);
352 if (p->p_lflag & PL_PPWAIT) { 352 if (p->p_lflag & PL_PPWAIT) {
353 lwp_t *lp; 353 lwp_t *lp;
354 354
355 l->l_lwpctl = NULL; /* was on loan from blocked parent */ 355 l->l_lwpctl = NULL; /* was on loan from blocked parent */
356 p->p_lflag &= ~PL_PPWAIT; 356 p->p_lflag &= ~PL_PPWAIT;
357 357
358 lp = p->p_vforklwp; 358 lp = p->p_vforklwp;
359 p->p_vforklwp = NULL; 359 p->p_vforklwp = NULL;
360 lp->l_vforkwaiting = false; 360 lp->l_vforkwaiting = false;
361 cv_broadcast(&lp->l_waitcv); 361 cv_broadcast(&lp->l_waitcv);
362 } 362 }
363 363
364 if (SESS_LEADER(p)) { 364 if (SESS_LEADER(p)) {
365 struct vnode *vprele = NULL, *vprevoke = NULL; 365 struct vnode *vprele = NULL, *vprevoke = NULL;
366 struct session *sp = p->p_session; 366 struct session *sp = p->p_session;
367 struct tty *tp; 367 struct tty *tp;
368 368
369 if (sp->s_ttyvp) { 369 if (sp->s_ttyvp) {
370 /* 370 /*
371 * Controlling process. 371 * Controlling process.
372 * Signal foreground pgrp, 372 * Signal foreground pgrp,
373 * drain controlling terminal 373 * drain controlling terminal
374 * and revoke access to controlling terminal. 374 * and revoke access to controlling terminal.
375 */ 375 */
376 tp = sp->s_ttyp; 376 tp = sp->s_ttyp;
377 mutex_spin_enter(&tty_lock); 377 mutex_spin_enter(&tty_lock);
378 if (tp->t_session == sp) { 378 if (tp->t_session == sp) {
379 /* we can't guarantee the revoke will do this */ 379 /* we can't guarantee the revoke will do this */
380 pgrp = tp->t_pgrp; 380 pgrp = tp->t_pgrp;
381 tp->t_pgrp = NULL; 381 tp->t_pgrp = NULL;
382 tp->t_session = NULL; 382 tp->t_session = NULL;
383 mutex_spin_exit(&tty_lock); 383 mutex_spin_exit(&tty_lock);
384 if (pgrp != NULL) { 384 if (pgrp != NULL) {
385 pgsignal(pgrp, SIGHUP, 1); 385 pgsignal(pgrp, SIGHUP, 1);
386 } 386 }
387 mutex_exit(proc_lock); 387 mutex_exit(proc_lock);
388 (void) ttywait(tp); 388 (void) ttywait(tp);
389 mutex_enter(proc_lock); 389 mutex_enter(proc_lock);
390 390
391 /* The tty could have been revoked. */ 391 /* The tty could have been revoked. */
392 vprevoke = sp->s_ttyvp; 392 vprevoke = sp->s_ttyvp;
393 } else 393 } else
394 mutex_spin_exit(&tty_lock); 394 mutex_spin_exit(&tty_lock);
395 vprele = sp->s_ttyvp; 395 vprele = sp->s_ttyvp;
396 sp->s_ttyvp = NULL; 396 sp->s_ttyvp = NULL;
397 /* 397 /*
398 * s_ttyp is not zero'd; we use this to indicate 398 * s_ttyp is not zero'd; we use this to indicate
399 * that the session once had a controlling terminal. 399 * that the session once had a controlling terminal.
400 * (for logging and informational purposes) 400 * (for logging and informational purposes)
401 */ 401 */
402 } 402 }
403 sp->s_leader = NULL; 403 sp->s_leader = NULL;
404 404
405 if (vprevoke != NULL || vprele != NULL) { 405 if (vprevoke != NULL || vprele != NULL) {
406 if (vprevoke != NULL) { 406 if (vprevoke != NULL) {
407 /* Releases proc_lock. */ 407 /* Releases proc_lock. */
408 proc_sessrele(sp); 408 proc_sessrele(sp);
409 VOP_REVOKE(vprevoke, REVOKEALL); 409 VOP_REVOKE(vprevoke, REVOKEALL);
410 } else 410 } else
411 mutex_exit(proc_lock); 411 mutex_exit(proc_lock);
412 if (vprele != NULL) 412 if (vprele != NULL)
413 vrele(vprele); 413 vrele(vprele);
414 mutex_enter(proc_lock); 414 mutex_enter(proc_lock);
415 } 415 }
416 } 416 }
417 fixjobc(p, p->p_pgrp, 0); 417 fixjobc(p, p->p_pgrp, 0);
418 418
419 /* Release fstrans private data. */ 419 /* Release fstrans private data. */
420 fstrans_lwp_dtor(l); 420 fstrans_lwp_dtor(l);
421 421
422 /* 422 /*
423 * Finalize the last LWP's specificdata, as well as the 423 * Finalize the last LWP's specificdata, as well as the
424 * specificdata for the proc itself. 424 * specificdata for the proc itself.
425 */ 425 */
426 lwp_finispecific(l); 426 lwp_finispecific(l);
427 proc_finispecific(p); 427 proc_finispecific(p);
428 428
429 /* 429 /*
430 * Notify interested parties of our demise. 430 * Notify interested parties of our demise.
431 */ 431 */
432 KNOTE(&p->p_klist, NOTE_EXIT); 432 KNOTE(&p->p_klist, NOTE_EXIT);
433 433
434 SDT_PROBE(proc, kernel, , exit, 434 SDT_PROBE(proc, kernel, , exit,
435 ((p->p_sflag & PS_COREDUMP) ? CLD_DUMPED : 435 ((p->p_sflag & PS_COREDUMP) ? CLD_DUMPED :
436 (p->p_xsig ? CLD_KILLED : CLD_EXITED)), 436 (p->p_xsig ? CLD_KILLED : CLD_EXITED)),
437 0,0,0,0); 437 0,0,0,0);
438 438
439 /* 439 /*
440 * Reset p_opptr pointer of all former children which got 440 * Reset p_opptr pointer of all former children which got
441 * traced by another process and were reparented. We reset 441 * traced by another process and were reparented. We reset
442 * it to NULL here; the trace detach code then reparents 442 * it to NULL here; the trace detach code then reparents
443 * the child to initproc. We only check allproc list, since 443 * the child to initproc. We only check allproc list, since
444 * eventual former children on zombproc list won't reference 444 * eventual former children on zombproc list won't reference
445 * p_opptr anymore. 445 * p_opptr anymore.
446 */ 446 */
447 if (__predict_false(p->p_slflag & PSL_CHTRACED)) { 447 if (__predict_false(p->p_slflag & PSL_CHTRACED)) {
448 struct proc *q; 448 struct proc *q;
449 PROCLIST_FOREACH(q, &allproc) { 449 PROCLIST_FOREACH(q, &allproc) {
450 if (q->p_opptr == p) 450 if (q->p_opptr == p)
451 q->p_opptr = NULL; 451 q->p_opptr = NULL;
452 } 452 }
453 PROCLIST_FOREACH(q, &zombproc) { 453 PROCLIST_FOREACH(q, &zombproc) {
454 if (q->p_opptr == p) 454 if (q->p_opptr == p)
455 q->p_opptr = NULL; 455 q->p_opptr = NULL;
456 } 456 }
457 } 457 }
458 458
459 /* 459 /*
460 * Give orphaned children to init(8). 460 * Give orphaned children to init(8).
461 */ 461 */
462 child = LIST_FIRST(&p->p_children); 462 child = LIST_FIRST(&p->p_children);
463 wakeinit = (child != NULL); 463 wakeinit = (child != NULL);
464 for (; child != NULL; child = next_child) { 464 for (; child != NULL; child = next_child) {
465 next_child = LIST_NEXT(child, p_sibling); 465 next_child = LIST_NEXT(child, p_sibling);
466 466
467 /* 467 /*
468 * Traced processes are killed since their existence 468 * Traced processes are killed since their existence
469 * means someone is screwing up. Since we reset the 469 * means someone is screwing up. Since we reset the
470 * trace flags, the logic in sys_wait4() would not be 470 * trace flags, the logic in sys_wait4() would not be
471 * triggered to reparent the process to its 471 * triggered to reparent the process to its
472 * original parent, so we must do this here. 472 * original parent, so we must do this here.
473 */ 473 */
474 if (__predict_false(child->p_slflag & PSL_TRACED)) { 474 if (__predict_false(child->p_slflag & PSL_TRACED)) {
475 mutex_enter(p->p_lock); 475 mutex_enter(p->p_lock);
476 child->p_slflag &= 476 child->p_slflag &=
477 ~(PSL_TRACED|PSL_SYSCALL); 477 ~(PSL_TRACED|PSL_SYSCALL);
478 mutex_exit(p->p_lock); 478 mutex_exit(p->p_lock);
479 if (child->p_opptr != child->p_pptr) { 479 if (child->p_opptr != child->p_pptr) {
480 struct proc *t = child->p_opptr; 480 struct proc *t = child->p_opptr;
481 proc_reparent(child, t ? t : initproc); 481 proc_reparent(child, t ? t : initproc);
482 child->p_opptr = NULL; 482 child->p_opptr = NULL;
483 } else 483 } else
484 proc_reparent(child, initproc); 484 proc_reparent(child, initproc);
485 killproc(child, "orphaned traced process"); 485 killproc(child, "orphaned traced process");
486 } else 486 } else
487 proc_reparent(child, initproc); 487 proc_reparent(child, initproc);
488 } 488 }
489 489
490 /* 490 /*
491 * Move proc from allproc to zombproc, it's now nearly ready to be 491 * Move proc from allproc to zombproc, it's now nearly ready to be
492 * collected by parent. 492 * collected by parent.
493 */ 493 */
494 LIST_REMOVE(l, l_list); 494 LIST_REMOVE(l, l_list);
495 LIST_REMOVE(p, p_list); 495 LIST_REMOVE(p, p_list);
496 LIST_INSERT_HEAD(&zombproc, p, p_list); 496 LIST_INSERT_HEAD(&zombproc, p, p_list);
497 497
498 /* 498 /*
499 * Mark the process as dead. We must do this before we signal 499 * Mark the process as dead. We must do this before we signal
500 * the parent. 500 * the parent.
501 */ 501 */
502 p->p_stat = SDEAD; 502 p->p_stat = SDEAD;
503 503
504 /* Put in front of parent's sibling list for parent to collect it */ 504 /* Put in front of parent's sibling list for parent to collect it */
505 old_parent = p->p_pptr; 505 old_parent = p->p_pptr;
506 old_parent->p_nstopchild++; 506 old_parent->p_nstopchild++;
507 if (LIST_FIRST(&old_parent->p_children) != p) { 507 if (LIST_FIRST(&old_parent->p_children) != p) {
508 /* Put child where it can be found quickly */ 508 /* Put child where it can be found quickly */
509 LIST_REMOVE(p, p_sibling); 509 LIST_REMOVE(p, p_sibling);
510 LIST_INSERT_HEAD(&old_parent->p_children, p, p_sibling); 510 LIST_INSERT_HEAD(&old_parent->p_children, p, p_sibling);
511 } 511 }
512 512
513 /* 513 /*
514 * Notify parent that we're gone. If parent has the P_NOCLDWAIT 514 * Notify parent that we're gone. If parent has the P_NOCLDWAIT
515 * flag set, notify init instead (and hope it will handle 515 * flag set, notify init instead (and hope it will handle
516 * this situation). 516 * this situation).
517 */ 517 */
518 if (old_parent->p_flag & (PK_NOCLDWAIT|PK_CLDSIGIGN)) { 518 if (old_parent->p_flag & (PK_NOCLDWAIT|PK_CLDSIGIGN)) {
519 proc_reparent(p, initproc); 519 proc_reparent(p, initproc);
520 wakeinit = 1; 520 wakeinit = 1;
521 521
522 /* 522 /*
523 * If this was the last child of our parent, notify 523 * If this was the last child of our parent, notify
524 * parent, so in case he was wait(2)ing, he will 524 * parent, so in case he was wait(2)ing, he will
525 * continue. 525 * continue.
526 */ 526 */
527 if (LIST_FIRST(&old_parent->p_children) == NULL) 527 if (LIST_FIRST(&old_parent->p_children) == NULL)
528 cv_broadcast(&old_parent->p_waitcv); 528 cv_broadcast(&old_parent->p_waitcv);
529 } 529 }
530 530
531 /* Reload parent pointer, since p may have been reparented above */ 531 /* Reload parent pointer, since p may have been reparented above */
532 new_parent = p->p_pptr; 532 new_parent = p->p_pptr;
533 533
534 if (__predict_false(p->p_exitsig != 0)) { 534 if (__predict_false(p->p_exitsig != 0)) {
535 exit_psignal(p, new_parent, &ksi); 535 exit_psignal(p, new_parent, &ksi);
536 kpsignal(new_parent, &ksi, NULL); 536 kpsignal(new_parent, &ksi, NULL);
537 } 537 }
538 538
539 /* Calculate the final rusage info. */ 539 /* Calculate the final rusage info. */
540 calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime, 540 calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
541 NULL, NULL); 541 NULL, NULL);
542 542
543 if (wakeinit) 543 if (wakeinit)
544 cv_broadcast(&initproc->p_waitcv); 544 cv_broadcast(&initproc->p_waitcv);
545 545
546 callout_destroy(&l->l_timeout_ch); 546 callout_destroy(&l->l_timeout_ch);
547 547
548 /* 548 /*
549 * Release any PCU resources before becoming a zombie. 549 * Release any PCU resources before becoming a zombie.
550 */ 550 */
551 pcu_discard_all(l); 551 pcu_discard_all(l);
552 552
553 mutex_enter(p->p_lock); 553 mutex_enter(p->p_lock);
554 /* Don't bother with p_treelock as no other LWPs remain. */ 554 /* Don't bother with p_treelock as no other LWPs remain. */
555 l2 = radix_tree_remove_node(&p->p_lwptree, (uint64_t)(l->l_lid - 1)); 555 l2 = radix_tree_remove_node(&p->p_lwptree, (uint64_t)(l->l_lid - 1));
556 KASSERT(l2 == l); 556 KASSERT(l2 == l);
557 KASSERT(radix_tree_empty_tree_p(&p->p_lwptree)); 557 KASSERT(radix_tree_empty_tree_p(&p->p_lwptree));
558 radix_tree_fini_tree(&p->p_lwptree); 558 radix_tree_fini_tree(&p->p_lwptree);
559 /* Free the linux lwp id */ 559 /* Free the linux lwp id */
560 if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) 560 if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid)
561 proc_free_pid(l->l_lid); 561 proc_free_pid(l->l_lid);
562 lwp_drainrefs(l); 562 lwp_drainrefs(l);
563 lwp_lock(l); 563 lwp_lock(l);
564 l->l_prflag &= ~LPR_DETACHED; 564 l->l_prflag &= ~LPR_DETACHED;
565 l->l_stat = LSZOMB; 565 l->l_stat = LSZOMB;
566 lwp_unlock(l); 566 lwp_unlock(l);
567 KASSERT(curlwp == l); 567 KASSERT(curlwp == l);
568 KASSERT(p->p_nrlwps == 1); 568 KASSERT(p->p_nrlwps == 1);
569 KASSERT(p->p_nlwps == 1); 569 KASSERT(p->p_nlwps == 1);
570 p->p_stat = SZOMB; 570 p->p_stat = SZOMB;
571 p->p_nrlwps--; 571 p->p_nrlwps--;
572 p->p_nzlwps++; 572 p->p_nzlwps++;
573 p->p_ndlwps = 0; 573 p->p_ndlwps = 0;
574 mutex_exit(p->p_lock); 574 mutex_exit(p->p_lock);
575 575
576 /* 576 /*
577 * Signal the parent to collect us, and drop the proclist lock. 577 * Signal the parent to collect us, and drop the proclist lock.
578 * Drop debugger/procfs lock; no new references can be gained. 578 * Drop debugger/procfs lock; no new references can be gained.
579 */ 579 */
580 cv_broadcast(&p->p_pptr->p_waitcv); 580 cv_broadcast(&p->p_pptr->p_waitcv);
581 rw_exit(&p->p_reflock); 581 rw_exit(&p->p_reflock);
582 mutex_exit(proc_lock); 582 mutex_exit(proc_lock);
583 583
584 /* 584 /*
585 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP! 585 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
586 */ 586 */
587 587
588 /* 588 /*
589 * Give machine-dependent code a chance to free any MD LWP 589 * Give machine-dependent code a chance to free any MD LWP
590 * resources. This must be done before uvm_lwp_exit(), in 590 * resources. This must be done before uvm_lwp_exit(), in
591 * case these resources are in the PCB. 591 * case these resources are in the PCB.
592 */ 592 */
593 cpu_lwp_free(l, 1); 593 cpu_lwp_free(l, 1);
594 594
595 /* Switch away into oblivion. */ 595 /* Switch away into oblivion. */
596 lwp_lock(l); 596 lwp_lock(l);
597 spc_lock(l->l_cpu); 597 spc_lock(l->l_cpu);
598 mi_switch(l); 598 mi_switch(l);
599 panic("exit1"); 599 panic("exit1");
600} 600}
601 601
602void 602void
603exit_lwps(struct lwp *l) 603exit_lwps(struct lwp *l)
604{ 604{
605 proc_t *p = l->l_proc; 605 proc_t *p = l->l_proc;
606 lwp_t *l2; 606 lwp_t *l2;
607 607
608retry: 608retry:
609 KASSERT(mutex_owned(p->p_lock)); 609 KASSERT(mutex_owned(p->p_lock));
610 610
611 /* 611 /*
612 * Interrupt LWPs in interruptable sleep, unsuspend suspended 612 * Interrupt LWPs in interruptable sleep, unsuspend suspended
613 * LWPs and then wait for everyone else to finish. 613 * LWPs and then wait for everyone else to finish.
614 */ 614 */
615 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 615 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
616 if (l2 == l) 616 if (l2 == l)
617 continue; 617 continue;
618 lwp_lock(l2); 618 lwp_lock(l2);
619 l2->l_flag |= LW_WEXIT; 619 l2->l_flag |= LW_WEXIT;
620 if ((l2->l_stat == LSSLEEP && (l2->l_flag & LW_SINTR)) || 620 if ((l2->l_stat == LSSLEEP && (l2->l_flag & LW_SINTR)) ||
621 l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) { 621 l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) {
622 l2->l_flag &= ~LW_DBGSUSPEND; 622 l2->l_flag &= ~LW_DBGSUSPEND;
623 /* setrunnable() will release the lock. */ 623 /* setrunnable() will release the lock. */
624 setrunnable(l2); 624 setrunnable(l2);
625 continue; 625 continue;
626 } 626 }
627 lwp_need_userret(l2); 627 lwp_need_userret(l2);
628 lwp_unlock(l2); 628 lwp_unlock(l2);
629 } 629 }
630 630
631 /* 631 /*
632 * Wait for every LWP to exit. Note: LWPs can get suspended/slept 632 * Wait for every LWP to exit. Note: LWPs can get suspended/slept
633 * behind us or there may even be new LWPs created. Therefore, a 633 * behind us or there may even be new LWPs created. Therefore, a
634 * full retry is required on error. 634 * full retry is required on error.
635 */ 635 */
636 while (p->p_nlwps > 1) { 636 while (p->p_nlwps > 1) {
637 if (lwp_wait(l, 0, NULL, true)) { 637 if (lwp_wait(l, 0, NULL, true)) {
638 goto retry; 638 goto retry;
639 } 639 }
640 } 640 }
641 641
642 KASSERT(p->p_nlwps == 1); 642 KASSERT(p->p_nlwps == 1);
643} 643}
644 644
645int 645int
646do_sys_waitid(idtype_t idtype, id_t id, int *pid, int *status, int options, 646do_sys_waitid(idtype_t idtype, id_t id, int *pid, int *status, int options,
647 struct wrusage *wru, siginfo_t *si) 647 struct wrusage *wru, siginfo_t *si)
648{ 648{
649 proc_t *child; 649 proc_t *child;
650 int error; 650 int error;
651 651
652 652
653 if (wru != NULL) 653 if (wru != NULL)
654 memset(wru, 0, sizeof(*wru)); 654 memset(wru, 0, sizeof(*wru));
655 if (si != NULL) 655 if (si != NULL)
656 memset(si, 0, sizeof(*si)); 656 memset(si, 0, sizeof(*si));
657 657
658 mutex_enter(proc_lock); 658 mutex_enter(proc_lock);
659 error = find_stopped_child(curproc, idtype, id, options, &child, 659 error = find_stopped_child(curproc, idtype, id, options, &child,
660 wru, si); 660 wru, si);
661 if (child == NULL) { 661 if (child == NULL) {
662 mutex_exit(proc_lock); 662 mutex_exit(proc_lock);
663 *pid = 0; 663 *pid = 0;
664 *status = 0; 664 *status = 0;
665 return error; 665 return error;
666 } 666 }
667 *pid = child->p_pid; 667 *pid = child->p_pid;
668 668
669 if (child->p_stat == SZOMB) { 669 if (child->p_stat == SZOMB) {
670 /* Child is exiting */ 670 /* Child is exiting */
671 *status = P_WAITSTATUS(child); 671 *status = P_WAITSTATUS(child);
672 /* proc_free() will release the proc_lock. */ 672 /* proc_free() will release the proc_lock. */
673 if (options & WNOWAIT) { 673 if (options & WNOWAIT) {
674 mutex_exit(proc_lock); 674 mutex_exit(proc_lock);
675 } else { 675 } else {
676 proc_free(child, wru); 676 proc_free(child, wru);
677 } 677 }
678 } else { 678 } else {
679 /* Don't mark SIGCONT if we are being stopped */ 679 /* Don't mark SIGCONT if we are being stopped */
680 *status = (child->p_xsig == SIGCONT && child->p_stat != SSTOP) ? 680 *status = (child->p_xsig == SIGCONT && child->p_stat != SSTOP) ?
681 W_CONTCODE() : W_STOPCODE(child->p_xsig); 681 W_CONTCODE() : W_STOPCODE(child->p_xsig);
682 mutex_exit(proc_lock); 682 mutex_exit(proc_lock);
683 } 683 }
684 return 0; 684 return 0;
685} 685}
686 686
687int 687int
688do_sys_wait(int *pid, int *status, int options, struct rusage *ru) 688do_sys_wait(int *pid, int *status, int options, struct rusage *ru)
689{ 689{
690 idtype_t idtype; 690 idtype_t idtype;
691 id_t id; 691 id_t id;
692 int ret; 692 int ret;
693 struct wrusage wru; 693 struct wrusage wru;
694 694
695 /* 695 /*
696 * Translate the special pid values into the (idtype, pid) 696 * Translate the special pid values into the (idtype, pid)
697 * pair for wait6. The WAIT_MYPGRP case is handled by 697 * pair for wait6. The WAIT_MYPGRP case is handled by
698 * find_stopped_child() on its own. 698 * find_stopped_child() on its own.
699 */ 699 */
700 if (*pid == WAIT_ANY) { 700 if (*pid == WAIT_ANY) {
701 idtype = P_ALL; 701 idtype = P_ALL;
702 id = 0; 702 id = 0;
703 } else if (*pid < 0) { 703 } else if (*pid < 0) {
704 idtype = P_PGID; 704 idtype = P_PGID;
705 id = (id_t)-*pid; 705 id = (id_t)-*pid;
706 } else { 706 } else {
707 idtype = P_PID; 707 idtype = P_PID;
708 id = (id_t)*pid; 708 id = (id_t)*pid;
709 } 709 }
710 options |= WEXITED | WTRAPPED; 710 options |= WEXITED | WTRAPPED;
711 ret = do_sys_waitid(idtype, id, pid, status, options, ru ? &wru : NULL, 711 ret = do_sys_waitid(idtype, id, pid, status, options, ru ? &wru : NULL,
712 NULL); 712 NULL);
713 if (ru) 713 if (ru)
714 *ru = wru.wru_self; 714 *ru = wru.wru_self;
715 return ret; 715 return ret;
716} 716}
717 717
718int 718int
719sys___wait450(struct lwp *l, const struct sys___wait450_args *uap, 719sys___wait450(struct lwp *l, const struct sys___wait450_args *uap,
720 register_t *retval) 720 register_t *retval)
721{ 721{
722 /* { 722 /* {
723 syscallarg(int) pid; 723 syscallarg(int) pid;
724 syscallarg(int *) status; 724 syscallarg(int *) status;
725 syscallarg(int) options; 725 syscallarg(int) options;
726 syscallarg(struct rusage *) rusage; 726 syscallarg(struct rusage *) rusage;
727 } */ 727 } */
728 int error, status, pid = SCARG(uap, pid); 728 int error, status, pid = SCARG(uap, pid);
729 struct rusage ru; 729 struct rusage ru;
730 730
731 error = do_sys_wait(&pid, &status, SCARG(uap, options), 731 error = do_sys_wait(&pid, &status, SCARG(uap, options),
732 SCARG(uap, rusage) != NULL ? &ru : NULL); 732 SCARG(uap, rusage) != NULL ? &ru : NULL);
733 733
734 retval[0] = pid; 734 retval[0] = pid;
735 if (pid == 0) { 735 if (pid == 0) {
736 return error; 736 return error;
737 } 737 }
738 if (SCARG(uap, status)) { 738 if (SCARG(uap, status)) {
739 error = copyout(&status, SCARG(uap, status), sizeof(status)); 739 error = copyout(&status, SCARG(uap, status), sizeof(status));
740 } 740 }
741 if (SCARG(uap, rusage) && error == 0) { 741 if (SCARG(uap, rusage) && error == 0) {
742 error = copyout(&ru, SCARG(uap, rusage), sizeof(ru)); 742 error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
743 } 743 }
744 return error; 744 return error;
745} 745}
746 746
747int 747int
748sys_wait6(struct lwp *l, const struct sys_wait6_args *uap, register_t *retval) 748sys_wait6(struct lwp *l, const struct sys_wait6_args *uap, register_t *retval)
749{ 749{
750 /* { 750 /* {
751 syscallarg(idtype_t) idtype; 751 syscallarg(idtype_t) idtype;
752 syscallarg(id_t) id; 752 syscallarg(id_t) id;
753 syscallarg(int *) status; 753 syscallarg(int *) status;
754 syscallarg(int) options; 754 syscallarg(int) options;
755 syscallarg(struct wrusage *) wru; 755 syscallarg(struct wrusage *) wru;
756 syscallarg(siginfo_t *) si; 756 syscallarg(siginfo_t *) si;
757 } */ 757 } */
758 struct wrusage wru, *wrup; 758 struct wrusage wru, *wrup;
759 siginfo_t si, *sip; 759 siginfo_t si, *sip;
760 idtype_t idtype; 760 idtype_t idtype;
761 int pid; 761 int pid;
762 id_t id; 762 id_t id;
763 int error, status; 763 int error, status;
764 764
765 idtype = SCARG(uap, idtype); 765 idtype = SCARG(uap, idtype);
766 id = SCARG(uap, id); 766 id = SCARG(uap, id);
767 767
768 if (SCARG(uap, wru) != NULL) 768 if (SCARG(uap, wru) != NULL)
769 wrup = &wru; 769 wrup = &wru;
770 else 770 else
771 wrup = NULL; 771 wrup = NULL;
772 772
773 if (SCARG(uap, info) != NULL) 773 if (SCARG(uap, info) != NULL)
774 sip = &si; 774 sip = &si;
775 else 775 else
776 sip = NULL; 776 sip = NULL;
777 777
778 /* 778 /*
779 * We expect all callers of wait6() to know about WEXITED and 779 * We expect all callers of wait6() to know about WEXITED and
780 * WTRAPPED. 780 * WTRAPPED.
781 */ 781 */
782 error = do_sys_waitid(idtype, id, &pid, &status, SCARG(uap, options), 782 error = do_sys_waitid(idtype, id, &pid, &status, SCARG(uap, options),
783 wrup, sip); 783 wrup, sip);
784 784
785 retval[0] = pid; /* tell userland who it was */ 785 retval[0] = pid; /* tell userland who it was */
786 786
787#if 0 787#if 0
788 /* 788 /*
789 * should we copyout if there was no process, hence no useful data? 789 * should we copyout if there was no process, hence no useful data?
 790 * We don't for an old style wait4() (etc) but I believe 790 * We don't for an old style wait4() (etc) but I believe
791 * FreeBSD does for wait6(), so a tossup... Go with FreeBSD for now. 791 * FreeBSD does for wait6(), so a tossup... Go with FreeBSD for now.
792 */ 792 */
793 if (pid == 0) 793 if (pid == 0)
794 return error; 794 return error;
795#endif 795#endif
796 796
797 if (SCARG(uap, status) != NULL && error == 0) 797 if (SCARG(uap, status) != NULL && error == 0)
798 error = copyout(&status, SCARG(uap, status), sizeof(status)); 798 error = copyout(&status, SCARG(uap, status), sizeof(status));
799 if (SCARG(uap, wru) != NULL && error == 0) 799 if (SCARG(uap, wru) != NULL && error == 0)
800 error = copyout(&wru, SCARG(uap, wru), sizeof(wru)); 800 error = copyout(&wru, SCARG(uap, wru), sizeof(wru));
801 if (SCARG(uap, info) != NULL && error == 0) 801 if (SCARG(uap, info) != NULL && error == 0)
802 error = copyout(&si, SCARG(uap, info), sizeof(si)); 802 error = copyout(&si, SCARG(uap, info), sizeof(si));
803 return error; 803 return error;
804} 804}
805 805
806 806
807/* 807/*
808 * Find a process that matches the provided criteria, and fill siginfo 808 * Find a process that matches the provided criteria, and fill siginfo
809 * and resources if found. 809 * and resources if found.
810 * Returns: 810 * Returns:
811 * -1: Not found, abort early 811 * -1: Not found, abort early
812 * 0: Not matched 812 * 0: Not matched
813 * 1: Matched, there might be more matches 813 * 1: Matched, there might be more matches
814 * 2: This is the only match 814 * 2: This is the only match
815 */ 815 */
816static int 816static int
817match_process(const struct proc *pp, struct proc **q, idtype_t idtype, id_t id, 817match_process(const struct proc *pp, struct proc **q, idtype_t idtype, id_t id,
818 int options, struct wrusage *wrusage, siginfo_t *siginfo) 818 int options, struct wrusage *wrusage, siginfo_t *siginfo)
819{ 819{
820 struct rusage *rup; 820 struct rusage *rup;
821 struct proc *p = *q; 821 struct proc *p = *q;
822 int rv = 1; 822 int rv = 1;
823 823
824 mutex_enter(p->p_lock); 824 mutex_enter(p->p_lock);
825 switch (idtype) { 825 switch (idtype) {
826 case P_ALL: 826 case P_ALL:
827 break; 827 break;
828 case P_PID: 828 case P_PID:
829 if (p->p_pid != (pid_t)id) { 829 if (p->p_pid != (pid_t)id) {
830 mutex_exit(p->p_lock); 830 mutex_exit(p->p_lock);
831 p = *q = proc_find_raw((pid_t)id); 831 p = *q = proc_find_raw((pid_t)id);
832 if (p == NULL || p->p_stat == SIDL || p->p_pptr != pp) { 832 if (p == NULL || p->p_stat == SIDL || p->p_pptr != pp) {
833 *q = NULL; 833 *q = NULL;
834 return -1; 834 return -1;
835 } 835 }
836 mutex_enter(p->p_lock); 836 mutex_enter(p->p_lock);
837 } 837 }
838 rv++; 838 rv++;
839 break; 839 break;
840 case P_PGID: 840 case P_PGID:
841 if (p->p_pgid != (pid_t)id) 841 if (p->p_pgid != (pid_t)id)
842 goto out; 842 goto out;
843 break; 843 break;
844 case P_SID: 844 case P_SID:
845 if (p->p_session->s_sid != (pid_t)id) 845 if (p->p_session->s_sid != (pid_t)id)
846 goto out; 846 goto out;
847 break; 847 break;
848 case P_UID: 848 case P_UID:
849 if (kauth_cred_geteuid(p->p_cred) != (uid_t)id) 849 if (kauth_cred_geteuid(p->p_cred) != (uid_t)id)
850 goto out; 850 goto out;
851 break; 851 break;
852 case P_GID: 852 case P_GID:
853 if (kauth_cred_getegid(p->p_cred) != (gid_t)id) 853 if (kauth_cred_getegid(p->p_cred) != (gid_t)id)
854 goto out; 854 goto out;
855 break; 855 break;
856 case P_CID: 856 case P_CID:
857 case P_PSETID: 857 case P_PSETID:
858 case P_CPUID: 858 case P_CPUID:
859 /* XXX: Implement me */ 859 /* XXX: Implement me */
860 default: 860 default:
861 out: 861 out:
862 mutex_exit(p->p_lock); 862 mutex_exit(p->p_lock);
863 return 0; 863 return 0;
864 } 864 }
865 865
866 if ((options & WEXITED) == 0 && p->p_stat == SZOMB) 866 if ((options & WEXITED) == 0 && p->p_stat == SZOMB)
867 goto out; 867 goto out;
868 868
869 if (siginfo != NULL) { 869 if (siginfo != NULL) {
870 siginfo->si_errno = 0; 870 siginfo->si_errno = 0;
871 871
872 /* 872 /*
873 * SUSv4 requires that the si_signo value is always 873 * SUSv4 requires that the si_signo value is always
874 * SIGCHLD. Obey it despite the rfork(2) interface 874 * SIGCHLD. Obey it despite the rfork(2) interface
875 * allows to request other signal for child exit 875 * allows to request other signal for child exit
876 * notification. 876 * notification.
877 */ 877 */
878 siginfo->si_signo = SIGCHLD; 878 siginfo->si_signo = SIGCHLD;
879 879
880 /* 880 /*
881 * This is still a rough estimate. We will fix the 881 * This is still a rough estimate. We will fix the
882 * cases TRAPPED, STOPPED, and CONTINUED later. 882 * cases TRAPPED, STOPPED, and CONTINUED later.
883 */ 883 */
884 if (p->p_sflag & PS_COREDUMP) { 884 if (p->p_sflag & PS_COREDUMP) {
885 siginfo->si_code = CLD_DUMPED; 885 siginfo->si_code = CLD_DUMPED;
886 siginfo->si_status = p->p_xsig; 886 siginfo->si_status = p->p_xsig;
887 } else if (p->p_xsig) { 887 } else if (p->p_xsig) {
888 siginfo->si_code = CLD_KILLED; 888 siginfo->si_code = CLD_KILLED;
889 siginfo->si_status = p->p_xsig; 889 siginfo->si_status = p->p_xsig;
890 } else { 890 } else {
891 siginfo->si_code = CLD_EXITED; 891 siginfo->si_code = CLD_EXITED;
892 siginfo->si_status = p->p_xexit; 892 siginfo->si_status = p->p_xexit;
893 } 893 }
894 894
895 siginfo->si_pid = p->p_pid; 895 siginfo->si_pid = p->p_pid;
896 siginfo->si_uid = kauth_cred_geteuid(p->p_cred); 896 siginfo->si_uid = kauth_cred_geteuid(p->p_cred);
897 siginfo->si_utime = p->p_stats->p_ru.ru_utime.tv_sec; 897 siginfo->si_utime = p->p_stats->p_ru.ru_utime.tv_sec;
898 siginfo->si_stime = p->p_stats->p_ru.ru_stime.tv_sec; 898 siginfo->si_stime = p->p_stats->p_ru.ru_stime.tv_sec;
899 } 899 }
900 900
901 /* 901 /*
902 * There should be no reason to limit resources usage info to 902 * There should be no reason to limit resources usage info to
903 * exited processes only. A snapshot about any resources used 903 * exited processes only. A snapshot about any resources used
904 * by a stopped process may be exactly what is needed. 904 * by a stopped process may be exactly what is needed.
905 */ 905 */
906 if (wrusage != NULL) { 906 if (wrusage != NULL) {
907 rup = &wrusage->wru_self; 907 rup = &wrusage->wru_self;
908 *rup = p->p_stats->p_ru; 908 *rup = p->p_stats->p_ru;
909 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL); 909 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL);
910 910
911 rup = &wrusage->wru_children; 911 rup = &wrusage->wru_children;
912 *rup = p->p_stats->p_cru; 912 *rup = p->p_stats->p_cru;
913 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL); 913 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL);
914 } 914 }
915 915
916 mutex_exit(p->p_lock); 916 mutex_exit(p->p_lock);
917 return rv; 917 return rv;
918} 918}
919 919
920/* 920/*
921 * Determine if there are existing processes being debugged 921 * Determine if there are existing processes being debugged
922 * that used to be (and sometime later will be again) children 922 * that used to be (and sometime later will be again) children
923 * of a specific parent (while matching wait criteria) 923 * of a specific parent (while matching wait criteria)
924 */ 924 */
925static bool 925static bool
926debugged_child_exists(idtype_t idtype, id_t id, int options, siginfo_t *si, 926debugged_child_exists(idtype_t idtype, id_t id, int options, siginfo_t *si,
927 const struct proc *parent) 927 const struct proc *parent)
928{ 928{
929 struct proc *pp; 929 struct proc *pp;
930 930
931 /* 931 /*
932 * If we are searching for a specific pid, we can optimise a little 932 * If we are searching for a specific pid, we can optimise a little
933 */ 933 */
934 if (idtype == P_PID) { 934 if (idtype == P_PID) {
935 /* 935 /*
936 * Check the specific process to see if its real parent is us 936 * Check the specific process to see if its real parent is us
937 */ 937 */
938 pp = proc_find_raw((pid_t)id); 938 pp = proc_find_raw((pid_t)id);
939 if (pp != NULL && pp->p_stat != SIDL && pp->p_opptr == parent) { 939 if (pp != NULL && pp->p_stat != SIDL && pp->p_opptr == parent) {
940 /* 940 /*
941 * using P_ALL here avoids match_process() doing the 941 * using P_ALL here avoids match_process() doing the
942 * same work that we just did, but incorrectly for 942 * same work that we just did, but incorrectly for
943 * this scenario. 943 * this scenario.
944 */ 944 */
945 if (match_process(parent, &pp, P_ALL, id, options, 945 if (match_process(parent, &pp, P_ALL, id, options,
946 NULL, si)) 946 NULL, si))
947 return true; 947 return true;
948 } 948 }
949 return false; 949 return false;
950 } 950 }
951 951
952 /* 952 /*
953 * For the hard cases, just look everywhere to see if some 953 * For the hard cases, just look everywhere to see if some
954 * stolen (reparented) process is really our lost child. 954 * stolen (reparented) process is really our lost child.
955 * Then check if that process could satisfy the wait conditions. 955 * Then check if that process could satisfy the wait conditions.
956 */ 956 */
957 957
958 /* 958 /*
959 * XXX inefficient, but hopefully fairly rare. 959 * XXX inefficient, but hopefully fairly rare.
960 * XXX should really use a list of reparented processes. 960 * XXX should really use a list of reparented processes.
961 */ 961 */
962 PROCLIST_FOREACH(pp, &allproc) { 962 PROCLIST_FOREACH(pp, &allproc) {
963 if (pp->p_stat == SIDL) /* XXX impossible ?? */ 963 if (pp->p_stat == SIDL) /* XXX impossible ?? */
964 continue; 964 continue;
965 if (pp->p_opptr == parent && 965 if (pp->p_opptr == parent &&
966 match_process(parent, &pp, idtype, id, options, NULL, si)) 966 match_process(parent, &pp, idtype, id, options, NULL, si))
967 return true; 967 return true;
968 } 968 }
969 PROCLIST_FOREACH(pp, &zombproc) { 969 PROCLIST_FOREACH(pp, &zombproc) {
970 if (pp->p_stat == SIDL) /* XXX impossible ?? */ 970 if (pp->p_stat == SIDL) /* XXX impossible ?? */
971 continue; 971 continue;
972 if (pp->p_opptr == parent && 972 if (pp->p_opptr == parent &&
973 match_process(parent, &pp, idtype, id, options, NULL, si)) 973 match_process(parent, &pp, idtype, id, options, NULL, si))
974 return true; 974 return true;
975 } 975 }
976 976
977 return false; 977 return false;
978} 978}
979 979
980/* 980/*
981 * Scan list of child processes for a child process that has stopped or 981 * Scan list of child processes for a child process that has stopped or
982 * exited. Used by sys_wait4 and 'compat' equivalents. 982 * exited. Used by sys_wait4 and 'compat' equivalents.
983 * 983 *
984 * Must be called with the proc_lock held, and may release while waiting. 984 * Must be called with the proc_lock held, and may release while waiting.
985 */ 985 */
986static int 986static int
987find_stopped_child(struct proc *parent, idtype_t idtype, id_t id, int options, 987find_stopped_child(struct proc *parent, idtype_t idtype, id_t id, int options,
988 struct proc **child_p, struct wrusage *wru, siginfo_t *si) 988 struct proc **child_p, struct wrusage *wru, siginfo_t *si)
989{ 989{
990 struct proc *child, *dead; 990 struct proc *child, *dead;
991 int error; 991 int error;
992 992
993 KASSERT(mutex_owned(proc_lock)); 993 KASSERT(mutex_owned(proc_lock));
994 994
995 if (options & ~WALLOPTS) { 995 if (options & ~WALLOPTS) {
996 *child_p = NULL; 996 *child_p = NULL;
997 return EINVAL; 997 return EINVAL;
998 } 998 }
999 999
1000 if ((options & WSELECTOPTS) == 0) { 1000 if ((options & WSELECTOPTS) == 0) {
1001 /* 1001 /*
1002 * We will be unable to find any matching processes, 1002 * We will be unable to find any matching processes,
1003 * because there are no known events to look for. 1003 * because there are no known events to look for.
1004 * Prefer to return error instead of blocking 1004 * Prefer to return error instead of blocking
1005 * indefinitely. 1005 * indefinitely.
1006 */ 1006 */
1007 *child_p = NULL; 1007 *child_p = NULL;
1008 return EINVAL; 1008 return EINVAL;
1009 } 1009 }
1010 1010
1011 if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) { 1011 if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
1012 mutex_enter(parent->p_lock); 1012 mutex_enter(parent->p_lock);
1013 id = (id_t)parent->p_pgid; 1013 id = (id_t)parent->p_pgid;
1014 mutex_exit(parent->p_lock); 1014 mutex_exit(parent->p_lock);
1015 idtype = P_PGID; 1015 idtype = P_PGID;
1016 } 1016 }
1017 1017
1018 for (;;) { 1018 for (;;) {
1019 error = ECHILD; 1019 error = ECHILD;
1020 dead = NULL; 1020 dead = NULL;
1021 1021
1022 LIST_FOREACH(child, &parent->p_children, p_sibling) { 1022 LIST_FOREACH(child, &parent->p_children, p_sibling) {
1023 int rv = match_process(parent, &child, idtype, id, 1023 int rv = match_process(parent, &child, idtype, id,
1024 options, wru, si); 1024 options, wru, si);
1025 if (rv == -1) 1025 if (rv == -1)
1026 break; 1026 break;
1027 if (rv == 0) 1027 if (rv == 0)
1028 continue; 1028 continue;
1029 1029
1030 /* 1030 /*
1031 * Wait for processes with p_exitsig != SIGCHLD 1031 * Wait for processes with p_exitsig != SIGCHLD
 1032 * only if WALTSIG is set; wait for 1032 * only if WALTSIG is set; wait for
1033 * processes with p_exitsig == SIGCHLD only 1033 * processes with p_exitsig == SIGCHLD only
1034 * if WALTSIG is clear. 1034 * if WALTSIG is clear.
1035 */ 1035 */
1036 if (((options & WALLSIG) == 0) && 1036 if (((options & WALLSIG) == 0) &&
1037 (options & WALTSIG ? child->p_exitsig == SIGCHLD 1037 (options & WALTSIG ? child->p_exitsig == SIGCHLD
1038 : P_EXITSIG(child) != SIGCHLD)){ 1038 : P_EXITSIG(child) != SIGCHLD)){
1039 if (rv == 2) { 1039 if (rv == 2) {
1040 child = NULL; 1040 child = NULL;
1041 break; 1041 break;
1042 } 1042 }
1043 continue; 1043 continue;
1044 } 1044 }
1045 1045
1046 error = 0; 1046 error = 0;
1047 if ((options & WNOZOMBIE) == 0) { 1047 if ((options & WNOZOMBIE) == 0) {
1048 if (child->p_stat == SZOMB) 1048 if (child->p_stat == SZOMB)
1049 break; 1049 break;
1050 if (child->p_stat == SDEAD) { 1050 if (child->p_stat == SDEAD) {
1051 /* 1051 /*
1052 * We may occasionally arrive here 1052 * We may occasionally arrive here
1053 * after receiving a signal, but 1053 * after receiving a signal, but
1054 * immediately before the child 1054 * immediately before the child
1055 * process is zombified. The wait 1055 * process is zombified. The wait
1056 * will be short, so avoid returning 1056 * will be short, so avoid returning
1057 * to userspace. 1057 * to userspace.
1058 */ 1058 */
1059 dead = child; 1059 dead = child;
1060 } 1060 }
1061 } 1061 }
1062 1062
1063 if ((options & WCONTINUED) != 0 && 1063 if ((options & WCONTINUED) != 0 &&
1064 child->p_xsig == SIGCONT && 1064 child->p_xsig == SIGCONT &&
1065 (child->p_sflag & PS_CONTINUED)) { 1065 (child->p_sflag & PS_CONTINUED)) {
1066 if ((options & WNOWAIT) == 0) { 1066 if ((options & WNOWAIT) == 0) {
1067 child->p_sflag &= ~PS_CONTINUED; 1067 child->p_sflag &= ~PS_CONTINUED;
1068 child->p_waited = 1; 1068 child->p_waited = 1;
1069 parent->p_nstopchild--; 1069 parent->p_nstopchild--;
1070 } 1070 }
1071 if (si) { 1071 if (si) {
1072 si->si_status = child->p_xsig; 1072 si->si_status = child->p_xsig;
1073 si->si_code = CLD_CONTINUED; 1073 si->si_code = CLD_CONTINUED;
1074 } 1074 }
1075 break; 1075 break;
1076 } 1076 }
1077 1077
1078 if ((options & (WTRAPPED|WSTOPPED)) != 0 && 1078 if ((options & (WTRAPPED|WSTOPPED)) != 0 &&
1079 child->p_stat == SSTOP && 1079 child->p_stat == SSTOP &&
1080 child->p_waited == 0 && 1080 child->p_waited == 0 &&
1081 ((child->p_slflag & PSL_TRACED) || 1081 ((child->p_slflag & PSL_TRACED) ||
1082 options & (WUNTRACED|WSTOPPED))) { 1082 options & (WUNTRACED|WSTOPPED))) {
1083 if ((options & WNOWAIT) == 0) { 1083 if ((options & WNOWAIT) == 0) {
1084 child->p_waited = 1; 1084 child->p_waited = 1;
1085 parent->p_nstopchild--; 1085 parent->p_nstopchild--;
1086 } 1086 }
1087 if (si) { 1087 if (si) {
1088 si->si_status = child->p_xsig; 1088 si->si_status = child->p_xsig;
1089 si->si_code = 1089 si->si_code =
1090 (child->p_slflag & PSL_TRACED) ? 1090 (child->p_slflag & PSL_TRACED) ?
1091 CLD_TRAPPED : CLD_STOPPED; 1091 CLD_TRAPPED : CLD_STOPPED;
1092 } 1092 }
1093 break; 1093 break;
1094 } 1094 }
1095 if (parent->p_nstopchild == 0 || rv == 2) { 1095 if (parent->p_nstopchild == 0 || rv == 2) {
1096 child = NULL; 1096 child = NULL;
1097 break; 1097 break;
1098 } 1098 }
1099 } 1099 }
1100 1100
1101 /* 1101 /*
1102 * If we found nothing, but we are the bereaved parent 1102 * If we found nothing, but we are the bereaved parent
1103 * of a stolen child, look and see if that child (or 1103 * of a stolen child, look and see if that child (or
1104 * one of them) meets our search criteria. If so, then 1104 * one of them) meets our search criteria. If so, then
1105 * we cannot succeed, but we can hang (wait...),  1105 * we cannot succeed, but we can hang (wait...),
1106 * or if WNOHANG, return 0 instead of ECHILD 1106 * or if WNOHANG, return 0 instead of ECHILD
1107 */ 1107 */
1108 if (child == NULL && error == ECHILD &&  1108 if (child == NULL && error == ECHILD &&
1109 (parent->p_slflag & PSL_CHTRACED) && 1109 (parent->p_slflag & PSL_CHTRACED) &&
1110 debugged_child_exists(idtype, id, options, si, parent)) 1110 debugged_child_exists(idtype, id, options, si, parent))
1111 error = 0; 1111 error = 0;
1112 1112
1113 if (child != NULL || error != 0 || 1113 if (child != NULL || error != 0 ||
1114 ((options & WNOHANG) != 0 && dead == NULL)) { 1114 ((options & WNOHANG) != 0 && dead == NULL)) {
1115 *child_p = child; 1115 *child_p = child;
1116 return error; 1116 return error;
1117 } 1117 }
1118 1118
1119 /* 1119 /*
1120 * Wait for another child process to stop. 1120 * Wait for another child process to stop.
1121 */ 1121 */
1122 error = cv_wait_sig(&parent->p_waitcv, proc_lock); 1122 error = cv_wait_sig(&parent->p_waitcv, proc_lock);
1123 1123
1124 if (error != 0) { 1124 if (error != 0) {
1125 *child_p = NULL; 1125 *child_p = NULL;
1126 return error; 1126 return error;
1127 } 1127 }
1128 } 1128 }
1129} 1129}
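
For context, a minimal userland sketch (not part of this commit) of how the option validation in find_stopped_child() surfaces through waitid(2): with no event-selection flag the call fails with EINVAL rather than blocking forever, and WNOHANG turns the search into a poll. The child PID and the prints are illustrative only.

#include <sys/wait.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>

static void
poll_child(pid_t pid)
{
	siginfo_t si;

	/* No WEXITED/WSTOPPED/WCONTINUED: rejected up front with EINVAL. */
	if (waitid(P_PID, pid, &si, WNOHANG) == -1 && errno == EINVAL)
		printf("no event selected -> EINVAL, not a hang\n");

	/* Poll for exit without blocking; si_pid == 0 means nothing yet. */
	si.si_pid = 0;
	if (waitid(P_PID, pid, &si, WEXITED | WNOHANG) == 0 && si.si_pid == 0)
		printf("child %ld has not exited yet\n", (long)pid);
}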
1130 1130
1131/* 1131/*
1132 * Free a process after parent has taken all the state info. Must be called 1132 * Free a process after parent has taken all the state info. Must be called
 1133 * with the proclist lock held, and will release it before returning. 1133 * with the proclist lock held, and will release it before returning.
1134 * 1134 *
1135 * *ru is returned to the caller, and must be freed by the caller. 1135 * *ru is returned to the caller, and must be freed by the caller.
1136 */ 1136 */
1137static void 1137static void
1138proc_free(struct proc *p, struct wrusage *wru) 1138proc_free(struct proc *p, struct wrusage *wru)
1139{ 1139{
1140 struct proc *parent = p->p_pptr; 1140 struct proc *parent = p->p_pptr;
1141 struct lwp *l; 1141 struct lwp *l;
1142 ksiginfo_t ksi; 1142 ksiginfo_t ksi;
1143 kauth_cred_t cred1, cred2; 1143 kauth_cred_t cred1, cred2;
1144 uid_t uid; 1144 uid_t uid;
1145 1145
1146 KASSERT(mutex_owned(proc_lock)); 1146 KASSERT(mutex_owned(proc_lock));
1147 KASSERT(p->p_nlwps == 1); 1147 KASSERT(p->p_nlwps == 1);
1148 KASSERT(p->p_nzlwps == 1); 1148 KASSERT(p->p_nzlwps == 1);
1149 KASSERT(p->p_nrlwps == 0); 1149 KASSERT(p->p_nrlwps == 0);
1150 KASSERT(p->p_stat == SZOMB); 1150 KASSERT(p->p_stat == SZOMB);
1151 1151
1152 /* 1152 /*
1153 * If we got the child via ptrace(2) or procfs, and 1153 * If we got the child via ptrace(2) or procfs, and
1154 * the parent is different (meaning the process was 1154 * the parent is different (meaning the process was
1155 * attached, rather than run as a child), then we need 1155 * attached, rather than run as a child), then we need
1156 * to give it back to the old parent, and send the 1156 * to give it back to the old parent, and send the
1157 * parent the exit signal. The rest of the cleanup 1157 * parent the exit signal. The rest of the cleanup
1158 * will be done when the old parent waits on the child. 1158 * will be done when the old parent waits on the child.
1159 */ 1159 */
1160 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_opptr != parent) { 1160 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_opptr != parent) {
1161 mutex_enter(p->p_lock); 1161 mutex_enter(p->p_lock);
1162 p->p_slflag &= ~(PSL_TRACED|PSL_SYSCALL); 1162 p->p_slflag &= ~(PSL_TRACED|PSL_SYSCALL);
1163 mutex_exit(p->p_lock); 1163 mutex_exit(p->p_lock);
1164 parent = (p->p_opptr == NULL) ? initproc : p->p_opptr; 1164 parent = (p->p_opptr == NULL) ? initproc : p->p_opptr;
1165 proc_reparent(p, parent); 1165 proc_reparent(p, parent);
1166 p->p_opptr = NULL; 1166 p->p_opptr = NULL;
1167 if (p->p_exitsig != 0) { 1167 if (p->p_exitsig != 0) {
1168 exit_psignal(p, parent, &ksi); 1168 exit_psignal(p, parent, &ksi);
1169 kpsignal(parent, &ksi, NULL); 1169 kpsignal(parent, &ksi, NULL);
1170 } 1170 }
1171 cv_broadcast(&parent->p_waitcv); 1171 cv_broadcast(&parent->p_waitcv);
1172 mutex_exit(proc_lock); 1172 mutex_exit(proc_lock);
1173 return; 1173 return;
1174 } 1174 }
1175 1175
1176 sched_proc_exit(parent, p); 1176 sched_proc_exit(parent, p);
1177 1177
1178 /* 1178 /*
1179 * Add child times of exiting process onto its own times. 1179 * Add child times of exiting process onto its own times.
1180 * This cannot be done any earlier else it might get done twice. 1180 * This cannot be done any earlier else it might get done twice.
1181 */ 1181 */
1182 l = LIST_FIRST(&p->p_lwps); 1182 l = LIST_FIRST(&p->p_lwps);
1183 p->p_stats->p_ru.ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 1183 p->p_stats->p_ru.ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
1184 p->p_stats->p_ru.ru_nivcsw += l->l_nivcsw; 1184 p->p_stats->p_ru.ru_nivcsw += l->l_nivcsw;
1185 ruadd(&p->p_stats->p_ru, &l->l_ru); 1185 ruadd(&p->p_stats->p_ru, &l->l_ru);
1186 ruadd(&p->p_stats->p_ru, &p->p_stats->p_cru); 1186 ruadd(&p->p_stats->p_ru, &p->p_stats->p_cru);
1187 ruadd(&parent->p_stats->p_cru, &p->p_stats->p_ru); 1187 ruadd(&parent->p_stats->p_cru, &p->p_stats->p_ru);
1188 if (wru != NULL) { 1188 if (wru != NULL) {
1189 wru->wru_self = p->p_stats->p_ru; 1189 wru->wru_self = p->p_stats->p_ru;
1190 wru->wru_children = p->p_stats->p_cru; 1190 wru->wru_children = p->p_stats->p_cru;
1191 } 1191 }
1192 p->p_xsig = 0; 1192 p->p_xsig = 0;
1193 p->p_xexit = 0; 1193 p->p_xexit = 0;
1194 1194
1195 /* 1195 /*
1196 * At this point we are going to start freeing the final resources. 1196 * At this point we are going to start freeing the final resources.
1197 * If anyone tries to access the proc structure after here they will 1197 * If anyone tries to access the proc structure after here they will
1198 * get a shock - bits are missing. Attempt to make it hard! We 1198 * get a shock - bits are missing. Attempt to make it hard! We
1199 * don't bother with any further locking past this point. 1199 * don't bother with any further locking past this point.
1200 */ 1200 */
1201 p->p_stat = SIDL; /* not even a zombie any more */ 1201 p->p_stat = SIDL; /* not even a zombie any more */
1202 LIST_REMOVE(p, p_list); /* off zombproc */ 1202 LIST_REMOVE(p, p_list); /* off zombproc */
1203 parent->p_nstopchild--; 1203 parent->p_nstopchild--;
1204 LIST_REMOVE(p, p_sibling); 1204 LIST_REMOVE(p, p_sibling);
1205 1205
1206 /* 1206 /*
1207 * Let pid be reallocated. 1207 * Let pid be reallocated.
1208 */ 1208 */
1209 proc_free_pid(p->p_pid); 1209 proc_free_pid(p->p_pid);
1210 1210
1211 /* 1211 /*
1212 * Unlink process from its process group. 1212 * Unlink process from its process group.
1213 * Releases the proc_lock. 1213 * Releases the proc_lock.
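
For context, a minimal userland sketch (not part of this commit) of the traced-child handoff described in proc_free() above: while a debugger is attached, the victim is temporarily reparented to the tracer, and only the path above hands it back, with the exit signal, to the original parent. The PT_DETACH argument convention is from ptrace(2) as I recall it; treat the details as illustrative.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

static int
attach_briefly(pid_t pid)
{
	int status;

	if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
		return -1;
	/* The target stops; collect that event as its temporary parent. */
	if (waitpid(pid, &status, 0) == -1)
		return -1;
	/* Detach and resume; ordinary parentage and wait(2) semantics return. */
	return ptrace(PT_DETACH, pid, (void *)1, 0);
}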

cvs diff -r1.169 -r1.170 src/sys/kern/kern_lock.c (switch to unified diff)

--- src/sys/kern/kern_lock.c 2020/02/10 22:11:09 1.169
+++ src/sys/kern/kern_lock.c 2020/03/08 15:05:18 1.170
@@ -1,365 +1,345 @@ @@ -1,365 +1,345 @@
1/* $NetBSD: kern_lock.c,v 1.169 2020/02/10 22:11:09 christos Exp $ */ 1/* $NetBSD: kern_lock.c,v 1.170 2020/03/08 15:05:18 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran. 9 * NASA Ames Research Center, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33#include <sys/cdefs.h> 33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.169 2020/02/10 22:11:09 christos Exp $"); 34__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.170 2020/03/08 15:05:18 ad Exp $");
35 35
36#ifdef _KERNEL_OPT 36#ifdef _KERNEL_OPT
37#include "opt_lockdebug.h" 37#include "opt_lockdebug.h"
38#endif 38#endif
39 39
40#include <sys/param.h> 40#include <sys/param.h>
41#include <sys/proc.h> 41#include <sys/proc.h>
42#include <sys/lock.h> 42#include <sys/lock.h>
43#include <sys/systm.h> 43#include <sys/systm.h>
44#include <sys/kernel.h> 44#include <sys/kernel.h>
45#include <sys/lockdebug.h> 45#include <sys/lockdebug.h>
46#include <sys/cpu.h> 46#include <sys/cpu.h>
47#include <sys/syslog.h> 47#include <sys/syslog.h>
48#include <sys/atomic.h> 48#include <sys/atomic.h>
49#include <sys/lwp.h> 49#include <sys/lwp.h>
50#include <sys/pserialize.h> 50#include <sys/pserialize.h>
51 51
52#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG) 52#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
53#include <sys/ksyms.h> 53#include <sys/ksyms.h>
54#endif 54#endif
55 55
56#include <machine/lock.h> 56#include <machine/lock.h>
57 57
58#include <dev/lockstat.h> 58#include <dev/lockstat.h>
59 59
60#define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0) 60#define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0)
61 61
62bool kernel_lock_dodebug; 62bool kernel_lock_dodebug;
63 63
64__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)] 64__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
65 __cacheline_aligned; 65 __cacheline_aligned;
66 66
67void 67void
68assert_sleepable(void) 68assert_sleepable(void)
69{ 69{
70 const char *reason; 70 const char *reason;
71 uint64_t pctr; 71 uint64_t pctr;
72 bool idle; 72 bool idle;
73 73
74 if (panicstr != NULL) { 74 if (panicstr != NULL) {
75 return; 75 return;
76 } 76 }
77 77
78 LOCKDEBUG_BARRIER(kernel_lock, 1); 78 LOCKDEBUG_BARRIER(kernel_lock, 1);
79 79
80 /* 80 /*
81 * Avoid disabling/re-enabling preemption here since this 81 * Avoid disabling/re-enabling preemption here since this
82 * routine may be called in delicate situations. 82 * routine may be called in delicate situations.
83 */ 83 */
84 do { 84 do {
85 pctr = lwp_pctr(); 85 pctr = lwp_pctr();
86 __insn_barrier(); 86 __insn_barrier();
87 idle = CURCPU_IDLE_P(); 87 idle = CURCPU_IDLE_P();
88 __insn_barrier(); 88 __insn_barrier();
89 } while (pctr != lwp_pctr()); 89 } while (pctr != lwp_pctr());
90 90
91 reason = NULL; 91 reason = NULL;
92 if (idle && !cold && 92 if (idle && !cold &&
93 kcpuset_isset(kcpuset_running, cpu_index(curcpu()))) { 93 kcpuset_isset(kcpuset_running, cpu_index(curcpu()))) {
94 reason = "idle"; 94 reason = "idle";
95 } 95 }
96 if (cpu_intr_p()) { 96 if (cpu_intr_p()) {
97 reason = "interrupt"; 97 reason = "interrupt";
98 } 98 }
99 if (cpu_softintr_p()) { 99 if (cpu_softintr_p()) {
100 reason = "softint"; 100 reason = "softint";
101 } 101 }
102 if (!pserialize_not_in_read_section()) { 102 if (!pserialize_not_in_read_section()) {
103 reason = "pserialize"; 103 reason = "pserialize";
104 } 104 }
105 105
106 if (reason) { 106 if (reason) {
107 panic("%s: %s caller=%p", __func__, reason, 107 panic("%s: %s caller=%p", __func__, reason,
108 (void *)RETURN_ADDRESS); 108 (void *)RETURN_ADDRESS);
109 } 109 }
110} 110}
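
For context, a minimal sketch (not part of this commit) of the intended call pattern: a routine that may block asserts up front that it was not entered from interrupt, softint, the idle loop, or a pserialize read section. The ASSERT_SLEEPABLE() wrapper and the kmem allocation are an assumed example caller.

#include <sys/systm.h>
#include <sys/kmem.h>

void *
example_alloc(size_t len)
{

	ASSERT_SLEEPABLE();			/* ends up in assert_sleepable() */
	return kmem_alloc(len, KM_SLEEP);	/* may sleep waiting for memory */
}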
111 111
112/* 112/*
113 * Functions for manipulating the kernel_lock. We put them here 113 * Functions for manipulating the kernel_lock. We put them here
114 * so that they show up in profiles. 114 * so that they show up in profiles.
115 */ 115 */
116 116
117#define _KERNEL_LOCK_ABORT(msg) \ 117#define _KERNEL_LOCK_ABORT(msg) \
118 LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg) 118 LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)
119 119
120#ifdef LOCKDEBUG 120#ifdef LOCKDEBUG
121#define _KERNEL_LOCK_ASSERT(cond) \ 121#define _KERNEL_LOCK_ASSERT(cond) \
122do { \ 122do { \
123 if (!(cond)) \ 123 if (!(cond)) \
124 _KERNEL_LOCK_ABORT("assertion failed: " #cond); \ 124 _KERNEL_LOCK_ABORT("assertion failed: " #cond); \
125} while (/* CONSTCOND */ 0) 125} while (/* CONSTCOND */ 0)
126#else 126#else
127#define _KERNEL_LOCK_ASSERT(cond) /* nothing */ 127#define _KERNEL_LOCK_ASSERT(cond) /* nothing */
128#endif 128#endif
129 129
130static void _kernel_lock_dump(const volatile void *, lockop_printer_t); 130static void _kernel_lock_dump(const volatile void *, lockop_printer_t);
131 131
132lockops_t _kernel_lock_ops = { 132lockops_t _kernel_lock_ops = {
133 .lo_name = "Kernel lock", 133 .lo_name = "Kernel lock",
134 .lo_type = LOCKOPS_SPIN, 134 .lo_type = LOCKOPS_SPIN,
135 .lo_dump = _kernel_lock_dump, 135 .lo_dump = _kernel_lock_dump,
136}; 136};
137 137
138/* 138/*
139 * Initialize the kernel lock. 139 * Initialize the kernel lock.
140 */ 140 */
141void 141void
142kernel_lock_init(void) 142kernel_lock_init(void)
143{ 143{
144 144
145 __cpu_simple_lock_init(kernel_lock); 145 __cpu_simple_lock_init(kernel_lock);
146 kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops, 146 kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
147 RETURN_ADDRESS); 147 RETURN_ADDRESS);
148} 148}
149CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t)); 149CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));
150 150
151/* 151/*
152 * Print debugging information about the kernel lock. 152 * Print debugging information about the kernel lock.
153 */ 153 */
154static void 154static void
155_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr) 155_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
156{ 156{
157 struct cpu_info *ci = curcpu(); 157 struct cpu_info *ci = curcpu();
158 158
159 (void)junk; 159 (void)junk;
160 160
161 pr("curcpu holds : %18d wanted by: %#018lx\n", 161 pr("curcpu holds : %18d wanted by: %#018lx\n",
162 ci->ci_biglock_count, (long)ci->ci_biglock_wanted); 162 ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
163} 163}
164 164
165/* 165/*
166 * Acquire 'nlocks' holds on the kernel lock. 166 * Acquire 'nlocks' holds on the kernel lock.
167 * 167 *
168 * Although it may not look it, this is one of the most central, intricate 168 * Although it may not look it, this is one of the most central, intricate
169 * routines in the kernel, and tons of code elsewhere depends on its exact 169 * routines in the kernel, and tons of code elsewhere depends on its exact
170 * behaviour. If you change something in here, expect it to bite you in the 170 * behaviour. If you change something in here, expect it to bite you in the
171 * rear. 171 * rear.
172 */ 172 */
173void 173void
174_kernel_lock(int nlocks) 174_kernel_lock(int nlocks)
175{ 175{
176 struct cpu_info *ci; 176 struct cpu_info *ci;
177 LOCKSTAT_TIMER(spintime); 177 LOCKSTAT_TIMER(spintime);
178 LOCKSTAT_FLAG(lsflag); 178 LOCKSTAT_FLAG(lsflag);
179 struct lwp *owant; 179 struct lwp *owant;
180#ifdef LOCKDEBUG 180#ifdef LOCKDEBUG
181 u_int spins = 0; 181 u_int spins = 0;
182#endif 182#endif
183 int s; 183 int s;
184 struct lwp *l = curlwp; 184 struct lwp *l = curlwp;
185 185
186 _KERNEL_LOCK_ASSERT(nlocks > 0); 186 _KERNEL_LOCK_ASSERT(nlocks > 0);
187 187
188 s = splvm(); 188 s = splvm();
189 ci = curcpu(); 189 ci = curcpu();
190 if (ci->ci_biglock_count != 0) { 190 if (ci->ci_biglock_count != 0) {
191 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock)); 191 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
192 ci->ci_biglock_count += nlocks; 192 ci->ci_biglock_count += nlocks;
193 l->l_blcnt += nlocks; 193 l->l_blcnt += nlocks;
194 splx(s); 194 splx(s);
195 return; 195 return;
196 } 196 }
197 197
198 _KERNEL_LOCK_ASSERT(l->l_blcnt == 0); 198 _KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
199 LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS, 199 LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
200 0); 200 0);
201 201
202 if (__predict_true(__cpu_simple_lock_try(kernel_lock))) { 202 if (__predict_true(__cpu_simple_lock_try(kernel_lock))) {
203 ci->ci_biglock_count = nlocks; 203 ci->ci_biglock_count = nlocks;
204 l->l_blcnt = nlocks; 204 l->l_blcnt = nlocks;
205 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL, 205 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
206 RETURN_ADDRESS, 0); 206 RETURN_ADDRESS, 0);
207 splx(s); 207 splx(s);
208 return; 208 return;
209 } 209 }
210 210
211 /* 211 /*
212 * To remove the ordering constraint between adaptive mutexes 212 * To remove the ordering constraint between adaptive mutexes
213 * and kernel_lock we must make it appear as if this thread is 213 * and kernel_lock we must make it appear as if this thread is
214 * blocking. For non-interlocked mutex release, a store fence 214 * blocking. For non-interlocked mutex release, a store fence
215 * is required to ensure that the result of any mutex_exit() 215 * is required to ensure that the result of any mutex_exit()
216 * by the current LWP becomes visible on the bus before the set 216 * by the current LWP becomes visible on the bus before the set
217 * of ci->ci_biglock_wanted becomes visible. 217 * of ci->ci_biglock_wanted becomes visible.
218 * 218 *
219 * However, we won't set ci_biglock_wanted until we've spun for 219 * However, we won't set ci_biglock_wanted until we've spun for
220 * a bit, as we don't want to make any lock waiters in rw_oncpu() 220 * a bit, as we don't want to make any lock waiters in rw_oncpu()
221 * or mutex_oncpu() block prematurely. 221 * or mutex_oncpu() block prematurely.
222 */ 222 */
223 membar_producer(); 223 membar_producer();
224 owant = ci->ci_biglock_wanted; 224 owant = ci->ci_biglock_wanted;
225 ci->ci_biglock_wanted = l; 225 ci->ci_biglock_wanted = l;
226#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG) 226#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
227 l->l_ld_wanted = __builtin_return_address(0); 227 l->l_ld_wanted = __builtin_return_address(0);
228#endif 228#endif
229 229
230 /* 230 /*
231 * Spin until we acquire the lock. Once we have it, record the 231 * Spin until we acquire the lock. Once we have it, record the
232 * time spent with lockstat. 232 * time spent with lockstat.
233 */ 233 */
234 LOCKSTAT_ENTER(lsflag); 234 LOCKSTAT_ENTER(lsflag);
235 LOCKSTAT_START_TIMER(lsflag, spintime); 235 LOCKSTAT_START_TIMER(lsflag, spintime);
236 236
237 do { 237 do {
238 splx(s); 238 splx(s);
239 while (__SIMPLELOCK_LOCKED_P(kernel_lock)) { 239 while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
240#ifdef LOCKDEBUG 240#ifdef LOCKDEBUG
241 if (SPINLOCK_SPINOUT(spins)) { 241 if (SPINLOCK_SPINOUT(spins)) {
242 extern int start_init_exec; 242 extern int start_init_exec;
243 if (!start_init_exec) 243 if (!start_init_exec)
244 _KERNEL_LOCK_ABORT("spinout"); 244 _KERNEL_LOCK_ABORT("spinout");
245 } 245 }
246 SPINLOCK_BACKOFF_HOOK; 246 SPINLOCK_BACKOFF_HOOK;
247 SPINLOCK_SPIN_HOOK; 247 SPINLOCK_SPIN_HOOK;
248#endif 248#endif
249 } 249 }
250 s = splvm(); 250 s = splvm();
251 } while (!__cpu_simple_lock_try(kernel_lock)); 251 } while (!__cpu_simple_lock_try(kernel_lock));
252 252
253 ci->ci_biglock_count = nlocks; 253 ci->ci_biglock_count = nlocks;
254 l->l_blcnt = nlocks; 254 l->l_blcnt = nlocks;
255 LOCKSTAT_STOP_TIMER(lsflag, spintime); 255 LOCKSTAT_STOP_TIMER(lsflag, spintime);
256 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL, 256 LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
257 RETURN_ADDRESS, 0); 257 RETURN_ADDRESS, 0);
258 if (owant == NULL) { 258 if (owant == NULL) {
259 LOCKSTAT_EVENT_RA(lsflag, kernel_lock, 259 LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
260 LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS); 260 LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
261 } 261 }
262 LOCKSTAT_EXIT(lsflag); 262 LOCKSTAT_EXIT(lsflag);
263 splx(s); 263 splx(s);
264 264
265 /* 265 /*
266 * Now that we have kernel_lock, reset ci_biglock_wanted. This 266 * Now that we have kernel_lock, reset ci_biglock_wanted. This
267 * store must be unbuffered (immediately visible on the bus) in 267 * store must be unbuffered (immediately visible on the bus) in
268 * order for non-interlocked mutex release to work correctly. 268 * order for non-interlocked mutex release to work correctly.
269 * It must be visible before a mutex_exit() can execute on this 269 * It must be visible before a mutex_exit() can execute on this
270 * processor. 270 * processor.
271 * 271 *
272 * Note: only where CAS is available in hardware will this be 272 * Note: only where CAS is available in hardware will this be
273 * an unbuffered write, but non-interlocked release cannot be 273 * an unbuffered write, but non-interlocked release cannot be
274 * done on CPUs without CAS in hardware. 274 * done on CPUs without CAS in hardware.
275 */ 275 */
276 (void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant); 276 (void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
277 277
278 /* 278 /*
279 * Issue a memory barrier as we have acquired a lock. This also 279 * Issue a memory barrier as we have acquired a lock. This also
280 * prevents stores from a following mutex_exit() being reordered 280 * prevents stores from a following mutex_exit() being reordered
281 * to occur before our store to ci_biglock_wanted above. 281 * to occur before our store to ci_biglock_wanted above.
282 */ 282 */
283#ifndef __HAVE_ATOMIC_AS_MEMBAR 283#ifndef __HAVE_ATOMIC_AS_MEMBAR
284 membar_enter(); 284 membar_enter();
285#endif 285#endif
286} 286}
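
For context, a minimal sketch (not part of this commit) of how callers normally reach this routine: through the KERNEL_LOCK()/KERNEL_UNLOCK_ONE() macros, bracketing code that is not yet MPSAFE. The ioctl-style function is hypothetical.

#include <sys/param.h>
#include <sys/systm.h>

static int
legacy_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int error = 0;

	KERNEL_LOCK(1, NULL);		/* take one hold on the big lock */
	/* ... non-MPSAFE device work goes here ... */
	KERNEL_UNLOCK_ONE(NULL);	/* drop that single hold */
	return error;
}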
287 287
288/* 288/*
289 * Release 'nlocks' holds on the kernel lock. If 'nlocks' is zero, release 289 * Release 'nlocks' holds on the kernel lock. If 'nlocks' is zero, release
290 * all holds. 290 * all holds.
291 */ 291 */
292void 292void
293_kernel_unlock(int nlocks, int *countp) 293_kernel_unlock(int nlocks, int *countp)
294{ 294{
295 struct cpu_info *ci; 295 struct cpu_info *ci;
296 u_int olocks; 296 u_int olocks;
297 int s; 297 int s;
298 struct lwp *l = curlwp; 298 struct lwp *l = curlwp;
299 299
300 _KERNEL_LOCK_ASSERT(nlocks < 2); 300 _KERNEL_LOCK_ASSERT(nlocks < 2);
301 301
302 olocks = l->l_blcnt; 302 olocks = l->l_blcnt;
303 303
304 if (olocks == 0) { 304 if (olocks == 0) {
305 _KERNEL_LOCK_ASSERT(nlocks <= 0); 305 _KERNEL_LOCK_ASSERT(nlocks <= 0);
306 if (countp != NULL) 306 if (countp != NULL)
307 *countp = 0; 307 *countp = 0;
308 return; 308 return;
309 } 309 }
310 310
311 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock)); 311 _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
312 312
313 if (nlocks == 0) 313 if (nlocks == 0)
314 nlocks = olocks; 314 nlocks = olocks;
315 else if (nlocks == -1) { 315 else if (nlocks == -1) {
316 nlocks = 1; 316 nlocks = 1;
317 _KERNEL_LOCK_ASSERT(olocks == 1); 317 _KERNEL_LOCK_ASSERT(olocks == 1);
318 } 318 }
319 s = splvm(); 319 s = splvm();
320 ci = curcpu(); 320 ci = curcpu();
321 _KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt); 321 _KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
322 if (ci->ci_biglock_count == nlocks) { 322 if (ci->ci_biglock_count == nlocks) {
323 LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock, 323 LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
324 RETURN_ADDRESS, 0); 324 RETURN_ADDRESS, 0);
325 ci->ci_biglock_count = 0; 325 ci->ci_biglock_count = 0;
326 __cpu_simple_unlock(kernel_lock); 326 __cpu_simple_unlock(kernel_lock);
327 l->l_blcnt -= nlocks; 327 l->l_blcnt -= nlocks;
328 splx(s); 328 splx(s);
329 if (l->l_dopreempt) 329 if (l->l_dopreempt)
330 kpreempt(0); 330 kpreempt(0);
331 } else { 331 } else {
332 ci->ci_biglock_count -= nlocks; 332 ci->ci_biglock_count -= nlocks;
333 l->l_blcnt -= nlocks; 333 l->l_blcnt -= nlocks;
334 splx(s); 334 splx(s);
335 } 335 }
336 336
337 if (countp != NULL) 337 if (countp != NULL)
338 *countp = olocks; 338 *countp = olocks;
339} 339}
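
For context, a minimal sketch (not part of this commit) of the save/restore idiom that the "release all holds" case supports: drop every hold before a long sleep, remember how many there were via countp, and take them back afterwards. The condition variable and mutex are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/proc.h>

static void
sleep_without_biglock(kcondvar_t *cv, kmutex_t *mtx)
{
	int biglocks;

	KERNEL_UNLOCK_ALL(curlwp, &biglocks);	/* countp records the old hold count */
	mutex_enter(mtx);
	cv_wait(cv, mtx);
	mutex_exit(mtx);
	KERNEL_LOCK(biglocks, curlwp);		/* reacquire the saved holds */
}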
340 340
341bool 341bool
342_kernel_locked_p(void) 342_kernel_locked_p(void)
343{ 343{
344 return __SIMPLELOCK_LOCKED_P(kernel_lock); 344 return __SIMPLELOCK_LOCKED_P(kernel_lock);
345} 345}
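
For context, _kernel_locked_p() is what sits behind the KERNEL_LOCKED_P() macro, typically used to assert that a legacy, big-lock-only entry point really is covered. A minimal sketch; the subroutine is hypothetical.

#include <sys/param.h>
#include <sys/systm.h>

static void
legacy_entry(void)
{

	KASSERT(KERNEL_LOCKED_P());
	/* ... code that still depends on the big lock ... */
}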
346 
347void 
348kernel_lock_plug_leak(void) 
349{ 
350#ifndef LOCKDEBUG 
351# ifdef DIAGNOSTIC 
352 int biglocks = 0; 
353 KERNEL_UNLOCK_ALL(curlwp, &biglocks); 
354 if (biglocks != 0) { 
355 const char *sym = "(unknown)"; 
356 ksyms_getname(NULL, &sym, (vaddr_t)curlwp->l_ld_wanted, 
357 KSYMS_CLOSEST|KSYMS_PROC|KSYMS_ANY); 
358 printf("kernel_lock leak detected. last acquired: %s / %p\n", 
359 sym, curlwp->l_ld_wanted); 
360 } 
361# else 
362 KERNEL_UNLOCK_ALL(curlwp, NULL); 
363# endif 
364#endif 
365} 

cvs diff -r1.61 -r1.62 src/sys/kern/kern_softint.c (switch to unified diff)

--- src/sys/kern/kern_softint.c 2020/02/17 21:44:42 1.61
+++ src/sys/kern/kern_softint.c 2020/03/08 15:05:18 1.62
@@ -1,921 +1,918 @@ @@ -1,921 +1,918 @@
1/* $NetBSD: kern_softint.c,v 1.61 2020/02/17 21:44:42 ad Exp $ */ 1/* $NetBSD: kern_softint.c,v 1.62 2020/03/08 15:05:18 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Generic software interrupt framework. 33 * Generic software interrupt framework.
34 * 34 *
35 * Overview 35 * Overview
36 * 36 *
37 * The soft interrupt framework provides a mechanism to schedule a 37 * The soft interrupt framework provides a mechanism to schedule a
38 * low priority callback that runs with thread context. It allows 38 * low priority callback that runs with thread context. It allows
39 * for dynamic registration of software interrupts, and for fair 39 * for dynamic registration of software interrupts, and for fair
40 * queueing and prioritization of those interrupts. The callbacks 40 * queueing and prioritization of those interrupts. The callbacks
41 * can be scheduled to run from nearly any point in the kernel: by 41 * can be scheduled to run from nearly any point in the kernel: by
42 * code running with thread context, by code running from a 42 * code running with thread context, by code running from a
43 * hardware interrupt handler, and at any interrupt priority 43 * hardware interrupt handler, and at any interrupt priority
44 * level. 44 * level.
45 * 45 *
46 * Priority levels 46 * Priority levels
47 * 47 *
48 * Since soft interrupt dispatch can be tied to the underlying 48 * Since soft interrupt dispatch can be tied to the underlying
49 * architecture's interrupt dispatch code, it can be limited 49 * architecture's interrupt dispatch code, it can be limited
50 * both by the capabilities of the hardware and the capabilities 50 * both by the capabilities of the hardware and the capabilities
51 * of the interrupt dispatch code itself. The number of priority 51 * of the interrupt dispatch code itself. The number of priority
52 * levels is restricted to four. In order of priority (lowest to 52 * levels is restricted to four. In order of priority (lowest to
53 * highest) the levels are: clock, bio, net, serial. 53 * highest) the levels are: clock, bio, net, serial.
54 * 54 *
55 * The names are symbolic and in isolation do not have any direct 55 * The names are symbolic and in isolation do not have any direct
56 * connection with a particular kind of device activity: they are 56 * connection with a particular kind of device activity: they are
57 * only meant as a guide. 57 * only meant as a guide.
58 * 58 *
59 * The four priority levels map directly to scheduler priority 59 * The four priority levels map directly to scheduler priority
60 * levels, and where the architecture implements 'fast' software 60 * levels, and where the architecture implements 'fast' software
61 * interrupts, they also map onto interrupt priorities. The 61 * interrupts, they also map onto interrupt priorities. The
62 * interrupt priorities are intended to be hidden from machine 62 * interrupt priorities are intended to be hidden from machine
63 * independent code, which should use thread-safe mechanisms to 63 * independent code, which should use thread-safe mechanisms to
64 * synchronize with software interrupts (for example: mutexes). 64 * synchronize with software interrupts (for example: mutexes).
65 * 65 *
66 * Capabilities 66 * Capabilities
67 * 67 *
68 * Software interrupts run with limited machine context. In 68 * Software interrupts run with limited machine context. In
 69 * particular, they do not possess any address space context. They 69 * particular, they do not possess any address space context. They
70 * should not try to operate on user space addresses, or to use 70 * should not try to operate on user space addresses, or to use
71 * virtual memory facilities other than those noted as interrupt 71 * virtual memory facilities other than those noted as interrupt
72 * safe. 72 * safe.
73 * 73 *
74 * Unlike hardware interrupts, software interrupts do have thread 74 * Unlike hardware interrupts, software interrupts do have thread
75 * context. They may block on synchronization objects, sleep, and 75 * context. They may block on synchronization objects, sleep, and
76 * resume execution at a later time. 76 * resume execution at a later time.
77 * 77 *
78 * Since software interrupts are a limited resource and run with 78 * Since software interrupts are a limited resource and run with
79 * higher priority than most other LWPs in the system, all 79 * higher priority than most other LWPs in the system, all
80 * block-and-resume activity by a software interrupt must be kept 80 * block-and-resume activity by a software interrupt must be kept
 81 * short to allow further processing at that level to continue. By 81 * short to allow further processing at that level to continue. By
82 * extension, code running with process context must take care to 82 * extension, code running with process context must take care to
83 * ensure that any lock that may be taken from a software interrupt 83 * ensure that any lock that may be taken from a software interrupt
84 * can not be held for more than a short period of time. 84 * can not be held for more than a short period of time.
85 * 85 *
86 * The kernel does not allow software interrupts to use facilities 86 * The kernel does not allow software interrupts to use facilities
87 * or perform actions that may block for a significant amount of 87 * or perform actions that may block for a significant amount of
88 * time. This means that it's not valid for a software interrupt 88 * time. This means that it's not valid for a software interrupt
89 * to sleep on condition variables or wait for resources to become 89 * to sleep on condition variables or wait for resources to become
90 * available (for example, memory). 90 * available (for example, memory).
91 * 91 *
92 * Per-CPU operation 92 * Per-CPU operation
93 * 93 *
94 * If a soft interrupt is triggered on a CPU, it can only be 94 * If a soft interrupt is triggered on a CPU, it can only be
95 * dispatched on the same CPU. Each LWP dedicated to handling a 95 * dispatched on the same CPU. Each LWP dedicated to handling a
96 * soft interrupt is bound to its home CPU, so if the LWP blocks 96 * soft interrupt is bound to its home CPU, so if the LWP blocks
97 * and needs to run again, it can only run there. Nearly all data 97 * and needs to run again, it can only run there. Nearly all data
98 * structures used to manage software interrupts are per-CPU. 98 * structures used to manage software interrupts are per-CPU.
99 * 99 *
100 * The per-CPU requirement is intended to reduce "ping-pong" of 100 * The per-CPU requirement is intended to reduce "ping-pong" of
101 * cache lines between CPUs: lines occupied by data structures 101 * cache lines between CPUs: lines occupied by data structures
102 * used to manage the soft interrupts, and lines occupied by data 102 * used to manage the soft interrupts, and lines occupied by data
103 * items being passed down to the soft interrupt. As a positive 103 * items being passed down to the soft interrupt. As a positive
104 * side effect, this also means that the soft interrupt dispatch 104 * side effect, this also means that the soft interrupt dispatch
 105 * code does not need to use spinlocks to synchronize. 105 * code does not need to use spinlocks to synchronize.
106 * 106 *
107 * Generic implementation 107 * Generic implementation
108 * 108 *
109 * A generic, low performance implementation is provided that 109 * A generic, low performance implementation is provided that
110 * works across all architectures, with no machine-dependent 110 * works across all architectures, with no machine-dependent
111 * modifications needed. This implementation uses the scheduler, 111 * modifications needed. This implementation uses the scheduler,
112 * and so has a number of restrictions: 112 * and so has a number of restrictions:
113 * 113 *
114 * 1) The software interrupts are not currently preemptive, so 114 * 1) The software interrupts are not currently preemptive, so
115 * must wait for the currently executing LWP to yield the CPU.  115 * must wait for the currently executing LWP to yield the CPU.
116 * This can introduce latency. 116 * This can introduce latency.
117 * 117 *
118 * 2) An expensive context switch is required for a software 118 * 2) An expensive context switch is required for a software
119 * interrupt to be handled. 119 * interrupt to be handled.
120 * 120 *
121 * 'Fast' software interrupts 121 * 'Fast' software interrupts
122 * 122 *
 123 * If an architecture defines __HAVE_FAST_SOFTINTS, it implements 123 * If an architecture defines __HAVE_FAST_SOFTINTS, it implements
124 * the fast mechanism. Threads running either in the kernel or in 124 * the fast mechanism. Threads running either in the kernel or in
125 * userspace will be interrupted, but will not be preempted. When 125 * userspace will be interrupted, but will not be preempted. When
126 * the soft interrupt completes execution, the interrupted LWP 126 * the soft interrupt completes execution, the interrupted LWP
127 * is resumed. Interrupt dispatch code must provide the minimum 127 * is resumed. Interrupt dispatch code must provide the minimum
128 * level of context necessary for the soft interrupt to block and 128 * level of context necessary for the soft interrupt to block and
129 * be resumed at a later time. The machine-dependent dispatch 129 * be resumed at a later time. The machine-dependent dispatch
130 * path looks something like the following: 130 * path looks something like the following:
131 * 131 *
132 * softintr() 132 * softintr()
133 * { 133 * {
134 * go to IPL_HIGH if necessary for switch; 134 * go to IPL_HIGH if necessary for switch;
135 * save any necessary registers in a format that can be 135 * save any necessary registers in a format that can be
136 * restored by cpu_switchto if the softint blocks; 136 * restored by cpu_switchto if the softint blocks;
137 * arrange for cpu_switchto() to restore into the 137 * arrange for cpu_switchto() to restore into the
138 * trampoline function; 138 * trampoline function;
139 * identify LWP to handle this interrupt; 139 * identify LWP to handle this interrupt;
140 * switch to the LWP's stack; 140 * switch to the LWP's stack;
141 * switch register stacks, if necessary; 141 * switch register stacks, if necessary;
142 * assign new value of curlwp; 142 * assign new value of curlwp;
143 * call MI softint_dispatch, passing old curlwp and IPL 143 * call MI softint_dispatch, passing old curlwp and IPL
144 * to execute interrupt at; 144 * to execute interrupt at;
145 * switch back to old stack; 145 * switch back to old stack;
146 * switch back to old register stack, if necessary; 146 * switch back to old register stack, if necessary;
147 * restore curlwp; 147 * restore curlwp;
148 * return to interrupted LWP; 148 * return to interrupted LWP;
149 * } 149 * }
150 * 150 *
151 * If the soft interrupt blocks, a trampoline function is returned 151 * If the soft interrupt blocks, a trampoline function is returned
152 * to in the context of the interrupted LWP, as arranged for by 152 * to in the context of the interrupted LWP, as arranged for by
153 * softint(): 153 * softint():
154 * 154 *
155 * softint_ret() 155 * softint_ret()
156 * { 156 * {
157 * unlock soft interrupt LWP; 157 * unlock soft interrupt LWP;
158 * resume interrupt processing, likely returning to 158 * resume interrupt processing, likely returning to
159 * interrupted LWP or dispatching another, different 159 * interrupted LWP or dispatching another, different
160 * interrupt; 160 * interrupt;
161 * } 161 * }
162 * 162 *
163 * Once the soft interrupt has fired (and even if it has blocked), 163 * Once the soft interrupt has fired (and even if it has blocked),
164 * no further soft interrupts at that level will be triggered by 164 * no further soft interrupts at that level will be triggered by
165 * MI code until the soft interrupt handler has ceased execution.  165 * MI code until the soft interrupt handler has ceased execution.
166 * If a soft interrupt handler blocks and is resumed, it resumes 166 * If a soft interrupt handler blocks and is resumed, it resumes
167 * execution as a normal LWP (kthread) and gains VM context. Only 167 * execution as a normal LWP (kthread) and gains VM context. Only
168 * when it has completed and is ready to fire again will it 168 * when it has completed and is ready to fire again will it
169 * interrupt other threads. 169 * interrupt other threads.
170 */ 170 */
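
For context, a minimal sketch (not part of this commit) of how a driver typically uses the framework described above: establish a handler once at attach time, schedule it from the hardware interrupt handler, and let the callback run later in thread context. All of the driver names are hypothetical.

#include <sys/intr.h>

struct mydrv_softc {
	void	*sc_sih;		/* softint cookie */
};

/* Runs later, in thread context, at SOFTINT_NET priority. */
static void
mydrv_softintr(void *arg)
{
	struct mydrv_softc *sc = arg;

	/* drain the receive queue, hand work up the stack, etc. */
	(void)sc;
}

/* Hardware interrupt handler: do the minimum and defer the rest. */
static int
mydrv_intr(void *arg)
{
	struct mydrv_softc *sc = arg;

	softint_schedule(sc->sc_sih);	/* fires mydrv_softintr() soon, on this CPU */
	return 1;
}

/* At attach time. */
static void
mydrv_attach_softint(struct mydrv_softc *sc)
{
	sc->sc_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
	    mydrv_softintr, sc);
}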
171 171
172#include <sys/cdefs.h> 172#include <sys/cdefs.h>
173__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.61 2020/02/17 21:44:42 ad Exp $"); 173__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.62 2020/03/08 15:05:18 ad Exp $");
174 174
175#include <sys/param.h> 175#include <sys/param.h>
176#include <sys/proc.h> 176#include <sys/proc.h>
177#include <sys/intr.h> 177#include <sys/intr.h>
178#include <sys/ipi.h> 178#include <sys/ipi.h>
179#include <sys/lock.h> 179#include <sys/lock.h>
180#include <sys/mutex.h> 180#include <sys/mutex.h>
181#include <sys/kernel.h> 181#include <sys/kernel.h>
182#include <sys/kthread.h> 182#include <sys/kthread.h>
183#include <sys/evcnt.h> 183#include <sys/evcnt.h>
184#include <sys/cpu.h> 184#include <sys/cpu.h>
185#include <sys/xcall.h> 185#include <sys/xcall.h>
186 186
187#include <net/netisr.h> 187#include <net/netisr.h>
188 188
189#include <uvm/uvm_extern.h> 189#include <uvm/uvm_extern.h>
190 190
191/* This could overlap with signal info in struct lwp. */ 191/* This could overlap with signal info in struct lwp. */
192typedef struct softint { 192typedef struct softint {
193 SIMPLEQ_HEAD(, softhand) si_q; 193 SIMPLEQ_HEAD(, softhand) si_q;
194 struct lwp *si_lwp; 194 struct lwp *si_lwp;
195 struct cpu_info *si_cpu; 195 struct cpu_info *si_cpu;
196 uintptr_t si_machdep; 196 uintptr_t si_machdep;
197 struct evcnt si_evcnt; 197 struct evcnt si_evcnt;
198 struct evcnt si_evcnt_block; 198 struct evcnt si_evcnt_block;
199 int si_active; 199 int si_active;
200 char si_name[8]; 200 char si_name[8];
201 char si_name_block[8+6]; 201 char si_name_block[8+6];
202} softint_t; 202} softint_t;
203 203
204typedef struct softhand { 204typedef struct softhand {
205 SIMPLEQ_ENTRY(softhand) sh_q; 205 SIMPLEQ_ENTRY(softhand) sh_q;
206 void (*sh_func)(void *); 206 void (*sh_func)(void *);
207 void *sh_arg; 207 void *sh_arg;
208 softint_t *sh_isr; 208 softint_t *sh_isr;
209 u_int sh_flags; 209 u_int sh_flags;
210 u_int sh_ipi_id; 210 u_int sh_ipi_id;
211} softhand_t; 211} softhand_t;
212 212
213typedef struct softcpu { 213typedef struct softcpu {
214 struct cpu_info *sc_cpu; 214 struct cpu_info *sc_cpu;
215 softint_t sc_int[SOFTINT_COUNT]; 215 softint_t sc_int[SOFTINT_COUNT];
216 softhand_t sc_hand[1]; 216 softhand_t sc_hand[1];
217} softcpu_t; 217} softcpu_t;
218 218
219static void softint_thread(void *); 219static void softint_thread(void *);
220 220
221u_int softint_bytes = 32768; 221u_int softint_bytes = 32768;
222u_int softint_timing; 222u_int softint_timing;
223static u_int softint_max; 223static u_int softint_max;
224static kmutex_t softint_lock; 224static kmutex_t softint_lock;
225static void *softint_netisrs[NETISR_MAX]; 225static void *softint_netisrs[NETISR_MAX];
226 226
227/* 227/*
228 * softint_init_isr: 228 * softint_init_isr:
229 * 229 *
230 * Initialize a single interrupt level for a single CPU. 230 * Initialize a single interrupt level for a single CPU.
231 */ 231 */
232static void 232static void
233softint_init_isr(softcpu_t *sc, const char *desc, pri_t pri, u_int level) 233softint_init_isr(softcpu_t *sc, const char *desc, pri_t pri, u_int level)
234{ 234{
235 struct cpu_info *ci; 235 struct cpu_info *ci;
236 softint_t *si; 236 softint_t *si;
237 int error; 237 int error;
238 238
239 si = &sc->sc_int[level]; 239 si = &sc->sc_int[level];
240 ci = sc->sc_cpu; 240 ci = sc->sc_cpu;
241 si->si_cpu = ci; 241 si->si_cpu = ci;
242 242
243 SIMPLEQ_INIT(&si->si_q); 243 SIMPLEQ_INIT(&si->si_q);
244 244
245 error = kthread_create(pri, KTHREAD_MPSAFE | KTHREAD_INTR | 245 error = kthread_create(pri, KTHREAD_MPSAFE | KTHREAD_INTR |
246 KTHREAD_IDLE, ci, softint_thread, si, &si->si_lwp, 246 KTHREAD_IDLE, ci, softint_thread, si, &si->si_lwp,
247 "soft%s/%u", desc, ci->ci_index); 247 "soft%s/%u", desc, ci->ci_index);
248 if (error != 0) 248 if (error != 0)
249 panic("softint_init_isr: error %d", error); 249 panic("softint_init_isr: error %d", error);
250 250
251 snprintf(si->si_name, sizeof(si->si_name), "%s/%u", desc, 251 snprintf(si->si_name, sizeof(si->si_name), "%s/%u", desc,
252 ci->ci_index); 252 ci->ci_index);
253 evcnt_attach_dynamic(&si->si_evcnt, EVCNT_TYPE_MISC, NULL, 253 evcnt_attach_dynamic(&si->si_evcnt, EVCNT_TYPE_MISC, NULL,
254 "softint", si->si_name); 254 "softint", si->si_name);
255 snprintf(si->si_name_block, sizeof(si->si_name_block), "%s block/%u", 255 snprintf(si->si_name_block, sizeof(si->si_name_block), "%s block/%u",
256 desc, ci->ci_index); 256 desc, ci->ci_index);
257 evcnt_attach_dynamic(&si->si_evcnt_block, EVCNT_TYPE_MISC, NULL, 257 evcnt_attach_dynamic(&si->si_evcnt_block, EVCNT_TYPE_MISC, NULL,
258 "softint", si->si_name_block); 258 "softint", si->si_name_block);
259 259
260 si->si_lwp->l_private = si; 260 si->si_lwp->l_private = si;
261 softint_init_md(si->si_lwp, level, &si->si_machdep); 261 softint_init_md(si->si_lwp, level, &si->si_machdep);
262} 262}
263 263
264/* 264/*
265 * softint_init: 265 * softint_init:
266 * 266 *
267 * Initialize per-CPU data structures. Called from mi_cpu_attach(). 267 * Initialize per-CPU data structures. Called from mi_cpu_attach().
268 */ 268 */
269void 269void
270softint_init(struct cpu_info *ci) 270softint_init(struct cpu_info *ci)
271{ 271{
272 static struct cpu_info *first; 272 static struct cpu_info *first;
273 softcpu_t *sc, *scfirst; 273 softcpu_t *sc, *scfirst;
274 softhand_t *sh, *shmax; 274 softhand_t *sh, *shmax;
275 275
276 if (first == NULL) { 276 if (first == NULL) {
277 /* Boot CPU. */ 277 /* Boot CPU. */
278 first = ci; 278 first = ci;
279 mutex_init(&softint_lock, MUTEX_DEFAULT, IPL_NONE); 279 mutex_init(&softint_lock, MUTEX_DEFAULT, IPL_NONE);
280 softint_bytes = round_page(softint_bytes); 280 softint_bytes = round_page(softint_bytes);
281 softint_max = (softint_bytes - sizeof(softcpu_t)) / 281 softint_max = (softint_bytes - sizeof(softcpu_t)) /
282 sizeof(softhand_t); 282 sizeof(softhand_t);
283 } 283 }
284 284
285 /* Use uvm_km(9) for persistent, page-aligned allocation. */ 285 /* Use uvm_km(9) for persistent, page-aligned allocation. */
286 sc = (softcpu_t *)uvm_km_alloc(kernel_map, softint_bytes, 0, 286 sc = (softcpu_t *)uvm_km_alloc(kernel_map, softint_bytes, 0,
287 UVM_KMF_WIRED | UVM_KMF_ZERO); 287 UVM_KMF_WIRED | UVM_KMF_ZERO);
288 if (sc == NULL) 288 if (sc == NULL)
289 panic("softint_init_cpu: cannot allocate memory"); 289 panic("softint_init_cpu: cannot allocate memory");
290 290
291 ci->ci_data.cpu_softcpu = sc; 291 ci->ci_data.cpu_softcpu = sc;
292 ci->ci_data.cpu_softints = 0; 292 ci->ci_data.cpu_softints = 0;
293 sc->sc_cpu = ci; 293 sc->sc_cpu = ci;
294 294
295 softint_init_isr(sc, "net", PRI_SOFTNET, SOFTINT_NET); 295 softint_init_isr(sc, "net", PRI_SOFTNET, SOFTINT_NET);
296 softint_init_isr(sc, "bio", PRI_SOFTBIO, SOFTINT_BIO); 296 softint_init_isr(sc, "bio", PRI_SOFTBIO, SOFTINT_BIO);
297 softint_init_isr(sc, "clk", PRI_SOFTCLOCK, SOFTINT_CLOCK); 297 softint_init_isr(sc, "clk", PRI_SOFTCLOCK, SOFTINT_CLOCK);
298 softint_init_isr(sc, "ser", PRI_SOFTSERIAL, SOFTINT_SERIAL); 298 softint_init_isr(sc, "ser", PRI_SOFTSERIAL, SOFTINT_SERIAL);
299 299
300 if (first != ci) { 300 if (first != ci) {
301 mutex_enter(&softint_lock); 301 mutex_enter(&softint_lock);
302 scfirst = first->ci_data.cpu_softcpu; 302 scfirst = first->ci_data.cpu_softcpu;
303 sh = sc->sc_hand; 303 sh = sc->sc_hand;
304 memcpy(sh, scfirst->sc_hand, sizeof(*sh) * softint_max); 304 memcpy(sh, scfirst->sc_hand, sizeof(*sh) * softint_max);
305 /* Update pointers for this CPU. */ 305 /* Update pointers for this CPU. */
306 for (shmax = sh + softint_max; sh < shmax; sh++) { 306 for (shmax = sh + softint_max; sh < shmax; sh++) {
307 if (sh->sh_func == NULL) 307 if (sh->sh_func == NULL)
308 continue; 308 continue;
309 sh->sh_isr = 309 sh->sh_isr =
310 &sc->sc_int[sh->sh_flags & SOFTINT_LVLMASK]; 310 &sc->sc_int[sh->sh_flags & SOFTINT_LVLMASK];
311 } 311 }
312 mutex_exit(&softint_lock); 312 mutex_exit(&softint_lock);
313 } else { 313 } else {
314 /* 314 /*
315 * Establish handlers for legacy net interrupts. 315 * Establish handlers for legacy net interrupts.
316 * XXX Needs to go away. 316 * XXX Needs to go away.
317 */ 317 */
318#define DONETISR(n, f) \ 318#define DONETISR(n, f) \
319 softint_netisrs[(n)] = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE,\ 319 softint_netisrs[(n)] = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE,\
320 (void (*)(void *))(f), NULL) 320 (void (*)(void *))(f), NULL)
321#include <net/netisr_dispatch.h> 321#include <net/netisr_dispatch.h>
322 } 322 }
323} 323}
324 324
325/* 325/*
326 * softint_establish: 326 * softint_establish:
327 * 327 *
328 * Register a software interrupt handler. 328 * Register a software interrupt handler.
329 */ 329 */
330void * 330void *
331softint_establish(u_int flags, void (*func)(void *), void *arg) 331softint_establish(u_int flags, void (*func)(void *), void *arg)
332{ 332{
333 CPU_INFO_ITERATOR cii; 333 CPU_INFO_ITERATOR cii;
334 struct cpu_info *ci; 334 struct cpu_info *ci;
335 softcpu_t *sc; 335 softcpu_t *sc;
336 softhand_t *sh; 336 softhand_t *sh;
337 u_int level, index; 337 u_int level, index;
338 u_int ipi_id = 0; 338 u_int ipi_id = 0;
339 void *sih; 339 void *sih;
340 340
341 level = (flags & SOFTINT_LVLMASK); 341 level = (flags & SOFTINT_LVLMASK);
342 KASSERT(level < SOFTINT_COUNT); 342 KASSERT(level < SOFTINT_COUNT);
343 KASSERT((flags & SOFTINT_IMPMASK) == 0); 343 KASSERT((flags & SOFTINT_IMPMASK) == 0);
344 344
345 mutex_enter(&softint_lock); 345 mutex_enter(&softint_lock);
346 346
347 /* Find a free slot. */ 347 /* Find a free slot. */
348 sc = curcpu()->ci_data.cpu_softcpu; 348 sc = curcpu()->ci_data.cpu_softcpu;
349 for (index = 1; index < softint_max; index++) { 349 for (index = 1; index < softint_max; index++) {
350 if (sc->sc_hand[index].sh_func == NULL) 350 if (sc->sc_hand[index].sh_func == NULL)
351 break; 351 break;
352 } 352 }
353 if (index == softint_max) { 353 if (index == softint_max) {
354 mutex_exit(&softint_lock); 354 mutex_exit(&softint_lock);
355 printf("WARNING: softint_establish: table full, " 355 printf("WARNING: softint_establish: table full, "
356 "increase softint_bytes\n"); 356 "increase softint_bytes\n");
357 return NULL; 357 return NULL;
358 } 358 }
359 sih = (void *)((uint8_t *)&sc->sc_hand[index] - (uint8_t *)sc); 359 sih = (void *)((uint8_t *)&sc->sc_hand[index] - (uint8_t *)sc);
360 360
361 if (flags & SOFTINT_RCPU) { 361 if (flags & SOFTINT_RCPU) {
362 if ((ipi_id = ipi_register(softint_schedule, sih)) == 0) { 362 if ((ipi_id = ipi_register(softint_schedule, sih)) == 0) {
363 mutex_exit(&softint_lock); 363 mutex_exit(&softint_lock);
364 return NULL; 364 return NULL;
365 } 365 }
366 } 366 }
367 367
368 /* Set up the handler on each CPU. */ 368 /* Set up the handler on each CPU. */
369 if (ncpu < 2) { 369 if (ncpu < 2) {
370 /* XXX hack for machines with no CPU_INFO_FOREACH() early on */ 370 /* XXX hack for machines with no CPU_INFO_FOREACH() early on */
371 sc = curcpu()->ci_data.cpu_softcpu; 371 sc = curcpu()->ci_data.cpu_softcpu;
372 sh = &sc->sc_hand[index]; 372 sh = &sc->sc_hand[index];
373 sh->sh_isr = &sc->sc_int[level]; 373 sh->sh_isr = &sc->sc_int[level];
374 sh->sh_func = func; 374 sh->sh_func = func;
375 sh->sh_arg = arg; 375 sh->sh_arg = arg;
376 sh->sh_flags = flags; 376 sh->sh_flags = flags;
377 sh->sh_ipi_id = ipi_id; 377 sh->sh_ipi_id = ipi_id;
378 } else for (CPU_INFO_FOREACH(cii, ci)) { 378 } else for (CPU_INFO_FOREACH(cii, ci)) {
379 sc = ci->ci_data.cpu_softcpu; 379 sc = ci->ci_data.cpu_softcpu;
380 sh = &sc->sc_hand[index]; 380 sh = &sc->sc_hand[index];
381 sh->sh_isr = &sc->sc_int[level]; 381 sh->sh_isr = &sc->sc_int[level];
382 sh->sh_func = func; 382 sh->sh_func = func;
383 sh->sh_arg = arg; 383 sh->sh_arg = arg;
384 sh->sh_flags = flags; 384 sh->sh_flags = flags;
385 sh->sh_ipi_id = ipi_id; 385 sh->sh_ipi_id = ipi_id;
386 } 386 }
387 mutex_exit(&softint_lock); 387 mutex_exit(&softint_lock);
388 388
389 return sih; 389 return sih;
390} 390}
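
For reference, a minimal sketch of how a driver might use this interface at attach time (the softc layout and all example* names are hypothetical; the flags and the softint_establish() signature are the ones defined in this file):

	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/intr.h>

	struct example_softc {
		void	*sc_sih;	/* cookie from softint_establish() */
	};

	static void
	example_softintr(void *arg)
	{
		/* Deferred work runs here, in soft interrupt context. */
	}

	static int
	example_attach(struct example_softc *sc)
	{

		/* SOFTINT_MPSAFE: run the handler without the kernel lock. */
		sc->sc_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
		    example_softintr, sc);
		if (sc->sc_sih == NULL)
			return ENOMEM;	/* handler table was full */
		return 0;
	}
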
391 391
392/* 392/*
393 * softint_disestablish: 393 * softint_disestablish:
394 * 394 *
395 * Unregister a software interrupt handler. The soft interrupt could 395 * Unregister a software interrupt handler. The soft interrupt could
396 * still be active at this point, but the caller commits not to try 396 * still be active at this point, but the caller commits not to try
397 * and trigger it again once this call is made. The caller must not 397 * and trigger it again once this call is made. The caller must not
398 * hold any locks that could be taken from soft interrupt context, 398 * hold any locks that could be taken from soft interrupt context,
399 * because we will wait for the softint to complete if it's still 399 * because we will wait for the softint to complete if it's still
400 * running. 400 * running.
401 */ 401 */
402void 402void
403softint_disestablish(void *arg) 403softint_disestablish(void *arg)
404{ 404{
405 CPU_INFO_ITERATOR cii; 405 CPU_INFO_ITERATOR cii;
406 struct cpu_info *ci; 406 struct cpu_info *ci;
407 softcpu_t *sc; 407 softcpu_t *sc;
408 softhand_t *sh; 408 softhand_t *sh;
409 uintptr_t offset; 409 uintptr_t offset;
410 u_int flags; 410 u_int flags;
411 411
412 offset = (uintptr_t)arg; 412 offset = (uintptr_t)arg;
413 KASSERTMSG(offset != 0 && offset < softint_bytes, "%"PRIuPTR" %u", 413 KASSERTMSG(offset != 0 && offset < softint_bytes, "%"PRIuPTR" %u",
414 offset, softint_bytes); 414 offset, softint_bytes);
415 415
416 /* 416 /*
417 * Unregister an IPI handler if there is any. Note: there is 417 * Unregister an IPI handler if there is any. Note: there is
418 * no need to disable preemption here - ID is stable. 418 * no need to disable preemption here - ID is stable.
419 */ 419 */
420 sc = curcpu()->ci_data.cpu_softcpu; 420 sc = curcpu()->ci_data.cpu_softcpu;
421 sh = (softhand_t *)((uint8_t *)sc + offset); 421 sh = (softhand_t *)((uint8_t *)sc + offset);
422 if (sh->sh_ipi_id) { 422 if (sh->sh_ipi_id) {
423 ipi_unregister(sh->sh_ipi_id); 423 ipi_unregister(sh->sh_ipi_id);
424 } 424 }
425 425
426 /* 426 /*
427 * Run a cross call so we see up to date values of sh_flags from 427 * Run a cross call so we see up to date values of sh_flags from
428 * all CPUs. Once softint_disestablish() is called, the caller 428 * all CPUs. Once softint_disestablish() is called, the caller
 429 * commits to not triggering the interrupt and setting SOFTINT_ACTIVE on 429 * commits to not triggering the interrupt and setting SOFTINT_ACTIVE on
430 * it again. So, we are only looking for handler records with 430 * it again. So, we are only looking for handler records with
431 * SOFTINT_ACTIVE already set. 431 * SOFTINT_ACTIVE already set.
432 */ 432 */
433 if (__predict_true(mp_online)) { 433 if (__predict_true(mp_online)) {
434 xc_barrier(0); 434 xc_barrier(0);
435 } 435 }
436 436
437 for (;;) { 437 for (;;) {
438 /* Collect flag values from each CPU. */ 438 /* Collect flag values from each CPU. */
439 flags = 0; 439 flags = 0;
440 for (CPU_INFO_FOREACH(cii, ci)) { 440 for (CPU_INFO_FOREACH(cii, ci)) {
441 sc = ci->ci_data.cpu_softcpu; 441 sc = ci->ci_data.cpu_softcpu;
442 sh = (softhand_t *)((uint8_t *)sc + offset); 442 sh = (softhand_t *)((uint8_t *)sc + offset);
443 KASSERT(sh->sh_func != NULL); 443 KASSERT(sh->sh_func != NULL);
444 flags |= sh->sh_flags; 444 flags |= sh->sh_flags;
445 } 445 }
446 /* Inactive on all CPUs? */ 446 /* Inactive on all CPUs? */
447 if ((flags & SOFTINT_ACTIVE) == 0) { 447 if ((flags & SOFTINT_ACTIVE) == 0) {
448 break; 448 break;
449 } 449 }
450 /* Oops, still active. Wait for it to clear. */ 450 /* Oops, still active. Wait for it to clear. */
451 (void)kpause("softdis", false, 1, NULL); 451 (void)kpause("softdis", false, 1, NULL);
452 } 452 }
453 453
454 /* Clear the handler on each CPU. */ 454 /* Clear the handler on each CPU. */
455 mutex_enter(&softint_lock); 455 mutex_enter(&softint_lock);
456 for (CPU_INFO_FOREACH(cii, ci)) { 456 for (CPU_INFO_FOREACH(cii, ci)) {
457 sc = ci->ci_data.cpu_softcpu; 457 sc = ci->ci_data.cpu_softcpu;
458 sh = (softhand_t *)((uint8_t *)sc + offset); 458 sh = (softhand_t *)((uint8_t *)sc + offset);
459 KASSERT(sh->sh_func != NULL); 459 KASSERT(sh->sh_func != NULL);
460 sh->sh_func = NULL; 460 sh->sh_func = NULL;
461 } 461 }
462 mutex_exit(&softint_lock); 462 mutex_exit(&softint_lock);
463} 463}
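
A matching detach-time sketch (hypothetical names again) that respects the rule stated above: the hardware interrupt has already been torn down so nothing can schedule the handler any more, and no lock that the handler might take is held across the call, since softint_disestablish() may sleep until the handler has finished on every CPU:

	static void
	example_detach(struct example_softc *sc)
	{

		if (sc->sc_sih != NULL) {
			/* May sleep while SOFTINT_ACTIVE is set anywhere. */
			softint_disestablish(sc->sc_sih);
			sc->sc_sih = NULL;
		}
	}
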
464 464
465/* 465/*
466 * softint_schedule: 466 * softint_schedule:
467 * 467 *
468 * Trigger a software interrupt. Must be called from a hardware 468 * Trigger a software interrupt. Must be called from a hardware
469 * interrupt handler, or with preemption disabled (since we are 469 * interrupt handler, or with preemption disabled (since we are
470 * using the value of curcpu()). 470 * using the value of curcpu()).
471 */ 471 */
472void 472void
473softint_schedule(void *arg) 473softint_schedule(void *arg)
474{ 474{
475 softhand_t *sh; 475 softhand_t *sh;
476 softint_t *si; 476 softint_t *si;
477 uintptr_t offset; 477 uintptr_t offset;
478 int s; 478 int s;
479 479
480 KASSERT(kpreempt_disabled()); 480 KASSERT(kpreempt_disabled());
481 481
482 /* Find the handler record for this CPU. */ 482 /* Find the handler record for this CPU. */
483 offset = (uintptr_t)arg; 483 offset = (uintptr_t)arg;
484 KASSERTMSG(offset != 0 && offset < softint_bytes, "%"PRIuPTR" %u", 484 KASSERTMSG(offset != 0 && offset < softint_bytes, "%"PRIuPTR" %u",
485 offset, softint_bytes); 485 offset, softint_bytes);
486 sh = (softhand_t *)((uint8_t *)curcpu()->ci_data.cpu_softcpu + offset); 486 sh = (softhand_t *)((uint8_t *)curcpu()->ci_data.cpu_softcpu + offset);
487 487
488 /* If it's already pending there's nothing to do. */ 488 /* If it's already pending there's nothing to do. */
489 if ((sh->sh_flags & SOFTINT_PENDING) != 0) { 489 if ((sh->sh_flags & SOFTINT_PENDING) != 0) {
490 return; 490 return;
491 } 491 }
492 492
493 /* 493 /*
494 * Enqueue the handler into the LWP's pending list. 494 * Enqueue the handler into the LWP's pending list.
495 * If the LWP is completely idle, then make it run. 495 * If the LWP is completely idle, then make it run.
496 */ 496 */
497 s = splhigh(); 497 s = splhigh();
498 if ((sh->sh_flags & SOFTINT_PENDING) == 0) { 498 if ((sh->sh_flags & SOFTINT_PENDING) == 0) {
499 si = sh->sh_isr; 499 si = sh->sh_isr;
500 sh->sh_flags |= SOFTINT_PENDING; 500 sh->sh_flags |= SOFTINT_PENDING;
501 SIMPLEQ_INSERT_TAIL(&si->si_q, sh, sh_q); 501 SIMPLEQ_INSERT_TAIL(&si->si_q, sh, sh_q);
502 if (si->si_active == 0) { 502 if (si->si_active == 0) {
503 si->si_active = 1; 503 si->si_active = 1;
504 softint_trigger(si->si_machdep); 504 softint_trigger(si->si_machdep);
505 } 505 }
506 } 506 }
507 splx(s); 507 splx(s);
508} 508}
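
As an illustration of the calling constraint above, a hypothetical hardware interrupt handler defers its work like this; hard interrupt context already has preemption disabled, so the kpreempt_disabled() assertion holds:

	static int
	example_hardintr(void *arg)
	{
		struct example_softc *sc = arg;

		/* Acknowledge the device here, then defer the real work. */
		softint_schedule(sc->sc_sih);
		return 1;	/* interrupt was for us */
	}
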
509 509
510/* 510/*
511 * softint_schedule_cpu: 511 * softint_schedule_cpu:
512 * 512 *
513 * Trigger a software interrupt on a target CPU. This invokes 513 * Trigger a software interrupt on a target CPU. This invokes
 514 * softint_schedule() for the local CPU or sends an IPI to invoke 514 * softint_schedule() for the local CPU or sends an IPI to invoke
515 * this routine on the remote CPU. Preemption must be disabled. 515 * this routine on the remote CPU. Preemption must be disabled.
516 */ 516 */
517void 517void
518softint_schedule_cpu(void *arg, struct cpu_info *ci) 518softint_schedule_cpu(void *arg, struct cpu_info *ci)
519{ 519{
520 KASSERT(kpreempt_disabled()); 520 KASSERT(kpreempt_disabled());
521 521
522 if (curcpu() != ci) { 522 if (curcpu() != ci) {
523 const softcpu_t *sc = ci->ci_data.cpu_softcpu; 523 const softcpu_t *sc = ci->ci_data.cpu_softcpu;
524 const uintptr_t offset = (uintptr_t)arg; 524 const uintptr_t offset = (uintptr_t)arg;
525 const softhand_t *sh; 525 const softhand_t *sh;
526 526
527 sh = (const softhand_t *)((const uint8_t *)sc + offset); 527 sh = (const softhand_t *)((const uint8_t *)sc + offset);
528 KASSERT((sh->sh_flags & SOFTINT_RCPU) != 0); 528 KASSERT((sh->sh_flags & SOFTINT_RCPU) != 0);
529 ipi_trigger(sh->sh_ipi_id, ci); 529 ipi_trigger(sh->sh_ipi_id, ci);
530 return; 530 return;
531 } 531 }
532 532
533 /* Just a local CPU. */ 533 /* Just a local CPU. */
534 softint_schedule(arg); 534 softint_schedule(arg);
535} 535}
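
To target another CPU, the handler must have been established with SOFTINT_RCPU so that an IPI identifier was registered for it. A hedged sketch for a call made from thread context, where preemption has to be disabled explicitly (names are hypothetical):

	static void
	example_kick_cpu(struct example_softc *sc, struct cpu_info *ci)
	{

		/* sc_sih was established with SOFTINT_RCPU | SOFTINT_MPSAFE. */
		kpreempt_disable();
		softint_schedule_cpu(sc->sc_sih, ci);
		kpreempt_enable();
	}
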
536 536
537/* 537/*
538 * softint_execute: 538 * softint_execute:
539 * 539 *
540 * Invoke handlers for the specified soft interrupt. 540 * Invoke handlers for the specified soft interrupt.
541 * Must be entered at splhigh. Will drop the priority 541 * Must be entered at splhigh. Will drop the priority
 542 * to the level specified, but returns at splhigh. 542 * to the level specified, but returns at splhigh.
543 */ 543 */
544static inline void 544static inline void
545softint_execute(softint_t *si, lwp_t *l, int s) 545softint_execute(softint_t *si, lwp_t *l, int s)
546{ 546{
547 softhand_t *sh; 547 softhand_t *sh;
548 548
549#ifdef __HAVE_FAST_SOFTINTS 549#ifdef __HAVE_FAST_SOFTINTS
550 KASSERT(si->si_lwp == curlwp); 550 KASSERT(si->si_lwp == curlwp);
551#else 551#else
552 /* May be running in user context. */ 552 /* May be running in user context. */
553#endif 553#endif
554 KASSERT(si->si_cpu == curcpu()); 554 KASSERT(si->si_cpu == curcpu());
555 KASSERT(si->si_lwp->l_wchan == NULL); 555 KASSERT(si->si_lwp->l_wchan == NULL);
556 KASSERT(si->si_active); 556 KASSERT(si->si_active);
557 557
558 /* 558 /*
559 * Note: due to priority inheritance we may have interrupted a 559 * Note: due to priority inheritance we may have interrupted a
560 * higher priority LWP. Since the soft interrupt must be quick 560 * higher priority LWP. Since the soft interrupt must be quick
561 * and is non-preemptable, we don't bother yielding. 561 * and is non-preemptable, we don't bother yielding.
562 */ 562 */
563 563
564 while (!SIMPLEQ_EMPTY(&si->si_q)) { 564 while (!SIMPLEQ_EMPTY(&si->si_q)) {
565 /* 565 /*
566 * Pick the longest waiting handler to run. We block 566 * Pick the longest waiting handler to run. We block
567 * interrupts but do not lock in order to do this, as 567 * interrupts but do not lock in order to do this, as
568 * we are protecting against the local CPU only. 568 * we are protecting against the local CPU only.
569 */ 569 */
570 sh = SIMPLEQ_FIRST(&si->si_q); 570 sh = SIMPLEQ_FIRST(&si->si_q);
571 SIMPLEQ_REMOVE_HEAD(&si->si_q, sh_q); 571 SIMPLEQ_REMOVE_HEAD(&si->si_q, sh_q);
572 KASSERT((sh->sh_flags & SOFTINT_PENDING) != 0); 572 KASSERT((sh->sh_flags & SOFTINT_PENDING) != 0);
573 KASSERT((sh->sh_flags & SOFTINT_ACTIVE) == 0); 573 KASSERT((sh->sh_flags & SOFTINT_ACTIVE) == 0);
574 sh->sh_flags ^= (SOFTINT_PENDING | SOFTINT_ACTIVE); 574 sh->sh_flags ^= (SOFTINT_PENDING | SOFTINT_ACTIVE);
575 splx(s); 575 splx(s);
576 576
577 /* Run the handler. */ 577 /* Run the handler. */
578 if (__predict_true((sh->sh_flags & SOFTINT_MPSAFE) != 0)) { 578 if (__predict_true((sh->sh_flags & SOFTINT_MPSAFE) != 0)) {
579 (*sh->sh_func)(sh->sh_arg); 579 (*sh->sh_func)(sh->sh_arg);
580 } else { 580 } else {
581 KERNEL_LOCK(1, l); 581 KERNEL_LOCK(1, l);
582 (*sh->sh_func)(sh->sh_arg); 582 (*sh->sh_func)(sh->sh_arg);
583 KERNEL_UNLOCK_ONE(l); 583 KERNEL_UNLOCK_ONE(l);
584 } 584 }
585  585
586 /* Diagnostic: check that spin-locks have not leaked. */ 586 /* Diagnostic: check that spin-locks have not leaked. */
587 KASSERTMSG(curcpu()->ci_mtx_count == 0, 587 KASSERTMSG(curcpu()->ci_mtx_count == 0,
588 "%s: ci_mtx_count (%d) != 0, sh_func %p\n", 588 "%s: ci_mtx_count (%d) != 0, sh_func %p\n",
589 __func__, curcpu()->ci_mtx_count, sh->sh_func); 589 __func__, curcpu()->ci_mtx_count, sh->sh_func);
590 /* Diagnostic: check that psrefs have not leaked. */ 590 /* Diagnostic: check that psrefs have not leaked. */
591 KASSERTMSG(l->l_psrefs == 0, "%s: l_psrefs=%d, sh_func=%p\n", 591 KASSERTMSG(l->l_psrefs == 0, "%s: l_psrefs=%d, sh_func=%p\n",
592 __func__, l->l_psrefs, sh->sh_func); 592 __func__, l->l_psrefs, sh->sh_func);
593 593
594 (void)splhigh(); 594 (void)splhigh();
595 KASSERT((sh->sh_flags & SOFTINT_ACTIVE) != 0); 595 KASSERT((sh->sh_flags & SOFTINT_ACTIVE) != 0);
596 sh->sh_flags ^= SOFTINT_ACTIVE; 596 sh->sh_flags ^= SOFTINT_ACTIVE;
597 } 597 }
598 598
599 PSREF_DEBUG_BARRIER(); 599 PSREF_DEBUG_BARRIER();
600 600
601 CPU_COUNT(CPU_COUNT_NSOFT, 1); 601 CPU_COUNT(CPU_COUNT_NSOFT, 1);
602 602
603 KASSERT(si->si_cpu == curcpu()); 603 KASSERT(si->si_cpu == curcpu());
604 KASSERT(si->si_lwp->l_wchan == NULL); 604 KASSERT(si->si_lwp->l_wchan == NULL);
605 KASSERT(si->si_active); 605 KASSERT(si->si_active);
606 si->si_evcnt.ev_count++; 606 si->si_evcnt.ev_count++;
607 si->si_active = 0; 607 si->si_active = 0;
608} 608}
609 609
610/* 610/*
611 * softint_block: 611 * softint_block:
612 * 612 *
613 * Update statistics when the soft interrupt blocks. 613 * Update statistics when the soft interrupt blocks.
614 */ 614 */
615void 615void
616softint_block(lwp_t *l) 616softint_block(lwp_t *l)
617{ 617{
618 softint_t *si = l->l_private; 618 softint_t *si = l->l_private;
619 619
620 KASSERT((l->l_pflag & LP_INTR) != 0); 620 KASSERT((l->l_pflag & LP_INTR) != 0);
621 si->si_evcnt_block.ev_count++; 621 si->si_evcnt_block.ev_count++;
622} 622}
623 623
624/* 624/*
625 * schednetisr: 625 * schednetisr:
626 * 626 *
627 * Trigger a legacy network interrupt. XXX Needs to go away. 627 * Trigger a legacy network interrupt. XXX Needs to go away.
628 */ 628 */
629void 629void
630schednetisr(int isr) 630schednetisr(int isr)
631{ 631{
632 632
633 softint_schedule(softint_netisrs[isr]); 633 softint_schedule(softint_netisrs[isr]);
634} 634}
635 635
636#ifndef __HAVE_FAST_SOFTINTS 636#ifndef __HAVE_FAST_SOFTINTS
637 637
638#ifdef __HAVE_PREEMPTION 638#ifdef __HAVE_PREEMPTION
639#error __HAVE_PREEMPTION requires __HAVE_FAST_SOFTINTS 639#error __HAVE_PREEMPTION requires __HAVE_FAST_SOFTINTS
640#endif 640#endif
641 641
642/* 642/*
643 * softint_init_md: 643 * softint_init_md:
644 * 644 *
645 * Slow path: perform machine-dependent initialization. 645 * Slow path: perform machine-dependent initialization.
646 */ 646 */
647void 647void
648softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep) 648softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
649{ 649{
650 struct proc *p; 650 struct proc *p;
651 softint_t *si; 651 softint_t *si;
652 652
653 *machdep = (1 << level); 653 *machdep = (1 << level);
654 si = l->l_private; 654 si = l->l_private;
655 p = l->l_proc; 655 p = l->l_proc;
656 656
657 mutex_enter(p->p_lock); 657 mutex_enter(p->p_lock);
658 lwp_lock(l); 658 lwp_lock(l);
659 /* Cheat and make the KASSERT in softint_thread() happy. */ 659 /* Cheat and make the KASSERT in softint_thread() happy. */
660 si->si_active = 1; 660 si->si_active = 1;
661 setrunnable(l); 661 setrunnable(l);
662 /* LWP now unlocked */ 662 /* LWP now unlocked */
663 mutex_exit(p->p_lock); 663 mutex_exit(p->p_lock);
664} 664}
665 665
666/* 666/*
667 * softint_trigger: 667 * softint_trigger:
668 * 668 *
669 * Slow path: cause a soft interrupt handler to begin executing. 669 * Slow path: cause a soft interrupt handler to begin executing.
670 * Called at IPL_HIGH. 670 * Called at IPL_HIGH.
671 */ 671 */
672void 672void
673softint_trigger(uintptr_t machdep) 673softint_trigger(uintptr_t machdep)
674{ 674{
675 struct cpu_info *ci; 675 struct cpu_info *ci;
676 lwp_t *l; 676 lwp_t *l;
677 677
678 ci = curcpu(); 678 ci = curcpu();
679 ci->ci_data.cpu_softints |= machdep; 679 ci->ci_data.cpu_softints |= machdep;
680 l = ci->ci_onproc; 680 l = ci->ci_onproc;
681 if (l == ci->ci_data.cpu_idlelwp) { 681 if (l == ci->ci_data.cpu_idlelwp) {
682 atomic_or_uint(&ci->ci_want_resched, 682 atomic_or_uint(&ci->ci_want_resched,
683 RESCHED_IDLE | RESCHED_UPREEMPT); 683 RESCHED_IDLE | RESCHED_UPREEMPT);
684 } else { 684 } else {
685 /* MI equivalent of aston() */ 685 /* MI equivalent of aston() */
686 cpu_signotify(l); 686 cpu_signotify(l);
687 } 687 }
688} 688}
689 689
690/* 690/*
691 * softint_thread: 691 * softint_thread:
692 * 692 *
693 * Slow path: MI software interrupt dispatch. 693 * Slow path: MI software interrupt dispatch.
694 */ 694 */
695void 695void
696softint_thread(void *cookie) 696softint_thread(void *cookie)
697{ 697{
698 softint_t *si; 698 softint_t *si;
699 lwp_t *l; 699 lwp_t *l;
700 int s; 700 int s;
701 701
702 l = curlwp; 702 l = curlwp;
703 si = l->l_private; 703 si = l->l_private;
704 704
705 for (;;) { 705 for (;;) {
706 /* 706 /*
707 * Clear pending status and run it. We must drop the 707 * Clear pending status and run it. We must drop the
708 * spl before mi_switch(), since IPL_HIGH may be higher 708 * spl before mi_switch(), since IPL_HIGH may be higher
709 * than IPL_SCHED (and it is not safe to switch at a 709 * than IPL_SCHED (and it is not safe to switch at a
710 * higher level). 710 * higher level).
711 */ 711 */
712 s = splhigh(); 712 s = splhigh();
713 l->l_cpu->ci_data.cpu_softints &= ~si->si_machdep; 713 l->l_cpu->ci_data.cpu_softints &= ~si->si_machdep;
714 softint_execute(si, l, s); 714 softint_execute(si, l, s);
715 splx(s); 715 splx(s);
716 716
717 lwp_lock(l); 717 lwp_lock(l);
718 l->l_stat = LSIDL; 718 l->l_stat = LSIDL;
719 spc_lock(l->l_cpu); 719 spc_lock(l->l_cpu);
720 mi_switch(l); 720 mi_switch(l);
721 } 721 }
722} 722}
723 723
724/* 724/*
725 * softint_picklwp: 725 * softint_picklwp:
726 * 726 *
727 * Slow path: called from mi_switch() to pick the highest priority 727 * Slow path: called from mi_switch() to pick the highest priority
728 * soft interrupt LWP that needs to run. 728 * soft interrupt LWP that needs to run.
729 */ 729 */
730lwp_t * 730lwp_t *
731softint_picklwp(void) 731softint_picklwp(void)
732{ 732{
733 struct cpu_info *ci; 733 struct cpu_info *ci;
734 u_int mask; 734 u_int mask;
735 softint_t *si; 735 softint_t *si;
736 lwp_t *l; 736 lwp_t *l;
737 737
738 ci = curcpu(); 738 ci = curcpu();
739 si = ((softcpu_t *)ci->ci_data.cpu_softcpu)->sc_int; 739 si = ((softcpu_t *)ci->ci_data.cpu_softcpu)->sc_int;
740 mask = ci->ci_data.cpu_softints; 740 mask = ci->ci_data.cpu_softints;
741 741
742 if ((mask & (1 << SOFTINT_SERIAL)) != 0) { 742 if ((mask & (1 << SOFTINT_SERIAL)) != 0) {
743 l = si[SOFTINT_SERIAL].si_lwp; 743 l = si[SOFTINT_SERIAL].si_lwp;
744 } else if ((mask & (1 << SOFTINT_NET)) != 0) { 744 } else if ((mask & (1 << SOFTINT_NET)) != 0) {
745 l = si[SOFTINT_NET].si_lwp; 745 l = si[SOFTINT_NET].si_lwp;
746 } else if ((mask & (1 << SOFTINT_BIO)) != 0) { 746 } else if ((mask & (1 << SOFTINT_BIO)) != 0) {
747 l = si[SOFTINT_BIO].si_lwp; 747 l = si[SOFTINT_BIO].si_lwp;
748 } else if ((mask & (1 << SOFTINT_CLOCK)) != 0) { 748 } else if ((mask & (1 << SOFTINT_CLOCK)) != 0) {
749 l = si[SOFTINT_CLOCK].si_lwp; 749 l = si[SOFTINT_CLOCK].si_lwp;
750 } else { 750 } else {
751 panic("softint_picklwp"); 751 panic("softint_picklwp");
752 } 752 }
753 753
754 return l; 754 return l;
755} 755}
756 756
757/* 757/*
758 * softint_overlay: 758 * softint_overlay:
759 * 759 *
760 * Slow path: called from lwp_userret() to run a soft interrupt 760 * Slow path: called from lwp_userret() to run a soft interrupt
761 * within the context of a user thread. 761 * within the context of a user thread.
762 */ 762 */
763void 763void
764softint_overlay(void) 764softint_overlay(void)
765{ 765{
766 struct cpu_info *ci; 766 struct cpu_info *ci;
767 u_int softints, oflag; 767 u_int softints, oflag;
768 softint_t *si; 768 softint_t *si;
769 pri_t obase; 769 pri_t obase;
770 lwp_t *l; 770 lwp_t *l;
771 int s; 771 int s;
772 772
773 l = curlwp; 773 l = curlwp;
774 KASSERT((l->l_pflag & LP_INTR) == 0); 774 KASSERT((l->l_pflag & LP_INTR) == 0);
775 775
776 /* 776 /*
777 * Arrange to elevate priority if the LWP blocks. Also, bind LWP 777 * Arrange to elevate priority if the LWP blocks. Also, bind LWP
778 * to the CPU. Note: disable kernel preemption before doing that. 778 * to the CPU. Note: disable kernel preemption before doing that.
779 */ 779 */
780 s = splhigh(); 780 s = splhigh();
781 ci = l->l_cpu; 781 ci = l->l_cpu;
782 si = ((softcpu_t *)ci->ci_data.cpu_softcpu)->sc_int; 782 si = ((softcpu_t *)ci->ci_data.cpu_softcpu)->sc_int;
783 783
784 obase = l->l_kpribase; 784 obase = l->l_kpribase;
785 l->l_kpribase = PRI_KERNEL_RT; 785 l->l_kpribase = PRI_KERNEL_RT;
786 oflag = l->l_pflag; 786 oflag = l->l_pflag;
787 l->l_pflag = oflag | LP_INTR | LP_BOUND; 787 l->l_pflag = oflag | LP_INTR | LP_BOUND;
788 788
789 while ((softints = ci->ci_data.cpu_softints) != 0) { 789 while ((softints = ci->ci_data.cpu_softints) != 0) {
790 if ((softints & (1 << SOFTINT_SERIAL)) != 0) { 790 if ((softints & (1 << SOFTINT_SERIAL)) != 0) {
791 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_SERIAL); 791 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_SERIAL);
792 softint_execute(&si[SOFTINT_SERIAL], l, s); 792 softint_execute(&si[SOFTINT_SERIAL], l, s);
793 continue; 793 continue;
794 } 794 }
795 if ((softints & (1 << SOFTINT_NET)) != 0) { 795 if ((softints & (1 << SOFTINT_NET)) != 0) {
796 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_NET); 796 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_NET);
797 softint_execute(&si[SOFTINT_NET], l, s); 797 softint_execute(&si[SOFTINT_NET], l, s);
798 continue; 798 continue;
799 } 799 }
800 if ((softints & (1 << SOFTINT_BIO)) != 0) { 800 if ((softints & (1 << SOFTINT_BIO)) != 0) {
801 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_BIO); 801 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_BIO);
802 softint_execute(&si[SOFTINT_BIO], l, s); 802 softint_execute(&si[SOFTINT_BIO], l, s);
803 continue; 803 continue;
804 } 804 }
805 if ((softints & (1 << SOFTINT_CLOCK)) != 0) { 805 if ((softints & (1 << SOFTINT_CLOCK)) != 0) {
806 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_CLOCK); 806 ci->ci_data.cpu_softints &= ~(1 << SOFTINT_CLOCK);
807 softint_execute(&si[SOFTINT_CLOCK], l, s); 807 softint_execute(&si[SOFTINT_CLOCK], l, s);
808 continue; 808 continue;
809 } 809 }
810 } 810 }
811 l->l_pflag = oflag; 811 l->l_pflag = oflag;
812 l->l_kpribase = obase; 812 l->l_kpribase = obase;
813 splx(s); 813 splx(s);
814} 814}
815 815
816#else /* !__HAVE_FAST_SOFTINTS */ 816#else /* !__HAVE_FAST_SOFTINTS */
817 817
818/* 818/*
819 * softint_thread: 819 * softint_thread:
820 * 820 *
821 * Fast path: the LWP is switched to without restoring any state, 821 * Fast path: the LWP is switched to without restoring any state,
822 * so we should not arrive here - there is a direct handoff between 822 * so we should not arrive here - there is a direct handoff between
823 * the interrupt stub and softint_dispatch(). 823 * the interrupt stub and softint_dispatch().
824 */ 824 */
825void 825void
826softint_thread(void *cookie) 826softint_thread(void *cookie)
827{ 827{
828 828
829 panic("softint_thread"); 829 panic("softint_thread");
830} 830}
831 831
832/* 832/*
833 * softint_dispatch: 833 * softint_dispatch:
834 * 834 *
835 * Fast path: entry point from machine-dependent code. 835 * Fast path: entry point from machine-dependent code.
836 */ 836 */
837void 837void
838softint_dispatch(lwp_t *pinned, int s) 838softint_dispatch(lwp_t *pinned, int s)
839{ 839{
840 struct bintime now; 840 struct bintime now;
841 softint_t *si; 841 softint_t *si;
842 u_int timing; 842 u_int timing;
843 lwp_t *l; 843 lwp_t *l;
844 844
845#ifdef DIAGNOSTIC 845#ifdef DIAGNOSTIC
846 if ((pinned->l_pflag & LP_RUNNING) == 0 || curlwp->l_stat != LSIDL) { 846 if ((pinned->l_pflag & LP_RUNNING) == 0 || curlwp->l_stat != LSIDL) {
847 struct lwp *onproc = curcpu()->ci_onproc; 847 struct lwp *onproc = curcpu()->ci_onproc;
848 int s2 = splhigh(); 848 int s2 = splhigh();
849 printf("curcpu=%d, spl=%d curspl=%d\n" 849 printf("curcpu=%d, spl=%d curspl=%d\n"
850 "onproc=%p => l_stat=%d l_flag=%08x l_cpu=%d\n" 850 "onproc=%p => l_stat=%d l_flag=%08x l_cpu=%d\n"
851 "curlwp=%p => l_stat=%d l_flag=%08x l_cpu=%d\n" 851 "curlwp=%p => l_stat=%d l_flag=%08x l_cpu=%d\n"
852 "pinned=%p => l_stat=%d l_flag=%08x l_cpu=%d\n", 852 "pinned=%p => l_stat=%d l_flag=%08x l_cpu=%d\n",
853 cpu_index(curcpu()), s, s2, onproc, onproc->l_stat, 853 cpu_index(curcpu()), s, s2, onproc, onproc->l_stat,
854 onproc->l_flag, cpu_index(onproc->l_cpu), curlwp, 854 onproc->l_flag, cpu_index(onproc->l_cpu), curlwp,
855 curlwp->l_stat, curlwp->l_flag, 855 curlwp->l_stat, curlwp->l_flag,
856 cpu_index(curlwp->l_cpu), pinned, pinned->l_stat, 856 cpu_index(curlwp->l_cpu), pinned, pinned->l_stat,
857 pinned->l_flag, cpu_index(pinned->l_cpu)); 857 pinned->l_flag, cpu_index(pinned->l_cpu));
858 splx(s2); 858 splx(s2);
859 panic("softint screwup"); 859 panic("softint screwup");
860 } 860 }
861#endif 861#endif
862 862
863 l = curlwp; 863 l = curlwp;
864 si = l->l_private; 864 si = l->l_private;
865 865
866 /* 866 /*
867 * Note the interrupted LWP, and mark the current LWP as running 867 * Note the interrupted LWP, and mark the current LWP as running
868 * before proceeding. Although this must as a rule be done with 868 * before proceeding. Although this must as a rule be done with
869 * the LWP locked, at this point no external agents will want to 869 * the LWP locked, at this point no external agents will want to
870 * modify the interrupt LWP's state. 870 * modify the interrupt LWP's state.
871 */ 871 */
872 timing = softint_timing; 872 timing = softint_timing;
873 l->l_switchto = pinned; 873 l->l_switchto = pinned;
874 l->l_stat = LSONPROC; 874 l->l_stat = LSONPROC;
875 875
876 /* 876 /*
877 * Dispatch the interrupt. If softints are being timed, charge 877 * Dispatch the interrupt. If softints are being timed, charge
878 * for it. 878 * for it.
879 */ 879 */
880 if (timing) { 880 if (timing) {
881 binuptime(&l->l_stime); 881 binuptime(&l->l_stime);
882 membar_producer(); /* for calcru */ 882 membar_producer(); /* for calcru */
883 l->l_pflag |= LP_TIMEINTR; 883 l->l_pflag |= LP_TIMEINTR;
884 } 884 }
885 l->l_pflag |= LP_RUNNING; 885 l->l_pflag |= LP_RUNNING;
886 softint_execute(si, l, s); 886 softint_execute(si, l, s);
887 if (timing) { 887 if (timing) {
888 binuptime(&now); 888 binuptime(&now);
889 updatertime(l, &now); 889 updatertime(l, &now);
890 l->l_pflag &= ~LP_TIMEINTR; 890 l->l_pflag &= ~LP_TIMEINTR;
891 } 891 }
892 892
893 /* XXX temporary */ 
894 kernel_lock_plug_leak(); 
895 
896 /* 893 /*
897 * If we blocked while handling the interrupt, the pinned LWP is 894 * If we blocked while handling the interrupt, the pinned LWP is
898 * gone so switch to the idle LWP. It will select a new LWP to 895 * gone so switch to the idle LWP. It will select a new LWP to
899 * run. 896 * run.
900 * 897 *
901 * We must drop the priority level as switching at IPL_HIGH could 898 * We must drop the priority level as switching at IPL_HIGH could
902 * deadlock the system. We have already set si->si_active = 0, 899 * deadlock the system. We have already set si->si_active = 0,
903 * which means another interrupt at this level can be triggered.  900 * which means another interrupt at this level can be triggered.
 904 * That's not a problem: we are lowering to level 's' which will 901 * That's not a problem: we are lowering to level 's' which will
905 * prevent softint_dispatch() from being reentered at level 's', 902 * prevent softint_dispatch() from being reentered at level 's',
906 * until the priority is finally dropped to IPL_NONE on entry to 903 * until the priority is finally dropped to IPL_NONE on entry to
907 * the LWP chosen by mi_switch(). 904 * the LWP chosen by mi_switch().
908 */ 905 */
909 l->l_stat = LSIDL; 906 l->l_stat = LSIDL;
910 if (l->l_switchto == NULL) { 907 if (l->l_switchto == NULL) {
911 splx(s); 908 splx(s);
912 lwp_lock(l); 909 lwp_lock(l);
913 spc_lock(l->l_cpu); 910 spc_lock(l->l_cpu);
914 mi_switch(l); 911 mi_switch(l);
915 /* NOTREACHED */ 912 /* NOTREACHED */
916 } 913 }
917 l->l_switchto = NULL; 914 l->l_switchto = NULL;
918 l->l_pflag &= ~LP_RUNNING; 915 l->l_pflag &= ~LP_RUNNING;
919} 916}
920 917
921#endif /* !__HAVE_FAST_SOFTINTS */ 918#endif /* !__HAVE_FAST_SOFTINTS */

cvs diff -r1.88 -r1.89 src/sys/sys/lock.h

--- src/sys/sys/lock.h 2020/01/27 21:05:43 1.88
+++ src/sys/sys/lock.h 2020/03/08 15:05:18 1.89
@@ -1,116 +1,114 @@ @@ -1,116 +1,114 @@
1/* $NetBSD: lock.h,v 1.88 2020/01/27 21:05:43 ad Exp $ */ 1/* $NetBSD: lock.h,v 1.89 2020/03/08 15:05:18 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Ross Harvey, and by Andrew Doran. 9 * NASA Ames Research Center, by Ross Harvey, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Copyright (c) 1995 34 * Copyright (c) 1995
35 * The Regents of the University of California. All rights reserved. 35 * The Regents of the University of California. All rights reserved.
36 * 36 *
37 * This code contains ideas from software contributed to Berkeley by 37 * This code contains ideas from software contributed to Berkeley by
38 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating 38 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
39 * System project at Carnegie-Mellon University. 39 * System project at Carnegie-Mellon University.
40 * 40 *
41 * Redistribution and use in source and binary forms, with or without 41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions 42 * modification, are permitted provided that the following conditions
43 * are met: 43 * are met:
44 * 1. Redistributions of source code must retain the above copyright 44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer. 45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright 46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the 47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution. 48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors 49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software 50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission. 51 * without specific prior written permission.
52 * 52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE. 63 * SUCH DAMAGE.
64 * 64 *
65 * @(#)lock.h 8.12 (Berkeley) 5/19/95 65 * @(#)lock.h 8.12 (Berkeley) 5/19/95
66 */ 66 */
67 67
68#ifndef _SYS_LOCK_H_ 68#ifndef _SYS_LOCK_H_
69#define _SYS_LOCK_H_ 69#define _SYS_LOCK_H_
70 70
71#include <sys/stdint.h> 71#include <sys/stdint.h>
72#include <sys/mutex.h> 72#include <sys/mutex.h>
73 73
74#include <machine/lock.h> 74#include <machine/lock.h>
75 75
76#ifdef _KERNEL 76#ifdef _KERNEL
77 77
78/* 78/*
79 * From <machine/lock.h>. 79 * From <machine/lock.h>.
80 */ 80 */
81#ifndef SPINLOCK_SPIN_HOOK 81#ifndef SPINLOCK_SPIN_HOOK
82#define SPINLOCK_SPIN_HOOK 82#define SPINLOCK_SPIN_HOOK
83#endif 83#endif
84#ifndef SPINLOCK_BACKOFF_HOOK 84#ifndef SPINLOCK_BACKOFF_HOOK
85#define SPINLOCK_BACKOFF_HOOK nullop(NULL) 85#define SPINLOCK_BACKOFF_HOOK nullop(NULL)
86#endif 86#endif
87#ifndef SPINLOCK_BACKOFF_MIN 87#ifndef SPINLOCK_BACKOFF_MIN
88#define SPINLOCK_BACKOFF_MIN 4 88#define SPINLOCK_BACKOFF_MIN 4
89#endif 89#endif
90#ifndef SPINLOCK_BACKOFF_MAX 90#ifndef SPINLOCK_BACKOFF_MAX
91#define SPINLOCK_BACKOFF_MAX 128 91#define SPINLOCK_BACKOFF_MAX 128
92#endif 92#endif
93 93
94#define SPINLOCK_BACKOFF(count) \ 94#define SPINLOCK_BACKOFF(count) \
95do { \ 95do { \
96 int __i; \ 96 int __i; \
97 for (__i = (count); __i != 0; __i--) { \ 97 for (__i = (count); __i != 0; __i--) { \
98 SPINLOCK_BACKOFF_HOOK; \ 98 SPINLOCK_BACKOFF_HOOK; \
99 } \ 99 } \
100 if ((count) < SPINLOCK_BACKOFF_MAX) \ 100 if ((count) < SPINLOCK_BACKOFF_MAX) \
101 (count) += (count); \ 101 (count) += (count); \
102} while (/* CONSTCOND */ 0); 102} while (/* CONSTCOND */ 0);
103 103
104#ifdef LOCKDEBUG 104#ifdef LOCKDEBUG
105#define SPINLOCK_SPINOUT(spins) ((spins)++ > 0x0fffffff) 105#define SPINLOCK_SPINOUT(spins) ((spins)++ > 0x0fffffff)
106#else 106#else
107#define SPINLOCK_SPINOUT(spins) ((void)(spins), 0) 107#define SPINLOCK_SPINOUT(spins) ((void)(spins), 0)
108#endif 108#endif
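
For context, a hedged sketch of the spin-wait idiom these macros support, modelled on the kernel's own spin loops; the lock variable and function name are placeholders:

	static __cpu_simple_lock_t example_lock = __SIMPLELOCK_UNLOCKED;

	static void
	example_spin_acquire(void)
	{
		u_int count = SPINLOCK_BACKOFF_MIN;
		int spins = 0;

		while (!__cpu_simple_lock_try(&example_lock)) {
			SPINLOCK_BACKOFF(count);	/* exponential backoff */
			if (SPINLOCK_SPINOUT(spins))	/* no-op unless LOCKDEBUG */
				panic("example_spin_acquire: spun out");
		}
	}
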
109 109
110extern __cpu_simple_lock_t kernel_lock[]; 110extern __cpu_simple_lock_t kernel_lock[];
111 111
112void kernel_lock_plug_leak(void); 
113 
114#endif /* _KERNEL */ 112#endif /* _KERNEL */
115 113
116#endif /* _SYS_LOCK_H_ */ 114#endif /* _SYS_LOCK_H_ */