Sun Apr 19 23:05:05 2020 UTC
lwp_wait(): don't need to check for process exit, cv_wait_sig() does it.


(ad)
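The change removes lwp_wait()'s explicit pre-sleep test for the process-exit flag (PS_WEXIT): per the commit message, cv_wait_sig() already reports that condition, so only the "all LWPs are in _lwp_wait()" deadlock check remains before the sleep. As a rough userspace sketch of the pattern (an analogue only, not the kernel code; cv_wait_interruptible(), work_done, shut_down and request_shutdown() are made-up names for illustration), a waiter can rely on the return value of an interruptible wait instead of testing a separate shutdown flag before each sleep:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static bool work_done;		/* the thing the waiter actually wants */
static bool shut_down;		/* stands in for "process is exiting" */

/*
 * Toy stand-in for cv_wait_sig(): sleep on the CV, but return EINTR
 * if a shutdown has been posted.  Called with 'lock' held.
 */
static int
cv_wait_interruptible(void)
{

	while (!work_done && !shut_down)
		pthread_cond_wait(&cv, &lock);
	return shut_down ? EINTR : 0;
}

static void *
waiter(void *arg)
{
	int error;

	(void)arg;
	pthread_mutex_lock(&lock);
	for (;;) {
		if (work_done) {
			printf("collected the work\n");
			break;
		}
		/*
		 * No separate pre-sleep test for shut_down here: the
		 * interruptible wait reports it via its return value,
		 * which is the pattern lwp_wait() now relies on.
		 */
		if ((error = cv_wait_interruptible()) != 0) {
			printf("wait interrupted: %d\n", error);
			break;
		}
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* The "exit" path: post the condition and wake all waiters. */
static void
request_shutdown(void)
{

	pthread_mutex_lock(&lock);
	shut_down = true;
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);
	request_shutdown();
	pthread_join(t, NULL);
	return 0;
}

The point carried over from the commit: a pre-sleep check is redundant when the sleep itself is interruptible by the same condition.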
cvs diff -r1.233 -r1.234 src/sys/kern/kern_lwp.c

--- src/sys/kern/kern_lwp.c 2020/04/04 20:20:12 1.233
+++ src/sys/kern/kern_lwp.c 2020/04/19 23:05:04 1.234
@@ -1,1748 +1,1746 @@
1/* $NetBSD: kern_lwp.c,v 1.233 2020/04/04 20:20:12 thorpej Exp $ */ 1/* $NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 4 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
5 * The NetBSD Foundation, Inc. 5 * The NetBSD Foundation, Inc.
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This code is derived from software contributed to The NetBSD Foundation 8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Nathan J. Williams, and Andrew Doran. 9 * by Nathan J. Williams, and Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Overview 34 * Overview
35 * 35 *
36 * Lightweight processes (LWPs) are the basic unit or thread of 36 * Lightweight processes (LWPs) are the basic unit or thread of
37 * execution within the kernel. The core state of an LWP is described 37 * execution within the kernel. The core state of an LWP is described
38 * by "struct lwp", also known as lwp_t. 38 * by "struct lwp", also known as lwp_t.
39 * 39 *
 40 * Each LWP is contained within a process (described by "struct proc"). 40 * Each LWP is contained within a process (described by "struct proc").
41 * Every process contains at least one LWP, but may contain more. The 41 * Every process contains at least one LWP, but may contain more. The
42 * process describes attributes shared among all of its LWPs such as a 42 * process describes attributes shared among all of its LWPs such as a
43 * private address space, global execution state (stopped, active, 43 * private address space, global execution state (stopped, active,
44 * zombie, ...), signal disposition and so on. On a multiprocessor 44 * zombie, ...), signal disposition and so on. On a multiprocessor
 45 * machine, multiple LWPs may be executing concurrently in the kernel. 45 * machine, multiple LWPs may be executing concurrently in the kernel.
46 * 46 *
47 * Execution states 47 * Execution states
48 * 48 *
49 * At any given time, an LWP has overall state that is described by 49 * At any given time, an LWP has overall state that is described by
50 * lwp::l_stat. The states are broken into two sets below. The first 50 * lwp::l_stat. The states are broken into two sets below. The first
51 * set is guaranteed to represent the absolute, current state of the 51 * set is guaranteed to represent the absolute, current state of the
52 * LWP: 52 * LWP:
53 * 53 *
54 * LSONPROC 54 * LSONPROC
55 * 55 *
56 * On processor: the LWP is executing on a CPU, either in the 56 * On processor: the LWP is executing on a CPU, either in the
57 * kernel or in user space. 57 * kernel or in user space.
58 * 58 *
59 * LSRUN 59 * LSRUN
60 * 60 *
61 * Runnable: the LWP is parked on a run queue, and may soon be 61 * Runnable: the LWP is parked on a run queue, and may soon be
62 * chosen to run by an idle processor, or by a processor that 62 * chosen to run by an idle processor, or by a processor that
 63 * has been asked to preempt a currently running but lower 63 * has been asked to preempt a currently running but lower
64 * priority LWP. 64 * priority LWP.
65 * 65 *
66 * LSIDL 66 * LSIDL
67 * 67 *
68 * Idle: the LWP has been created but has not yet executed, 68 * Idle: the LWP has been created but has not yet executed,
69 * or it has ceased executing a unit of work and is waiting 69 * or it has ceased executing a unit of work and is waiting
70 * to be started again. 70 * to be started again.
71 * 71 *
72 * LSSUSPENDED: 72 * LSSUSPENDED:
73 * 73 *
74 * Suspended: the LWP has had its execution suspended by 74 * Suspended: the LWP has had its execution suspended by
75 * another LWP in the same process using the _lwp_suspend() 75 * another LWP in the same process using the _lwp_suspend()
76 * system call. User-level LWPs also enter the suspended 76 * system call. User-level LWPs also enter the suspended
77 * state when the system is shutting down. 77 * state when the system is shutting down.
78 * 78 *
 79 * The second set represents a "statement of intent" on behalf of the 79 * The second set represents a "statement of intent" on behalf of the
80 * LWP. The LWP may in fact be executing on a processor, may be 80 * LWP. The LWP may in fact be executing on a processor, may be
81 * sleeping or idle. It is expected to take the necessary action to 81 * sleeping or idle. It is expected to take the necessary action to
82 * stop executing or become "running" again within a short timeframe. 82 * stop executing or become "running" again within a short timeframe.
83 * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. 83 * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
84 * Importantly, it indicates that its state is tied to a CPU. 84 * Importantly, it indicates that its state is tied to a CPU.
85 * 85 *
86 * LSZOMB: 86 * LSZOMB:
87 * 87 *
88 * Dead or dying: the LWP has released most of its resources 88 * Dead or dying: the LWP has released most of its resources
89 * and is about to switch away into oblivion, or has already 89 * and is about to switch away into oblivion, or has already
90 * switched away. When it switches away, its few remaining 90 * switched away. When it switches away, its few remaining
91 * resources can be collected. 91 * resources can be collected.
92 * 92 *
93 * LSSLEEP: 93 * LSSLEEP:
94 * 94 *
95 * Sleeping: the LWP has entered itself onto a sleep queue, and 95 * Sleeping: the LWP has entered itself onto a sleep queue, and
96 * has switched away or will switch away shortly to allow other 96 * has switched away or will switch away shortly to allow other
97 * LWPs to run on the CPU. 97 * LWPs to run on the CPU.
98 * 98 *
99 * LSSTOP: 99 * LSSTOP:
100 * 100 *
101 * Stopped: the LWP has been stopped as a result of a job 101 * Stopped: the LWP has been stopped as a result of a job
102 * control signal, or as a result of the ptrace() interface.  102 * control signal, or as a result of the ptrace() interface.
103 * 103 *
104 * Stopped LWPs may run briefly within the kernel to handle 104 * Stopped LWPs may run briefly within the kernel to handle
105 * signals that they receive, but will not return to user space 105 * signals that they receive, but will not return to user space
106 * until their process' state is changed away from stopped.  106 * until their process' state is changed away from stopped.
107 * 107 *
108 * Single LWPs within a process can not be set stopped 108 * Single LWPs within a process can not be set stopped
109 * selectively: all actions that can stop or continue LWPs 109 * selectively: all actions that can stop or continue LWPs
110 * occur at the process level. 110 * occur at the process level.
111 * 111 *
112 * State transitions 112 * State transitions
113 * 113 *
114 * Note that the LSSTOP state may only be set when returning to 114 * Note that the LSSTOP state may only be set when returning to
 115 * user space in userret(), or when sleeping interruptibly. The 115 * user space in userret(), or when sleeping interruptibly. The
116 * LSSUSPENDED state may only be set in userret(). Before setting 116 * LSSUSPENDED state may only be set in userret(). Before setting
117 * those states, we try to ensure that the LWPs will release all 117 * those states, we try to ensure that the LWPs will release all
118 * locks that they hold, and at a minimum try to ensure that the 118 * locks that they hold, and at a minimum try to ensure that the
119 * LWP can be set runnable again by a signal. 119 * LWP can be set runnable again by a signal.
120 * 120 *
121 * LWPs may transition states in the following ways: 121 * LWPs may transition states in the following ways:
122 * 122 *
123 * RUN -------> ONPROC ONPROC -----> RUN 123 * RUN -------> ONPROC ONPROC -----> RUN
124 * > SLEEP 124 * > SLEEP
125 * > STOPPED 125 * > STOPPED
126 * > SUSPENDED 126 * > SUSPENDED
127 * > ZOMB 127 * > ZOMB
128 * > IDL (special cases) 128 * > IDL (special cases)
129 * 129 *
130 * STOPPED ---> RUN SUSPENDED --> RUN 130 * STOPPED ---> RUN SUSPENDED --> RUN
131 * > SLEEP 131 * > SLEEP
132 * 132 *
133 * SLEEP -----> ONPROC IDL --------> RUN 133 * SLEEP -----> ONPROC IDL --------> RUN
134 * > RUN > SUSPENDED 134 * > RUN > SUSPENDED
135 * > STOPPED > STOPPED 135 * > STOPPED > STOPPED
136 * > ONPROC (special cases) 136 * > ONPROC (special cases)
137 * 137 *
138 * Some state transitions are only possible with kernel threads (eg 138 * Some state transitions are only possible with kernel threads (eg
139 * ONPROC -> IDL) and happen under tightly controlled circumstances 139 * ONPROC -> IDL) and happen under tightly controlled circumstances
140 * free of unwanted side effects. 140 * free of unwanted side effects.
141 * 141 *
142 * Migration 142 * Migration
143 * 143 *
144 * Migration of threads from one CPU to another could be performed 144 * Migration of threads from one CPU to another could be performed
145 * internally by the scheduler via sched_takecpu() or sched_catchlwp() 145 * internally by the scheduler via sched_takecpu() or sched_catchlwp()
146 * functions. The universal lwp_migrate() function should be used for 146 * functions. The universal lwp_migrate() function should be used for
 147 * any other cases. Subsystems in the kernel must be aware that the 147 * any other cases. Subsystems in the kernel must be aware that the
 148 * CPU of an LWP may change while it is not locked. 148 * CPU of an LWP may change while it is not locked.
149 * 149 *
150 * Locking 150 * Locking
151 * 151 *
152 * The majority of fields in 'struct lwp' are covered by a single, 152 * The majority of fields in 'struct lwp' are covered by a single,
153 * general spin lock pointed to by lwp::l_mutex. The locks covering 153 * general spin lock pointed to by lwp::l_mutex. The locks covering
154 * each field are documented in sys/lwp.h. 154 * each field are documented in sys/lwp.h.
155 * 155 *
156 * State transitions must be made with the LWP's general lock held, 156 * State transitions must be made with the LWP's general lock held,
157 * and may cause the LWP's lock pointer to change. Manipulation of 157 * and may cause the LWP's lock pointer to change. Manipulation of
158 * the general lock is not performed directly, but through calls to 158 * the general lock is not performed directly, but through calls to
159 * lwp_lock(), lwp_unlock() and others. It should be noted that the 159 * lwp_lock(), lwp_unlock() and others. It should be noted that the
160 * adaptive locks are not allowed to be released while the LWP's lock 160 * adaptive locks are not allowed to be released while the LWP's lock
161 * is being held (unlike for other spin-locks). 161 * is being held (unlike for other spin-locks).
162 * 162 *
163 * States and their associated locks: 163 * States and their associated locks:
164 * 164 *
 165 * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: 165 * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED:
166 * 166 *
167 * Always covered by spc_lwplock, which protects LWPs not 167 * Always covered by spc_lwplock, which protects LWPs not
168 * associated with any other sync object. This is a per-CPU 168 * associated with any other sync object. This is a per-CPU
169 * lock and matches lwp::l_cpu. 169 * lock and matches lwp::l_cpu.
170 * 170 *
171 * LSRUN: 171 * LSRUN:
172 * 172 *
173 * Always covered by spc_mutex, which protects the run queues. 173 * Always covered by spc_mutex, which protects the run queues.
174 * This is a per-CPU lock and matches lwp::l_cpu. 174 * This is a per-CPU lock and matches lwp::l_cpu.
175 * 175 *
176 * LSSLEEP: 176 * LSSLEEP:
177 * 177 *
178 * Covered by a lock associated with the sleep queue (sometimes 178 * Covered by a lock associated with the sleep queue (sometimes
179 * a turnstile sleep queue) that the LWP resides on. This can 179 * a turnstile sleep queue) that the LWP resides on. This can
180 * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). 180 * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep).
181 * 181 *
182 * LSSTOP: 182 * LSSTOP:
183 * 183 *
184 * If the LWP was previously sleeping (l_wchan != NULL), then 184 * If the LWP was previously sleeping (l_wchan != NULL), then
185 * l_mutex references the sleep queue lock. If the LWP was 185 * l_mutex references the sleep queue lock. If the LWP was
186 * runnable or on the CPU when halted, or has been removed from 186 * runnable or on the CPU when halted, or has been removed from
187 * the sleep queue since halted, then the lock is spc_lwplock. 187 * the sleep queue since halted, then the lock is spc_lwplock.
188 * 188 *
189 * The lock order is as follows: 189 * The lock order is as follows:
190 * 190 *
191 * sleepq -> turnstile -> spc_lwplock -> spc_mutex 191 * sleepq -> turnstile -> spc_lwplock -> spc_mutex
192 * 192 *
 193 * Each process has a scheduler state lock (proc::p_lock), and a 193 * Each process has a scheduler state lock (proc::p_lock), and a
194 * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and 194 * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
195 * so on. When an LWP is to be entered into or removed from one of the 195 * so on. When an LWP is to be entered into or removed from one of the
196 * following states, p_lock must be held and the process wide counters 196 * following states, p_lock must be held and the process wide counters
197 * adjusted: 197 * adjusted:
198 * 198 *
199 * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED 199 * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
200 * 200 *
201 * (But not always for kernel threads. There are some special cases 201 * (But not always for kernel threads. There are some special cases
202 * as mentioned above: soft interrupts, and the idle loops.) 202 * as mentioned above: soft interrupts, and the idle loops.)
203 * 203 *
204 * Note that an LWP is considered running or likely to run soon if in 204 * Note that an LWP is considered running or likely to run soon if in
205 * one of the following states. This affects the value of p_nrlwps: 205 * one of the following states. This affects the value of p_nrlwps:
206 * 206 *
207 * LSRUN, LSONPROC, LSSLEEP 207 * LSRUN, LSONPROC, LSSLEEP
208 * 208 *
209 * p_lock does not need to be held when transitioning among these 209 * p_lock does not need to be held when transitioning among these
210 * three states, hence p_lock is rarely taken for state transitions. 210 * three states, hence p_lock is rarely taken for state transitions.
211 */ 211 */
212 212
213#include <sys/cdefs.h> 213#include <sys/cdefs.h>
214__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.233 2020/04/04 20:20:12 thorpej Exp $"); 214__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $");
215 215
216#include "opt_ddb.h" 216#include "opt_ddb.h"
217#include "opt_lockdebug.h" 217#include "opt_lockdebug.h"
218#include "opt_dtrace.h" 218#include "opt_dtrace.h"
219 219
220#define _LWP_API_PRIVATE 220#define _LWP_API_PRIVATE
221 221
222#include <sys/param.h> 222#include <sys/param.h>
223#include <sys/systm.h> 223#include <sys/systm.h>
224#include <sys/cpu.h> 224#include <sys/cpu.h>
225#include <sys/pool.h> 225#include <sys/pool.h>
226#include <sys/proc.h> 226#include <sys/proc.h>
227#include <sys/syscallargs.h> 227#include <sys/syscallargs.h>
228#include <sys/syscall_stats.h> 228#include <sys/syscall_stats.h>
229#include <sys/kauth.h> 229#include <sys/kauth.h>
230#include <sys/sleepq.h> 230#include <sys/sleepq.h>
231#include <sys/lockdebug.h> 231#include <sys/lockdebug.h>
232#include <sys/kmem.h> 232#include <sys/kmem.h>
233#include <sys/pset.h> 233#include <sys/pset.h>
234#include <sys/intr.h> 234#include <sys/intr.h>
235#include <sys/lwpctl.h> 235#include <sys/lwpctl.h>
236#include <sys/atomic.h> 236#include <sys/atomic.h>
237#include <sys/filedesc.h> 237#include <sys/filedesc.h>
238#include <sys/fstrans.h> 238#include <sys/fstrans.h>
239#include <sys/dtrace_bsd.h> 239#include <sys/dtrace_bsd.h>
240#include <sys/sdt.h> 240#include <sys/sdt.h>
241#include <sys/ptrace.h> 241#include <sys/ptrace.h>
242#include <sys/xcall.h> 242#include <sys/xcall.h>
243#include <sys/uidinfo.h> 243#include <sys/uidinfo.h>
244#include <sys/sysctl.h> 244#include <sys/sysctl.h>
245#include <sys/psref.h> 245#include <sys/psref.h>
246#include <sys/msan.h> 246#include <sys/msan.h>
247#include <sys/kcov.h> 247#include <sys/kcov.h>
248#include <sys/thmap.h> 248#include <sys/thmap.h>
249#include <sys/cprng.h> 249#include <sys/cprng.h>
250 250
251#include <uvm/uvm_extern.h> 251#include <uvm/uvm_extern.h>
252#include <uvm/uvm_object.h> 252#include <uvm/uvm_object.h>
253 253
254static pool_cache_t lwp_cache __read_mostly; 254static pool_cache_t lwp_cache __read_mostly;
255struct lwplist alllwp __cacheline_aligned; 255struct lwplist alllwp __cacheline_aligned;
256 256
257/* 257/*
258 * Lookups by global thread ID operate outside of the normal LWP 258 * Lookups by global thread ID operate outside of the normal LWP
259 * locking protocol. 259 * locking protocol.
260 * 260 *
261 * We are using a thmap, which internally can perform lookups lock-free. 261 * We are using a thmap, which internally can perform lookups lock-free.
262 * However, we still need to serialize lookups against LWP exit. We 262 * However, we still need to serialize lookups against LWP exit. We
263 * achieve this as follows: 263 * achieve this as follows:
264 * 264 *
265 * => Assignment of TID is performed lazily by the LWP itself, when it 265 * => Assignment of TID is performed lazily by the LWP itself, when it
266 * is first requested. Insertion into the thmap is done completely 266 * is first requested. Insertion into the thmap is done completely
267 * lock-free (other than the internal locking performed by thmap itself). 267 * lock-free (other than the internal locking performed by thmap itself).
268 * Once the TID is published in the map, the l___tid field in the LWP 268 * Once the TID is published in the map, the l___tid field in the LWP
269 * is protected by p_lock. 269 * is protected by p_lock.
270 * 270 *
271 * => When we look up an LWP in the thmap, we take lwp_threadid_lock as 271 * => When we look up an LWP in the thmap, we take lwp_threadid_lock as
272 * a READER. While still holding the lock, we add a reference to 272 * a READER. While still holding the lock, we add a reference to
273 * the LWP (using atomics). After adding the reference, we drop the 273 * the LWP (using atomics). After adding the reference, we drop the
274 * lwp_threadid_lock. We now take p_lock and check the state of the 274 * lwp_threadid_lock. We now take p_lock and check the state of the
275 * LWP. If the LWP is draining its references or if the l___tid field 275 * LWP. If the LWP is draining its references or if the l___tid field
276 * has been invalidated, we drop the reference we took and return NULL. 276 * has been invalidated, we drop the reference we took and return NULL.
277 * Otherwise, the lookup has succeeded and the LWP is returned with a 277 * Otherwise, the lookup has succeeded and the LWP is returned with a
278 * reference count that the caller is responsible for dropping. 278 * reference count that the caller is responsible for dropping.
279 * 279 *
280 * => When a LWP is exiting it releases its TID. While holding the 280 * => When a LWP is exiting it releases its TID. While holding the
281 * p_lock, the entry is deleted from the thmap and the l___tid field 281 * p_lock, the entry is deleted from the thmap and the l___tid field
282 * invalidated. Once the field is invalidated, p_lock is released. 282 * invalidated. Once the field is invalidated, p_lock is released.
283 * It is done in this sequence because the l___tid field is used as 283 * It is done in this sequence because the l___tid field is used as
284 * the lookup key storage in the thmap in order to conserve memory. 284 * the lookup key storage in the thmap in order to conserve memory.
285 * Even if a lookup races with this process and succeeds only to have 285 * Even if a lookup races with this process and succeeds only to have
286 * the TID invalidated, it's OK because it also results in a reference 286 * the TID invalidated, it's OK because it also results in a reference
287 * that will be drained later. 287 * that will be drained later.
288 * 288 *
289 * => Deleting a node also requires GC of now-unused thmap nodes. The 289 * => Deleting a node also requires GC of now-unused thmap nodes. The
290 * serialization point between stage_gc and gc is performed by simply 290 * serialization point between stage_gc and gc is performed by simply
291 * taking the lwp_threadid_lock as a WRITER and immediately releasing 291 * taking the lwp_threadid_lock as a WRITER and immediately releasing
292 * it. By doing this, we know that any busy readers will have drained. 292 * it. By doing this, we know that any busy readers will have drained.
293 * 293 *
294 * => When a LWP is exiting, it also drains off any references being 294 * => When a LWP is exiting, it also drains off any references being
295 * held by others. However, the reference in the lookup path is taken 295 * held by others. However, the reference in the lookup path is taken
296 * outside the normal locking protocol. There needs to be additional 296 * outside the normal locking protocol. There needs to be additional
297 * serialization so that EITHER lwp_drainrefs() sees the incremented 297 * serialization so that EITHER lwp_drainrefs() sees the incremented
298 * reference count so that it knows to wait, OR lwp_getref_tid() sees 298 * reference count so that it knows to wait, OR lwp_getref_tid() sees
299 * that the LWP is waiting to drain and thus drops the reference 299 * that the LWP is waiting to drain and thus drops the reference
300 * immediately. This is achieved by taking lwp_threadid_lock as a 300 * immediately. This is achieved by taking lwp_threadid_lock as a
301 * WRITER when setting LPR_DRAINING. Note the locking order: 301 * WRITER when setting LPR_DRAINING. Note the locking order:
302 * 302 *
303 * p_lock -> lwp_threadid_lock 303 * p_lock -> lwp_threadid_lock
304 * 304 *
305 * Note that this scheme could easily use pserialize(9) in place of the 305 * Note that this scheme could easily use pserialize(9) in place of the
306 * lwp_threadid_lock rwlock lock. However, this would require placing a 306 * lwp_threadid_lock rwlock lock. However, this would require placing a
307 * pserialize_perform() call in the LWP exit path, which is arguably more 307 * pserialize_perform() call in the LWP exit path, which is arguably more
308 * expensive than briefly taking a global lock that should be relatively 308 * expensive than briefly taking a global lock that should be relatively
309 * uncontended. This issue can be revisited if the rwlock proves to be 309 * uncontended. This issue can be revisited if the rwlock proves to be
310 * a performance problem. 310 * a performance problem.
311 */ 311 */
312static krwlock_t lwp_threadid_lock __cacheline_aligned; 312static krwlock_t lwp_threadid_lock __cacheline_aligned;
313static thmap_t * lwp_threadid_map __read_mostly; 313static thmap_t * lwp_threadid_map __read_mostly;
314 314
315static void lwp_dtor(void *, void *); 315static void lwp_dtor(void *, void *);
316 316
317/* DTrace proc provider probes */ 317/* DTrace proc provider probes */
318SDT_PROVIDER_DEFINE(proc); 318SDT_PROVIDER_DEFINE(proc);
319 319
320SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); 320SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *");
321SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); 321SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *");
322SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); 322SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *");
323 323
324struct turnstile turnstile0 __cacheline_aligned; 324struct turnstile turnstile0 __cacheline_aligned;
325struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { 325struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
326#ifdef LWP0_CPU_INFO 326#ifdef LWP0_CPU_INFO
327 .l_cpu = LWP0_CPU_INFO, 327 .l_cpu = LWP0_CPU_INFO,
328#endif 328#endif
329#ifdef LWP0_MD_INITIALIZER 329#ifdef LWP0_MD_INITIALIZER
330 .l_md = LWP0_MD_INITIALIZER, 330 .l_md = LWP0_MD_INITIALIZER,
331#endif 331#endif
332 .l_proc = &proc0, 332 .l_proc = &proc0,
333 .l_lid = 1, 333 .l_lid = 1,
334 .l_flag = LW_SYSTEM, 334 .l_flag = LW_SYSTEM,
335 .l_stat = LSONPROC, 335 .l_stat = LSONPROC,
336 .l_ts = &turnstile0, 336 .l_ts = &turnstile0,
337 .l_syncobj = &sched_syncobj, 337 .l_syncobj = &sched_syncobj,
338 .l_refcnt = 0, 338 .l_refcnt = 0,
339 .l_priority = PRI_USER + NPRI_USER - 1, 339 .l_priority = PRI_USER + NPRI_USER - 1,
340 .l_inheritedprio = -1, 340 .l_inheritedprio = -1,
341 .l_class = SCHED_OTHER, 341 .l_class = SCHED_OTHER,
342 .l_psid = PS_NONE, 342 .l_psid = PS_NONE,
343 .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), 343 .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
344 .l_name = __UNCONST("swapper"), 344 .l_name = __UNCONST("swapper"),
345 .l_fd = &filedesc0, 345 .l_fd = &filedesc0,
346}; 346};
347 347
348static void lwp_threadid_init(void); 348static void lwp_threadid_init(void);
349static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); 349static int sysctl_kern_maxlwp(SYSCTLFN_PROTO);
350 350
351/* 351/*
352 * sysctl helper routine for kern.maxlwp. Ensures that the new 352 * sysctl helper routine for kern.maxlwp. Ensures that the new
353 * values are not too low or too high. 353 * values are not too low or too high.
354 */ 354 */
355static int 355static int
356sysctl_kern_maxlwp(SYSCTLFN_ARGS) 356sysctl_kern_maxlwp(SYSCTLFN_ARGS)
357{ 357{
358 int error, nmaxlwp; 358 int error, nmaxlwp;
359 struct sysctlnode node; 359 struct sysctlnode node;
360 360
361 nmaxlwp = maxlwp; 361 nmaxlwp = maxlwp;
362 node = *rnode; 362 node = *rnode;
363 node.sysctl_data = &nmaxlwp; 363 node.sysctl_data = &nmaxlwp;
364 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 364 error = sysctl_lookup(SYSCTLFN_CALL(&node));
365 if (error || newp == NULL) 365 if (error || newp == NULL)
366 return error; 366 return error;
367 367
368 if (nmaxlwp < 0 || nmaxlwp >= 65536) 368 if (nmaxlwp < 0 || nmaxlwp >= 65536)
369 return EINVAL; 369 return EINVAL;
370 if (nmaxlwp > cpu_maxlwp()) 370 if (nmaxlwp > cpu_maxlwp())
371 return EINVAL; 371 return EINVAL;
372 maxlwp = nmaxlwp; 372 maxlwp = nmaxlwp;
373 373
374 return 0; 374 return 0;
375} 375}
376 376
377static void 377static void
378sysctl_kern_lwp_setup(void) 378sysctl_kern_lwp_setup(void)
379{ 379{
380 struct sysctllog *clog = NULL; 380 struct sysctllog *clog = NULL;
381 381
382 sysctl_createv(&clog, 0, NULL, NULL, 382 sysctl_createv(&clog, 0, NULL, NULL,
383 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 383 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
384 CTLTYPE_INT, "maxlwp", 384 CTLTYPE_INT, "maxlwp",
385 SYSCTL_DESCR("Maximum number of simultaneous threads"), 385 SYSCTL_DESCR("Maximum number of simultaneous threads"),
386 sysctl_kern_maxlwp, 0, NULL, 0, 386 sysctl_kern_maxlwp, 0, NULL, 0,
387 CTL_KERN, CTL_CREATE, CTL_EOL); 387 CTL_KERN, CTL_CREATE, CTL_EOL);
388} 388}
389 389
390void 390void
391lwpinit(void) 391lwpinit(void)
392{ 392{
393 393
394 LIST_INIT(&alllwp); 394 LIST_INIT(&alllwp);
395 lwpinit_specificdata(); 395 lwpinit_specificdata();
396 lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, 396 lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
397 "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); 397 "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
398 398
399 maxlwp = cpu_maxlwp(); 399 maxlwp = cpu_maxlwp();
400 sysctl_kern_lwp_setup(); 400 sysctl_kern_lwp_setup();
401 lwp_threadid_init(); 401 lwp_threadid_init();
402} 402}
403 403
404void 404void
405lwp0_init(void) 405lwp0_init(void)
406{ 406{
407 struct lwp *l = &lwp0; 407 struct lwp *l = &lwp0;
408 408
409 KASSERT((void *)uvm_lwp_getuarea(l) != NULL); 409 KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
410 KASSERT(l->l_lid == proc0.p_nlwpid); 410 KASSERT(l->l_lid == proc0.p_nlwpid);
411 411
412 LIST_INSERT_HEAD(&alllwp, l, l_list); 412 LIST_INSERT_HEAD(&alllwp, l, l_list);
413 413
414 callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); 414 callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
415 callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); 415 callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
416 cv_init(&l->l_sigcv, "sigwait"); 416 cv_init(&l->l_sigcv, "sigwait");
417 cv_init(&l->l_waitcv, "vfork"); 417 cv_init(&l->l_waitcv, "vfork");
418 418
419 kauth_cred_hold(proc0.p_cred); 419 kauth_cred_hold(proc0.p_cred);
420 l->l_cred = proc0.p_cred; 420 l->l_cred = proc0.p_cred;
421 421
422 kdtrace_thread_ctor(NULL, l); 422 kdtrace_thread_ctor(NULL, l);
423 lwp_initspecific(l); 423 lwp_initspecific(l);
424 424
425 SYSCALL_TIME_LWP_INIT(l); 425 SYSCALL_TIME_LWP_INIT(l);
426} 426}
427 427
428static void 428static void
429lwp_dtor(void *arg, void *obj) 429lwp_dtor(void *arg, void *obj)
430{ 430{
431 lwp_t *l = obj; 431 lwp_t *l = obj;
432 (void)l; 432 (void)l;
433 433
434 /* 434 /*
435 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() 435 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
436 * calls will exit before memory of LWP is returned to the pool, where 436 * calls will exit before memory of LWP is returned to the pool, where
437 * KVA of LWP structure might be freed and re-used for other purposes. 437 * KVA of LWP structure might be freed and re-used for other purposes.
438 * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu() 438 * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu()
439 * callers, therefore cross-call to all CPUs will do the job. Also, 439 * callers, therefore cross-call to all CPUs will do the job. Also,
440 * the value of l->l_cpu must be still valid at this point. 440 * the value of l->l_cpu must be still valid at this point.
441 */ 441 */
442 KASSERT(l->l_cpu != NULL); 442 KASSERT(l->l_cpu != NULL);
443 xc_barrier(0); 443 xc_barrier(0);
444} 444}
445 445
446/* 446/*
 447 * Set an LWP suspended. 447 * Set an LWP suspended.
448 * 448 *
449 * Must be called with p_lock held, and the LWP locked. Will unlock the 449 * Must be called with p_lock held, and the LWP locked. Will unlock the
450 * LWP before return. 450 * LWP before return.
451 */ 451 */
452int 452int
453lwp_suspend(struct lwp *curl, struct lwp *t) 453lwp_suspend(struct lwp *curl, struct lwp *t)
454{ 454{
455 int error; 455 int error;
456 456
457 KASSERT(mutex_owned(t->l_proc->p_lock)); 457 KASSERT(mutex_owned(t->l_proc->p_lock));
458 KASSERT(lwp_locked(t, NULL)); 458 KASSERT(lwp_locked(t, NULL));
459 459
460 KASSERT(curl != t || curl->l_stat == LSONPROC); 460 KASSERT(curl != t || curl->l_stat == LSONPROC);
461 461
462 /* 462 /*
463 * If the current LWP has been told to exit, we must not suspend anyone 463 * If the current LWP has been told to exit, we must not suspend anyone
464 * else or deadlock could occur. We won't return to userspace. 464 * else or deadlock could occur. We won't return to userspace.
465 */ 465 */
466 if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { 466 if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
467 lwp_unlock(t); 467 lwp_unlock(t);
468 return (EDEADLK); 468 return (EDEADLK);
469 } 469 }
470 470
471 if ((t->l_flag & LW_DBGSUSPEND) != 0) { 471 if ((t->l_flag & LW_DBGSUSPEND) != 0) {
472 lwp_unlock(t); 472 lwp_unlock(t);
473 return 0; 473 return 0;
474 } 474 }
475 475
476 error = 0; 476 error = 0;
477 477
478 switch (t->l_stat) { 478 switch (t->l_stat) {
479 case LSRUN: 479 case LSRUN:
480 case LSONPROC: 480 case LSONPROC:
481 t->l_flag |= LW_WSUSPEND; 481 t->l_flag |= LW_WSUSPEND;
482 lwp_need_userret(t); 482 lwp_need_userret(t);
483 lwp_unlock(t); 483 lwp_unlock(t);
484 break; 484 break;
485 485
486 case LSSLEEP: 486 case LSSLEEP:
487 t->l_flag |= LW_WSUSPEND; 487 t->l_flag |= LW_WSUSPEND;
488 488
489 /* 489 /*
490 * Kick the LWP and try to get it to the kernel boundary 490 * Kick the LWP and try to get it to the kernel boundary
491 * so that it will release any locks that it holds. 491 * so that it will release any locks that it holds.
492 * setrunnable() will release the lock. 492 * setrunnable() will release the lock.
493 */ 493 */
494 if ((t->l_flag & LW_SINTR) != 0) 494 if ((t->l_flag & LW_SINTR) != 0)
495 setrunnable(t); 495 setrunnable(t);
496 else 496 else
497 lwp_unlock(t); 497 lwp_unlock(t);
498 break; 498 break;
499 499
500 case LSSUSPENDED: 500 case LSSUSPENDED:
501 lwp_unlock(t); 501 lwp_unlock(t);
502 break; 502 break;
503 503
504 case LSSTOP: 504 case LSSTOP:
505 t->l_flag |= LW_WSUSPEND; 505 t->l_flag |= LW_WSUSPEND;
506 setrunnable(t); 506 setrunnable(t);
507 break; 507 break;
508 508
509 case LSIDL: 509 case LSIDL:
510 case LSZOMB: 510 case LSZOMB:
511 error = EINTR; /* It's what Solaris does..... */ 511 error = EINTR; /* It's what Solaris does..... */
512 lwp_unlock(t); 512 lwp_unlock(t);
513 break; 513 break;
514 } 514 }
515 515
516 return (error); 516 return (error);
517} 517}
518 518
519/* 519/*
520 * Restart a suspended LWP. 520 * Restart a suspended LWP.
521 * 521 *
522 * Must be called with p_lock held, and the LWP locked. Will unlock the 522 * Must be called with p_lock held, and the LWP locked. Will unlock the
523 * LWP before return. 523 * LWP before return.
524 */ 524 */
525void 525void
526lwp_continue(struct lwp *l) 526lwp_continue(struct lwp *l)
527{ 527{
528 528
529 KASSERT(mutex_owned(l->l_proc->p_lock)); 529 KASSERT(mutex_owned(l->l_proc->p_lock));
530 KASSERT(lwp_locked(l, NULL)); 530 KASSERT(lwp_locked(l, NULL));
531 531
532 /* If rebooting or not suspended, then just bail out. */ 532 /* If rebooting or not suspended, then just bail out. */
533 if ((l->l_flag & LW_WREBOOT) != 0) { 533 if ((l->l_flag & LW_WREBOOT) != 0) {
534 lwp_unlock(l); 534 lwp_unlock(l);
535 return; 535 return;
536 } 536 }
537 537
538 l->l_flag &= ~LW_WSUSPEND; 538 l->l_flag &= ~LW_WSUSPEND;
539 539
540 if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { 540 if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) {
541 lwp_unlock(l); 541 lwp_unlock(l);
542 return; 542 return;
543 } 543 }
544 544
545 /* setrunnable() will release the lock. */ 545 /* setrunnable() will release the lock. */
546 setrunnable(l); 546 setrunnable(l);
547} 547}
548 548
549/* 549/*
550 * Restart a stopped LWP. 550 * Restart a stopped LWP.
551 * 551 *
552 * Must be called with p_lock held, and the LWP NOT locked. Will unlock the 552 * Must be called with p_lock held, and the LWP NOT locked. Will unlock the
553 * LWP before return. 553 * LWP before return.
554 */ 554 */
555void 555void
556lwp_unstop(struct lwp *l) 556lwp_unstop(struct lwp *l)
557{ 557{
558 struct proc *p = l->l_proc; 558 struct proc *p = l->l_proc;
559 559
560 KASSERT(mutex_owned(proc_lock)); 560 KASSERT(mutex_owned(proc_lock));
561 KASSERT(mutex_owned(p->p_lock)); 561 KASSERT(mutex_owned(p->p_lock));
562 562
563 lwp_lock(l); 563 lwp_lock(l);
564 564
565 KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); 565 KASSERT((l->l_flag & LW_DBGSUSPEND) == 0);
566 566
567 /* If not stopped, then just bail out. */ 567 /* If not stopped, then just bail out. */
568 if (l->l_stat != LSSTOP) { 568 if (l->l_stat != LSSTOP) {
569 lwp_unlock(l); 569 lwp_unlock(l);
570 return; 570 return;
571 } 571 }
572 572
573 p->p_stat = SACTIVE; 573 p->p_stat = SACTIVE;
574 p->p_sflag &= ~PS_STOPPING; 574 p->p_sflag &= ~PS_STOPPING;
575 575
576 if (!p->p_waited) 576 if (!p->p_waited)
577 p->p_pptr->p_nstopchild--; 577 p->p_pptr->p_nstopchild--;
578 578
579 if (l->l_wchan == NULL) { 579 if (l->l_wchan == NULL) {
580 /* setrunnable() will release the lock. */ 580 /* setrunnable() will release the lock. */
581 setrunnable(l); 581 setrunnable(l);
582 } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { 582 } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) {
583 /* setrunnable() so we can receive the signal */ 583 /* setrunnable() so we can receive the signal */
584 setrunnable(l); 584 setrunnable(l);
585 } else { 585 } else {
586 l->l_stat = LSSLEEP; 586 l->l_stat = LSSLEEP;
587 p->p_nrlwps++; 587 p->p_nrlwps++;
588 lwp_unlock(l); 588 lwp_unlock(l);
589 } 589 }
590} 590}
591 591
592/* 592/*
593 * Wait for an LWP within the current process to exit. If 'lid' is 593 * Wait for an LWP within the current process to exit. If 'lid' is
594 * non-zero, we are waiting for a specific LWP. 594 * non-zero, we are waiting for a specific LWP.
595 * 595 *
596 * Must be called with p->p_lock held. 596 * Must be called with p->p_lock held.
597 */ 597 */
598int 598int
599lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) 599lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
600{ 600{
601 const lwpid_t curlid = l->l_lid; 601 const lwpid_t curlid = l->l_lid;
602 proc_t *p = l->l_proc; 602 proc_t *p = l->l_proc;
603 lwp_t *l2, *next; 603 lwp_t *l2, *next;
604 int error; 604 int error;
605 605
606 KASSERT(mutex_owned(p->p_lock)); 606 KASSERT(mutex_owned(p->p_lock));
607 607
608 p->p_nlwpwait++; 608 p->p_nlwpwait++;
609 l->l_waitingfor = lid; 609 l->l_waitingfor = lid;
610 610
611 for (;;) { 611 for (;;) {
612 int nfound; 612 int nfound;
613 613
614 /* 614 /*
615 * Avoid a race between exit1() and sigexit(): if the 615 * Avoid a race between exit1() and sigexit(): if the
616 * process is dumping core, then we need to bail out: call 616 * process is dumping core, then we need to bail out: call
617 * into lwp_userret() where we will be suspended until the 617 * into lwp_userret() where we will be suspended until the
618 * deed is done. 618 * deed is done.
619 */ 619 */
620 if ((p->p_sflag & PS_WCORE) != 0) { 620 if ((p->p_sflag & PS_WCORE) != 0) {
621 mutex_exit(p->p_lock); 621 mutex_exit(p->p_lock);
622 lwp_userret(l); 622 lwp_userret(l);
623 KASSERT(false); 623 KASSERT(false);
624 } 624 }
625 625
626 /* 626 /*
627 * First off, drain any detached LWP that is waiting to be 627 * First off, drain any detached LWP that is waiting to be
628 * reaped. 628 * reaped.
629 */ 629 */
630 while ((l2 = p->p_zomblwp) != NULL) { 630 while ((l2 = p->p_zomblwp) != NULL) {
631 p->p_zomblwp = NULL; 631 p->p_zomblwp = NULL;
632 lwp_free(l2, false, false);/* releases proc mutex */ 632 lwp_free(l2, false, false);/* releases proc mutex */
633 mutex_enter(p->p_lock); 633 mutex_enter(p->p_lock);
634 } 634 }
635 635
636 /* 636 /*
637 * Now look for an LWP to collect. If the whole process is 637 * Now look for an LWP to collect. If the whole process is
638 * exiting, count detached LWPs as eligible to be collected, 638 * exiting, count detached LWPs as eligible to be collected,
639 * but don't drain them here. 639 * but don't drain them here.
640 */ 640 */
641 nfound = 0; 641 nfound = 0;
642 error = 0; 642 error = 0;
643 643
644 /* 644 /*
645 * If given a specific LID, go via the tree and make sure 645 * If given a specific LID, go via the tree and make sure
646 * it's not detached. 646 * it's not detached.
647 */ 647 */
648 if (lid != 0) { 648 if (lid != 0) {
649 l2 = radix_tree_lookup_node(&p->p_lwptree, 649 l2 = radix_tree_lookup_node(&p->p_lwptree,
650 (uint64_t)(lid - 1)); 650 (uint64_t)(lid - 1));
651 if (l2 == NULL) { 651 if (l2 == NULL) {
652 error = ESRCH; 652 error = ESRCH;
653 break; 653 break;
654 } 654 }
655 KASSERT(l2->l_lid == lid); 655 KASSERT(l2->l_lid == lid);
656 if ((l2->l_prflag & LPR_DETACHED) != 0) { 656 if ((l2->l_prflag & LPR_DETACHED) != 0) {
657 error = EINVAL; 657 error = EINVAL;
658 break; 658 break;
659 } 659 }
660 } else { 660 } else {
661 l2 = LIST_FIRST(&p->p_lwps); 661 l2 = LIST_FIRST(&p->p_lwps);
662 } 662 }
663 for (; l2 != NULL; l2 = next) { 663 for (; l2 != NULL; l2 = next) {
664 next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling)); 664 next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling));
665 665
666 /* 666 /*
667 * If a specific wait and the target is waiting on 667 * If a specific wait and the target is waiting on
668 * us, then avoid deadlock. This also traps LWPs 668 * us, then avoid deadlock. This also traps LWPs
669 * that try to wait on themselves. 669 * that try to wait on themselves.
670 * 670 *
671 * Note that this does not handle more complicated 671 * Note that this does not handle more complicated
672 * cycles, like: t1 -> t2 -> t3 -> t1. The process 672 * cycles, like: t1 -> t2 -> t3 -> t1. The process
673 * can still be killed so it is not a major problem. 673 * can still be killed so it is not a major problem.
674 */ 674 */
675 if (l2->l_lid == lid && l2->l_waitingfor == curlid) { 675 if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
676 error = EDEADLK; 676 error = EDEADLK;
677 break; 677 break;
678 } 678 }
679 if (l2 == l) 679 if (l2 == l)
680 continue; 680 continue;
681 if ((l2->l_prflag & LPR_DETACHED) != 0) { 681 if ((l2->l_prflag & LPR_DETACHED) != 0) {
682 nfound += exiting; 682 nfound += exiting;
683 continue; 683 continue;
684 } 684 }
685 if (lid != 0) { 685 if (lid != 0) {
686 /* 686 /*
687 * Mark this LWP as the first waiter, if there 687 * Mark this LWP as the first waiter, if there
688 * is no other. 688 * is no other.
689 */ 689 */
690 if (l2->l_waiter == 0) 690 if (l2->l_waiter == 0)
691 l2->l_waiter = curlid; 691 l2->l_waiter = curlid;
692 } else if (l2->l_waiter != 0) { 692 } else if (l2->l_waiter != 0) {
693 /* 693 /*
694 * It already has a waiter - so don't 694 * It already has a waiter - so don't
695 * collect it. If the waiter doesn't 695 * collect it. If the waiter doesn't
696 * grab it we'll get another chance 696 * grab it we'll get another chance
697 * later. 697 * later.
698 */ 698 */
699 nfound++; 699 nfound++;
700 continue; 700 continue;
701 } 701 }
702 nfound++; 702 nfound++;
703 703
704 /* No need to lock the LWP in order to see LSZOMB. */ 704 /* No need to lock the LWP in order to see LSZOMB. */
705 if (l2->l_stat != LSZOMB) 705 if (l2->l_stat != LSZOMB)
706 continue; 706 continue;
707 707
708 /* 708 /*
709 * We're no longer waiting. Reset the "first waiter" 709 * We're no longer waiting. Reset the "first waiter"
710 * pointer on the target, in case it was us. 710 * pointer on the target, in case it was us.
711 */ 711 */
712 l->l_waitingfor = 0; 712 l->l_waitingfor = 0;
713 l2->l_waiter = 0; 713 l2->l_waiter = 0;
714 p->p_nlwpwait--; 714 p->p_nlwpwait--;
715 if (departed) 715 if (departed)
716 *departed = l2->l_lid; 716 *departed = l2->l_lid;
717 sched_lwp_collect(l2); 717 sched_lwp_collect(l2);
718 718
719 /* lwp_free() releases the proc lock. */ 719 /* lwp_free() releases the proc lock. */
720 lwp_free(l2, false, false); 720 lwp_free(l2, false, false);
721 mutex_enter(p->p_lock); 721 mutex_enter(p->p_lock);
722 return 0; 722 return 0;
723 } 723 }
724 724
725 if (error != 0) 725 if (error != 0)
726 break; 726 break;
727 if (nfound == 0) { 727 if (nfound == 0) {
728 error = ESRCH; 728 error = ESRCH;
729 break; 729 break;
730 } 730 }
731 731
732 /* 732 /*
733 * Note: since the lock will be dropped, need to restart on 733 * Note: since the lock will be dropped, need to restart on
734 * wakeup to run all LWPs again, e.g. there may be new LWPs. 734 * wakeup to run all LWPs again, e.g. there may be new LWPs.
735 */ 735 */
736 if (exiting) { 736 if (exiting) {
737 KASSERT(p->p_nlwps > 1); 737 KASSERT(p->p_nlwps > 1);
738 error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); 738 error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1);
739 break; 739 break;
740 } 740 }
741 741
742 /* 742 /*
743 * Break out if the process is exiting, or if all LWPs are 743 * Break out if all LWPs are in _lwp_wait(). There are
744 * in _lwp_wait(). There are other ways to hang the process 744 * other ways to hang the process with _lwp_wait(), but the
 745 * with _lwp_wait(), but the sleep is interruptible so 745 * sleep is interruptible so little point checking for them.
746 * little point checking for them. 
747 */ 746 */
748 if ((p->p_sflag & PS_WEXIT) != 0 || 747 if (p->p_nlwpwait == p->p_nlwps) {
749 p->p_nlwpwait == p->p_nlwps) { 
750 error = EDEADLK; 748 error = EDEADLK;
751 break; 749 break;
752 } 750 }
753 751
754 /* 752 /*
755 * Sit around and wait for something to happen. We'll be  753 * Sit around and wait for something to happen. We'll be
756 * awoken if any of the conditions examined change: if an 754 * awoken if any of the conditions examined change: if an
757 * LWP exits, is collected, or is detached. 755 * LWP exits, is collected, or is detached.
758 */ 756 */
759 if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) 757 if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0)
760 break; 758 break;
761 } 759 }
762 760
763 /* 761 /*
764 * We didn't find any LWPs to collect, we may have received a  762 * We didn't find any LWPs to collect, we may have received a
765 * signal, or some other condition has caused us to bail out. 763 * signal, or some other condition has caused us to bail out.
766 * 764 *
767 * If waiting on a specific LWP, clear the waiters marker: some 765 * If waiting on a specific LWP, clear the waiters marker: some
768 * other LWP may want it. Then, kick all the remaining waiters 766 * other LWP may want it. Then, kick all the remaining waiters
769 * so that they can re-check for zombies and for deadlock. 767 * so that they can re-check for zombies and for deadlock.
770 */ 768 */
771 if (lid != 0) { 769 if (lid != 0) {
772 l2 = radix_tree_lookup_node(&p->p_lwptree, 770 l2 = radix_tree_lookup_node(&p->p_lwptree,
773 (uint64_t)(lid - 1)); 771 (uint64_t)(lid - 1));
774 KASSERT(l2 == NULL || l2->l_lid == lid); 772 KASSERT(l2 == NULL || l2->l_lid == lid);
775 773
776 if (l2 != NULL && l2->l_waiter == curlid) 774 if (l2 != NULL && l2->l_waiter == curlid)
777 l2->l_waiter = 0; 775 l2->l_waiter = 0;
778 } 776 }
779 p->p_nlwpwait--; 777 p->p_nlwpwait--;
780 l->l_waitingfor = 0; 778 l->l_waitingfor = 0;
781 cv_broadcast(&p->p_lwpcv); 779 cv_broadcast(&p->p_lwpcv);
782 780
783 return error; 781 return error;
784} 782}
785 783
786/* 784/*
787 * Find an unused LID for a new LWP. 785 * Find an unused LID for a new LWP.
788 */ 786 */
789static lwpid_t 787static lwpid_t
790lwp_find_free_lid(struct proc *p) 788lwp_find_free_lid(struct proc *p)
791{ 789{
792 struct lwp *gang[32]; 790 struct lwp *gang[32];
793 lwpid_t lid; 791 lwpid_t lid;
794 unsigned n; 792 unsigned n;
795 793
796 KASSERT(mutex_owned(p->p_lock)); 794 KASSERT(mutex_owned(p->p_lock));
797 KASSERT(p->p_nlwpid > 0); 795 KASSERT(p->p_nlwpid > 0);
798 796
799 /* 797 /*
800 * Scoot forward through the tree in blocks of LIDs doing gang 798 * Scoot forward through the tree in blocks of LIDs doing gang
801 * lookup with dense=true, meaning the lookup will terminate the 799 * lookup with dense=true, meaning the lookup will terminate the
802 * instant a hole is encountered. Most of the time the first entry 800 * instant a hole is encountered. Most of the time the first entry
803 * (p->p_nlwpid) is free and the lookup fails fast. 801 * (p->p_nlwpid) is free and the lookup fails fast.
804 */ 802 */
805 for (lid = p->p_nlwpid;;) { 803 for (lid = p->p_nlwpid;;) {
806 n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1, 804 n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1,
807 (void **)gang, __arraycount(gang), true); 805 (void **)gang, __arraycount(gang), true);
808 if (n == 0) { 806 if (n == 0) {
809 /* Start point was empty. */ 807 /* Start point was empty. */
810 break; 808 break;
811 } 809 }
812 KASSERT(gang[0]->l_lid == lid); 810 KASSERT(gang[0]->l_lid == lid);
813 lid = gang[n - 1]->l_lid + 1; 811 lid = gang[n - 1]->l_lid + 1;
814 if (n < __arraycount(gang)) { 812 if (n < __arraycount(gang)) {
815 /* Scan encountered a hole. */ 813 /* Scan encountered a hole. */
816 break; 814 break;
817 } 815 }
818 } 816 }
819 817
820 return (lwpid_t)lid; 818 return (lwpid_t)lid;
821} 819}
822 820
823/* 821/*
824 * Create a new LWP within process 'p2', using LWP 'l1' as a template. 822 * Create a new LWP within process 'p2', using LWP 'l1' as a template.
825 * The new LWP is created in state LSIDL and must be set running, 823 * The new LWP is created in state LSIDL and must be set running,
826 * suspended, or stopped by the caller. 824 * suspended, or stopped by the caller.
827 */ 825 */
828int 826int
829lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, 827lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
830 void *stack, size_t stacksize, void (*func)(void *), void *arg, 828 void *stack, size_t stacksize, void (*func)(void *), void *arg,
831 lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, 829 lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask,
832 const stack_t *sigstk) 830 const stack_t *sigstk)
833{ 831{
834 struct lwp *l2; 832 struct lwp *l2;
835 turnstile_t *ts; 833 turnstile_t *ts;
836 lwpid_t lid; 834 lwpid_t lid;
837 835
838 KASSERT(l1 == curlwp || l1->l_proc == &proc0); 836 KASSERT(l1 == curlwp || l1->l_proc == &proc0);
839 837
840 /* 838 /*
841 * Enforce limits, excluding the first lwp and kthreads. We must 839 * Enforce limits, excluding the first lwp and kthreads. We must
842 * use the process credentials here when adjusting the limit, as 840 * use the process credentials here when adjusting the limit, as
843 * they are what's tied to the accounting entity. However for 841 * they are what's tied to the accounting entity. However for
844 * authorizing the action, we'll use the LWP's credentials. 842 * authorizing the action, we'll use the LWP's credentials.
845 */ 843 */
846 mutex_enter(p2->p_lock); 844 mutex_enter(p2->p_lock);
847 if (p2->p_nlwps != 0 && p2 != &proc0) { 845 if (p2->p_nlwps != 0 && p2 != &proc0) {
848 uid_t uid = kauth_cred_getuid(p2->p_cred); 846 uid_t uid = kauth_cred_getuid(p2->p_cred);
849 int count = chglwpcnt(uid, 1); 847 int count = chglwpcnt(uid, 1);
850 if (__predict_false(count > 848 if (__predict_false(count >
851 p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { 849 p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) {
852 if (kauth_authorize_process(l1->l_cred, 850 if (kauth_authorize_process(l1->l_cred,
853 KAUTH_PROCESS_RLIMIT, p2, 851 KAUTH_PROCESS_RLIMIT, p2,
854 KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), 852 KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
855 &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) 853 &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR))
856 != 0) { 854 != 0) {
857 (void)chglwpcnt(uid, -1); 855 (void)chglwpcnt(uid, -1);
858 mutex_exit(p2->p_lock); 856 mutex_exit(p2->p_lock);
859 return EAGAIN; 857 return EAGAIN;
860 } 858 }
861 } 859 }
862 } 860 }
863 861
864 /* 862 /*
865 * First off, reap any detached LWP waiting to be collected. 863 * First off, reap any detached LWP waiting to be collected.
866 * We can re-use its LWP structure and turnstile. 864 * We can re-use its LWP structure and turnstile.
867 */ 865 */
868 if ((l2 = p2->p_zomblwp) != NULL) { 866 if ((l2 = p2->p_zomblwp) != NULL) {
869 p2->p_zomblwp = NULL; 867 p2->p_zomblwp = NULL;
870 lwp_free(l2, true, false); 868 lwp_free(l2, true, false);
871 /* p2 now unlocked by lwp_free() */ 869 /* p2 now unlocked by lwp_free() */
872 ts = l2->l_ts; 870 ts = l2->l_ts;
873 KASSERT(l2->l_inheritedprio == -1); 871 KASSERT(l2->l_inheritedprio == -1);
874 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); 872 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
875 memset(l2, 0, sizeof(*l2)); 873 memset(l2, 0, sizeof(*l2));
876 l2->l_ts = ts; 874 l2->l_ts = ts;
877 } else { 875 } else {
878 mutex_exit(p2->p_lock); 876 mutex_exit(p2->p_lock);
879 l2 = pool_cache_get(lwp_cache, PR_WAITOK); 877 l2 = pool_cache_get(lwp_cache, PR_WAITOK);
880 memset(l2, 0, sizeof(*l2)); 878 memset(l2, 0, sizeof(*l2));
881 l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); 879 l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
882 SLIST_INIT(&l2->l_pi_lenders); 880 SLIST_INIT(&l2->l_pi_lenders);
883 } 881 }
884 882
885 l2->l_stat = LSIDL; 883 l2->l_stat = LSIDL;
886 l2->l_proc = p2; 884 l2->l_proc = p2;
887 l2->l_refcnt = 0; 885 l2->l_refcnt = 0;
888 l2->l_class = sclass; 886 l2->l_class = sclass;
889 887
890 /* 888 /*
891 * If vfork(), we want the LWP to run fast and on the same CPU 889 * If vfork(), we want the LWP to run fast and on the same CPU
892 * as its parent, so that it can reuse the VM context and cache 890 * as its parent, so that it can reuse the VM context and cache
893 * footprint on the local CPU. 891 * footprint on the local CPU.
894 */ 892 */
895 l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); 893 l2->l_kpriority = ((flags & LWP_VFORK) ? true : false);
896 l2->l_kpribase = PRI_KERNEL; 894 l2->l_kpribase = PRI_KERNEL;
897 l2->l_priority = l1->l_priority; 895 l2->l_priority = l1->l_priority;
898 l2->l_inheritedprio = -1; 896 l2->l_inheritedprio = -1;
899 l2->l_protectprio = -1; 897 l2->l_protectprio = -1;
900 l2->l_auxprio = -1; 898 l2->l_auxprio = -1;
901 l2->l_flag = 0; 899 l2->l_flag = 0;
902 l2->l_pflag = LP_MPSAFE; 900 l2->l_pflag = LP_MPSAFE;
903 TAILQ_INIT(&l2->l_ld_locks); 901 TAILQ_INIT(&l2->l_ld_locks);
904 l2->l_psrefs = 0; 902 l2->l_psrefs = 0;
905 kmsan_lwp_alloc(l2); 903 kmsan_lwp_alloc(l2);
906 904
907 /* 905 /*
908 * For vfork, borrow parent's lwpctl context if it exists. 906 * For vfork, borrow parent's lwpctl context if it exists.
909 * This also causes us to return via lwp_userret. 907 * This also causes us to return via lwp_userret.
910 */ 908 */
911 if (flags & LWP_VFORK && l1->l_lwpctl) { 909 if (flags & LWP_VFORK && l1->l_lwpctl) {
912 l2->l_lwpctl = l1->l_lwpctl; 910 l2->l_lwpctl = l1->l_lwpctl;
913 l2->l_flag |= LW_LWPCTL; 911 l2->l_flag |= LW_LWPCTL;
914 } 912 }
915 913
916 /* 914 /*
917 * If not the first LWP in the process, grab a reference to the 915 * If not the first LWP in the process, grab a reference to the
918 * descriptor table. 916 * descriptor table.
919 */ 917 */
920 l2->l_fd = p2->p_fd; 918 l2->l_fd = p2->p_fd;
921 if (p2->p_nlwps != 0) { 919 if (p2->p_nlwps != 0) {
922 KASSERT(l1->l_proc == p2); 920 KASSERT(l1->l_proc == p2);
923 fd_hold(l2); 921 fd_hold(l2);
924 } else { 922 } else {
925 KASSERT(l1->l_proc != p2); 923 KASSERT(l1->l_proc != p2);
926 } 924 }
927 925
928 if (p2->p_flag & PK_SYSTEM) { 926 if (p2->p_flag & PK_SYSTEM) {
929 /* Mark it as a system LWP. */ 927 /* Mark it as a system LWP. */
930 l2->l_flag |= LW_SYSTEM; 928 l2->l_flag |= LW_SYSTEM;
931 } 929 }
932 930
933 kpreempt_disable(); 931 kpreempt_disable();
934 l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; 932 l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock;
935 l2->l_cpu = l1->l_cpu; 933 l2->l_cpu = l1->l_cpu;
936 kpreempt_enable(); 934 kpreempt_enable();
937 935
938 kdtrace_thread_ctor(NULL, l2); 936 kdtrace_thread_ctor(NULL, l2);
939 lwp_initspecific(l2); 937 lwp_initspecific(l2);
940 sched_lwp_fork(l1, l2); 938 sched_lwp_fork(l1, l2);
941 lwp_update_creds(l2); 939 lwp_update_creds(l2);
942 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); 940 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE);
943 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); 941 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2);
944 cv_init(&l2->l_sigcv, "sigwait"); 942 cv_init(&l2->l_sigcv, "sigwait");
945 cv_init(&l2->l_waitcv, "vfork"); 943 cv_init(&l2->l_waitcv, "vfork");
946 l2->l_syncobj = &sched_syncobj; 944 l2->l_syncobj = &sched_syncobj;
947 PSREF_DEBUG_INIT_LWP(l2); 945 PSREF_DEBUG_INIT_LWP(l2);
948 946
949 if (rnewlwpp != NULL) 947 if (rnewlwpp != NULL)
950 *rnewlwpp = l2; 948 *rnewlwpp = l2;
951 949
952 /* 950 /*
953 * PCU state needs to be saved before calling uvm_lwp_fork() so that 951 * PCU state needs to be saved before calling uvm_lwp_fork() so that
954 * the MD cpu_lwp_fork() can copy the saved state to the new LWP. 952 * the MD cpu_lwp_fork() can copy the saved state to the new LWP.
955 */ 953 */
956 pcu_save_all(l1); 954 pcu_save_all(l1);
957#if PCU_UNIT_COUNT > 0 955#if PCU_UNIT_COUNT > 0
958 l2->l_pcu_valid = l1->l_pcu_valid; 956 l2->l_pcu_valid = l1->l_pcu_valid;
959#endif 957#endif
960 958
961 uvm_lwp_setuarea(l2, uaddr); 959 uvm_lwp_setuarea(l2, uaddr);
962 uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); 960 uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
963 961
964 if ((flags & LWP_PIDLID) != 0) { 962 if ((flags & LWP_PIDLID) != 0) {
965 /* Linux threads: use a PID. */ 963 /* Linux threads: use a PID. */
966 lid = proc_alloc_pid(p2); 964 lid = proc_alloc_pid(p2);
967 l2->l_pflag |= LP_PIDLID; 965 l2->l_pflag |= LP_PIDLID;
968 } else if (p2->p_nlwps == 0) { 966 } else if (p2->p_nlwps == 0) {
969 /* 967 /*
970 * First LWP in process. Copy the parent's LID to avoid 968 * First LWP in process. Copy the parent's LID to avoid
971 * causing problems for fork() + threads. Don't give 969 * causing problems for fork() + threads. Don't give
972 * subsequent threads the distinction of using LID 1. 970 * subsequent threads the distinction of using LID 1.
973 */ 971 */
974 lid = l1->l_lid; 972 lid = l1->l_lid;
975 p2->p_nlwpid = 2; 973 p2->p_nlwpid = 2;
976 } else { 974 } else {
977 /* Scan the radix tree for a free LID. */ 975 /* Scan the radix tree for a free LID. */
978 lid = 0; 976 lid = 0;
979 } 977 }
980 978
981 /* 979 /*
982 * Allocate LID if needed, and insert into the radix tree. The 980 * Allocate LID if needed, and insert into the radix tree. The
983 * first LWP in most processes has a LID of 1. It turns out that if 981 * first LWP in most processes has a LID of 1. It turns out that if
984 * you insert an item with a key of zero into a radixtree, it's stored 982 * you insert an item with a key of zero into a radixtree, it's stored
985 * directly in the root (p_lwptree) and no extra memory is 983 * directly in the root (p_lwptree) and no extra memory is
986 * allocated. We therefore always subtract 1 from the LID, which 984 * allocated. We therefore always subtract 1 from the LID, which
987 * means no memory is allocated for the tree unless the program is 985 * means no memory is allocated for the tree unless the program is
988 * using threads. NB: the allocation and insert must take place 986 * using threads. NB: the allocation and insert must take place
989 * under the same hold of p_lock. 987 * under the same hold of p_lock.
990 */ 988 */
991 mutex_enter(p2->p_lock); 989 mutex_enter(p2->p_lock);
992 for (;;) { 990 for (;;) {
993 int error; 991 int error;
994 992
995 l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid); 993 l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid);
996 994
997 rw_enter(&p2->p_treelock, RW_WRITER); 995 rw_enter(&p2->p_treelock, RW_WRITER);
998 error = radix_tree_insert_node(&p2->p_lwptree, 996 error = radix_tree_insert_node(&p2->p_lwptree,
999 (uint64_t)(l2->l_lid - 1), l2); 997 (uint64_t)(l2->l_lid - 1), l2);
1000 rw_exit(&p2->p_treelock); 998 rw_exit(&p2->p_treelock);
1001 999
1002 if (__predict_true(error == 0)) { 1000 if (__predict_true(error == 0)) {
1003 if (lid == 0) 1001 if (lid == 0)
1004 p2->p_nlwpid = l2->l_lid + 1; 1002 p2->p_nlwpid = l2->l_lid + 1;
1005 break; 1003 break;
1006 } 1004 }
1007 1005
1008 KASSERT(error == ENOMEM); 1006 KASSERT(error == ENOMEM);
1009 mutex_exit(p2->p_lock); 1007 mutex_exit(p2->p_lock);
1010 radix_tree_await_memory(); 1008 radix_tree_await_memory();
1011 mutex_enter(p2->p_lock); 1009 mutex_enter(p2->p_lock);
1012 } 1010 }
1013 1011
1014 if ((flags & LWP_DETACHED) != 0) { 1012 if ((flags & LWP_DETACHED) != 0) {
1015 l2->l_prflag = LPR_DETACHED; 1013 l2->l_prflag = LPR_DETACHED;
1016 p2->p_ndlwps++; 1014 p2->p_ndlwps++;
1017 } else 1015 } else
1018 l2->l_prflag = 0; 1016 l2->l_prflag = 0;
1019 1017
1020 if (l1->l_proc == p2) { 1018 if (l1->l_proc == p2) {
1021 /* 1019 /*
1022 * These flags are set while p_lock is held. Copy with 1020 * These flags are set while p_lock is held. Copy with
1023 * p_lock held too, so the LWP doesn't sneak into the 1021 * p_lock held too, so the LWP doesn't sneak into the
1024 * process without them being set. 1022 * process without them being set.
1025 */ 1023 */
1026 l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); 1024 l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE));
1027 } else { 1025 } else {
1028 /* fork(): pending core/exit doesn't apply to child. */ 1026 /* fork(): pending core/exit doesn't apply to child. */
1029 l2->l_flag |= (l1->l_flag & LW_WREBOOT); 1027 l2->l_flag |= (l1->l_flag & LW_WREBOOT);
1030 } 1028 }
1031 1029
1032 l2->l_sigstk = *sigstk; 1030 l2->l_sigstk = *sigstk;
1033 l2->l_sigmask = *sigmask; 1031 l2->l_sigmask = *sigmask;
1034 TAILQ_INIT(&l2->l_sigpend.sp_info); 1032 TAILQ_INIT(&l2->l_sigpend.sp_info);
1035 sigemptyset(&l2->l_sigpend.sp_set); 1033 sigemptyset(&l2->l_sigpend.sp_set);
1036 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); 1034 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
1037 p2->p_nlwps++; 1035 p2->p_nlwps++;
1038 p2->p_nrlwps++; 1036 p2->p_nrlwps++;
1039 1037
1040 KASSERT(l2->l_affinity == NULL); 1038 KASSERT(l2->l_affinity == NULL);
1041 1039
1042 /* Inherit the affinity mask. */ 1040 /* Inherit the affinity mask. */
1043 if (l1->l_affinity) { 1041 if (l1->l_affinity) {
1044 /* 1042 /*
1045 * Note that we hold the state lock while inheriting 1043 * Note that we hold the state lock while inheriting
1046 * the affinity to avoid race with sched_setaffinity(). 1044 * the affinity to avoid race with sched_setaffinity().
1047 */ 1045 */
1048 lwp_lock(l1); 1046 lwp_lock(l1);
1049 if (l1->l_affinity) { 1047 if (l1->l_affinity) {
1050 kcpuset_use(l1->l_affinity); 1048 kcpuset_use(l1->l_affinity);
1051 l2->l_affinity = l1->l_affinity; 1049 l2->l_affinity = l1->l_affinity;
1052 } 1050 }
1053 lwp_unlock(l1); 1051 lwp_unlock(l1);
1054 } 1052 }
1055 1053
1056 /* This marks the end of the "must be atomic" section. */ 1054 /* This marks the end of the "must be atomic" section. */
1057 mutex_exit(p2->p_lock); 1055 mutex_exit(p2->p_lock);
1058 1056
1059 SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); 1057 SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0);
1060 1058
1061 mutex_enter(proc_lock); 1059 mutex_enter(proc_lock);
1062 LIST_INSERT_HEAD(&alllwp, l2, l_list); 1060 LIST_INSERT_HEAD(&alllwp, l2, l_list);
1063 /* Inherit a processor-set */ 1061 /* Inherit a processor-set */
1064 l2->l_psid = l1->l_psid; 1062 l2->l_psid = l1->l_psid;
1065 mutex_exit(proc_lock); 1063 mutex_exit(proc_lock);
1066 1064
1067 SYSCALL_TIME_LWP_INIT(l2); 1065 SYSCALL_TIME_LWP_INIT(l2);
1068 1066
1069 if (p2->p_emul->e_lwp_fork) 1067 if (p2->p_emul->e_lwp_fork)
1070 (*p2->p_emul->e_lwp_fork)(l1, l2); 1068 (*p2->p_emul->e_lwp_fork)(l1, l2);
1071 1069
1072 return (0); 1070 return (0);
1073} 1071}
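/*
 * Illustrative sketch, not part of the kern_lwp.c source: the LID to
 * radix tree key convention described above, shown in isolation.  The
 * helper names are hypothetical; the radix_tree_*() calls and fields
 * are the ones used by lwp_create()/lwp_find() in this file, and the
 * sketch assumes the same kernel environment.
 */
static int
lwptree_insert_sketch(struct proc *p, struct lwp *l)
{

	/* LID 1 maps to key 0, which lives in the tree root: no allocation. */
	return radix_tree_insert_node(&p->p_lwptree,
	    (uint64_t)(l->l_lid - 1), l);
}

static struct lwp *
lwptree_lookup_sketch(struct proc *p, lwpid_t lid)
{

	/* The same "LID minus one" key is used on lookup; see lwp_find(). */
	return radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(lid - 1));
}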
1074 1072
1075/* 1073/*
1076 * Set a new LWP running. If the process is stopping, then the LWP is 1074 * Set a new LWP running. If the process is stopping, then the LWP is
1077 * created stopped. 1075 * created stopped.
1078 */ 1076 */
1079void 1077void
1080lwp_start(lwp_t *l, int flags) 1078lwp_start(lwp_t *l, int flags)
1081{ 1079{
1082 proc_t *p = l->l_proc; 1080 proc_t *p = l->l_proc;
1083 1081
1084 mutex_enter(p->p_lock); 1082 mutex_enter(p->p_lock);
1085 lwp_lock(l); 1083 lwp_lock(l);
1086 KASSERT(l->l_stat == LSIDL); 1084 KASSERT(l->l_stat == LSIDL);
1087 if ((flags & LWP_SUSPENDED) != 0) { 1085 if ((flags & LWP_SUSPENDED) != 0) {
1088 /* It'll suspend itself in lwp_userret(). */ 1086 /* It'll suspend itself in lwp_userret(). */
1089 l->l_flag |= LW_WSUSPEND; 1087 l->l_flag |= LW_WSUSPEND;
1090 } 1088 }
1091 if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { 1089 if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
1092 KASSERT(l->l_wchan == NULL); 1090 KASSERT(l->l_wchan == NULL);
1093 l->l_stat = LSSTOP; 1091 l->l_stat = LSSTOP;
1094 p->p_nrlwps--; 1092 p->p_nrlwps--;
1095 lwp_unlock(l); 1093 lwp_unlock(l);
1096 } else { 1094 } else {
1097 setrunnable(l); 1095 setrunnable(l);
1098 /* LWP now unlocked */ 1096 /* LWP now unlocked */
1099 } 1097 }
1100 mutex_exit(p->p_lock); 1098 mutex_exit(p->p_lock);
1101} 1099}
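/*
 * Illustrative sketch, not part of the kern_lwp.c source: the typical
 * hand-off from LWP creation to lwp_start().  "new_lwp" stands for an
 * LWP returned by lwp_create() through its rnewlwpp argument; the
 * creation call itself is omitted because its full prototype is not
 * shown in this hunk.  The helper name is hypothetical.
 */
static void
start_new_lwp_sketch(struct lwp *new_lwp)
{

	/* lwp_create() leaves the new LWP in LSIDL; it is not running yet. */
	KASSERT(new_lwp->l_stat == LSIDL);

	/*
	 * Make it runnable.  Passing LWP_SUSPENDED instead of 0 would set
	 * LW_WSUSPEND, so the LWP would suspend itself in lwp_userret().
	 */
	lwp_start(new_lwp, 0);
}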
1102 1100
1103/* 1101/*
1104 * Called by MD code when a new LWP begins execution. Must be called 1102 * Called by MD code when a new LWP begins execution. Must be called
1105 * with the previous LWP locked (so at splsched), or if there is no 1103 * with the previous LWP locked (so at splsched), or if there is no
1106 * previous LWP, at splsched. 1104 * previous LWP, at splsched.
1107 */ 1105 */
1108void 1106void
1109lwp_startup(struct lwp *prev, struct lwp *new_lwp) 1107lwp_startup(struct lwp *prev, struct lwp *new_lwp)
1110{ 1108{
1111 kmutex_t *lock; 1109 kmutex_t *lock;
1112 1110
1113 KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); 1111 KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev);
1114 KASSERT(kpreempt_disabled()); 1112 KASSERT(kpreempt_disabled());
1115 KASSERT(prev != NULL); 1113 KASSERT(prev != NULL);
1116 KASSERT((prev->l_pflag & LP_RUNNING) != 0); 1114 KASSERT((prev->l_pflag & LP_RUNNING) != 0);
1117 KASSERT(curcpu()->ci_mtx_count == -2); 1115 KASSERT(curcpu()->ci_mtx_count == -2);
1118 1116
1119 /* 1117 /*
1120 * Immediately mark the previous LWP as no longer running and unlock 1118 * Immediately mark the previous LWP as no longer running and unlock
1121 * (to keep lock wait times as short as possible). If a zombie, don't 1119 * (to keep lock wait times as short as possible). If a zombie, don't
1122 * touch after clearing LP_RUNNING as it could be reaped by another 1120 * touch after clearing LP_RUNNING as it could be reaped by another
1123 * CPU. Issue a memory barrier to ensure this. 1121 * CPU. Issue a memory barrier to ensure this.
1124 */ 1122 */
1125 lock = prev->l_mutex; 1123 lock = prev->l_mutex;
1126 if (__predict_false(prev->l_stat == LSZOMB)) { 1124 if (__predict_false(prev->l_stat == LSZOMB)) {
1127 membar_sync(); 1125 membar_sync();
1128 } 1126 }
1129 prev->l_pflag &= ~LP_RUNNING; 1127 prev->l_pflag &= ~LP_RUNNING;
1130 mutex_spin_exit(lock); 1128 mutex_spin_exit(lock);
1131 1129
1132 /* Correct spin mutex count after mi_switch(). */ 1130 /* Correct spin mutex count after mi_switch(). */
1133 curcpu()->ci_mtx_count = 0; 1131 curcpu()->ci_mtx_count = 0;
1134 1132
1135 /* Install new VM context. */ 1133 /* Install new VM context. */
1136 if (__predict_true(new_lwp->l_proc->p_vmspace)) { 1134 if (__predict_true(new_lwp->l_proc->p_vmspace)) {
1137 pmap_activate(new_lwp); 1135 pmap_activate(new_lwp);
1138 } 1136 }
1139 1137
1140 /* We remain at IPL_SCHED from mi_switch() - reset it. */ 1138 /* We remain at IPL_SCHED from mi_switch() - reset it. */
1141 spl0(); 1139 spl0();
1142 1140
1143 LOCKDEBUG_BARRIER(NULL, 0); 1141 LOCKDEBUG_BARRIER(NULL, 0);
1144 SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); 1142 SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0);
1145 1143
1146 /* For kthreads, acquire kernel lock if not MPSAFE. */ 1144 /* For kthreads, acquire kernel lock if not MPSAFE. */
1147 if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { 1145 if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) {
1148 KERNEL_LOCK(1, new_lwp); 1146 KERNEL_LOCK(1, new_lwp);
1149 } 1147 }
1150} 1148}
1151 1149
1152/* 1150/*
1153 * Exit an LWP. 1151 * Exit an LWP.
1154 */ 1152 */
1155void 1153void
1156lwp_exit(struct lwp *l) 1154lwp_exit(struct lwp *l)
1157{ 1155{
1158 struct proc *p = l->l_proc; 1156 struct proc *p = l->l_proc;
1159 struct lwp *l2; 1157 struct lwp *l2;
1160 bool current; 1158 bool current;
1161 1159
1162 current = (l == curlwp); 1160 current = (l == curlwp);
1163 1161
1164 KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); 1162 KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL));
1165 KASSERT(p == curproc); 1163 KASSERT(p == curproc);
1166 1164
1167 SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); 1165 SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0);
1168 1166
1169 /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ 1167 /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */
1170 LOCKDEBUG_BARRIER(NULL, 0); 1168 LOCKDEBUG_BARRIER(NULL, 0);
1171 KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); 1169 KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked");
1172 1170
1173 /* 1171 /*
1174 * If we are the last live LWP in a process, we need to exit the 1172 * If we are the last live LWP in a process, we need to exit the
1175 * entire process. We do so with an exit status of zero, because 1173 * entire process. We do so with an exit status of zero, because
1176 * it's a "controlled" exit, and because that's what Solaris does. 1174 * it's a "controlled" exit, and because that's what Solaris does.
1177 * 1175 *
1178 * We are not quite a zombie yet, but for accounting purposes we 1176 * We are not quite a zombie yet, but for accounting purposes we
1179 * must increment the count of zombies here. 1177 * must increment the count of zombies here.
1180 * 1178 *
1181 * Note: the last LWP's specificdata will be deleted here. 1179 * Note: the last LWP's specificdata will be deleted here.
1182 */ 1180 */
1183 mutex_enter(p->p_lock); 1181 mutex_enter(p->p_lock);
1184 if (p->p_nlwps - p->p_nzlwps == 1) { 1182 if (p->p_nlwps - p->p_nzlwps == 1) {
1185 KASSERT(current == true); 1183 KASSERT(current == true);
1186 KASSERT(p != &proc0); 1184 KASSERT(p != &proc0);
1187 exit1(l, 0, 0); 1185 exit1(l, 0, 0);
1188 /* NOTREACHED */ 1186 /* NOTREACHED */
1189 } 1187 }
1190 p->p_nzlwps++; 1188 p->p_nzlwps++;
1191 1189
1192 /* 1190 /*
1193 * Perform any required thread cleanup. Do this early so 1191 * Perform any required thread cleanup. Do this early so
1194 * anyone wanting to look us up by our global thread ID 1192 * anyone wanting to look us up by our global thread ID
1195 * will fail to find us. 1193 * will fail to find us.
1196 * 1194 *
1197 * N.B. this will unlock p->p_lock on our behalf. 1195 * N.B. this will unlock p->p_lock on our behalf.
1198 */ 1196 */
1199 lwp_thread_cleanup(l); 1197 lwp_thread_cleanup(l);
1200 1198
1201 if (p->p_emul->e_lwp_exit) 1199 if (p->p_emul->e_lwp_exit)
1202 (*p->p_emul->e_lwp_exit)(l); 1200 (*p->p_emul->e_lwp_exit)(l);
1203 1201
1204 /* Drop filedesc reference. */ 1202 /* Drop filedesc reference. */
1205 fd_free(); 1203 fd_free();
1206 1204
1207 /* Release fstrans private data. */ 1205 /* Release fstrans private data. */
1208 fstrans_lwp_dtor(l); 1206 fstrans_lwp_dtor(l);
1209 1207
1210 /* Delete the specificdata while it's still safe to sleep. */ 1208 /* Delete the specificdata while it's still safe to sleep. */
1211 lwp_finispecific(l); 1209 lwp_finispecific(l);
1212 1210
1213 /* 1211 /*
1214 * Release our cached credentials. 1212 * Release our cached credentials.
1215 */ 1213 */
1216 kauth_cred_free(l->l_cred); 1214 kauth_cred_free(l->l_cred);
1217 callout_destroy(&l->l_timeout_ch); 1215 callout_destroy(&l->l_timeout_ch);
1218 1216
1219 /* 1217 /*
1220 * If traced, report LWP exit event to the debugger. 1218 * If traced, report LWP exit event to the debugger.
1221 * 1219 *
1222 * Remove the LWP from the global list. 1220 * Remove the LWP from the global list.
1223 * Free its LID from the PID namespace if needed. 1221 * Free its LID from the PID namespace if needed.
1224 */ 1222 */
1225 mutex_enter(proc_lock); 1223 mutex_enter(proc_lock);
1226 1224
1227 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == 1225 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) ==
1228 (PSL_TRACED|PSL_TRACELWP_EXIT)) { 1226 (PSL_TRACED|PSL_TRACELWP_EXIT)) {
1229 mutex_enter(p->p_lock); 1227 mutex_enter(p->p_lock);
1230 if (ISSET(p->p_sflag, PS_WEXIT)) { 1228 if (ISSET(p->p_sflag, PS_WEXIT)) {
1231 mutex_exit(p->p_lock); 1229 mutex_exit(p->p_lock);
1232 /* 1230 /*
1233 * We are exiting, bail out without informing parent 1231 * We are exiting, bail out without informing parent
1234 * about a terminating LWP as it would deadlock. 1232 * about a terminating LWP as it would deadlock.
1235 */ 1233 */
1236 } else { 1234 } else {
1237 eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); 1235 eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid);
1238 mutex_enter(proc_lock); 1236 mutex_enter(proc_lock);
1239 } 1237 }
1240 } 1238 }
1241 1239
1242 LIST_REMOVE(l, l_list); 1240 LIST_REMOVE(l, l_list);
1243 if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { 1241 if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) {
1244 proc_free_pid(l->l_lid); 1242 proc_free_pid(l->l_lid);
1245 } 1243 }
1246 mutex_exit(proc_lock); 1244 mutex_exit(proc_lock);
1247 1245
1248 /* 1246 /*
1249 * Get rid of all references to the LWP that others (e.g. procfs) 1247 * Get rid of all references to the LWP that others (e.g. procfs)
1250 * may have, and mark the LWP as a zombie. If the LWP is detached, 1248 * may have, and mark the LWP as a zombie. If the LWP is detached,
1251 * mark it waiting for collection in the proc structure. Note that 1249 * mark it waiting for collection in the proc structure. Note that
1252 * before we can do that, we need to free any other dead, detached 1250 * before we can do that, we need to free any other dead, detached
1253 * LWP waiting to meet its maker. 1251 * LWP waiting to meet its maker.
1254 * 1252 *
1255 * All conditions need to be observed under the same hold of 1253 * All conditions need to be observed under the same hold of
1256 * p_lock, because if the lock is dropped any of them can change. 1254 * p_lock, because if the lock is dropped any of them can change.
1257 */ 1255 */
1258 mutex_enter(p->p_lock); 1256 mutex_enter(p->p_lock);
1259 for (;;) { 1257 for (;;) {
1260 if (lwp_drainrefs(l)) 1258 if (lwp_drainrefs(l))
1261 continue; 1259 continue;
1262 if ((l->l_prflag & LPR_DETACHED) != 0) { 1260 if ((l->l_prflag & LPR_DETACHED) != 0) {
1263 if ((l2 = p->p_zomblwp) != NULL) { 1261 if ((l2 = p->p_zomblwp) != NULL) {
1264 p->p_zomblwp = NULL; 1262 p->p_zomblwp = NULL;
1265 lwp_free(l2, false, false); 1263 lwp_free(l2, false, false);
1266 /* proc now unlocked */ 1264 /* proc now unlocked */
1267 mutex_enter(p->p_lock); 1265 mutex_enter(p->p_lock);
1268 continue; 1266 continue;
1269 } 1267 }
1270 p->p_zomblwp = l; 1268 p->p_zomblwp = l;
1271 } 1269 }
1272 break; 1270 break;
1273 } 1271 }
1274 1272
1275 /* 1273 /*
1276 * If we find a pending signal for the process and we have been 1274 * If we find a pending signal for the process and we have been
1277 * asked to check for signals, then we lose: arrange to have 1275 * asked to check for signals, then we lose: arrange to have
1278 * all other LWPs in the process check for signals. 1276 * all other LWPs in the process check for signals.
1279 */ 1277 */
1280 if ((l->l_flag & LW_PENDSIG) != 0 && 1278 if ((l->l_flag & LW_PENDSIG) != 0 &&
1281 firstsig(&p->p_sigpend.sp_set) != 0) { 1279 firstsig(&p->p_sigpend.sp_set) != 0) {
1282 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 1280 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1283 lwp_lock(l2); 1281 lwp_lock(l2);
1284 signotify(l2); 1282 signotify(l2);
1285 lwp_unlock(l2); 1283 lwp_unlock(l2);
1286 } 1284 }
1287 } 1285 }
1288 1286
1289 /* 1287 /*
1290 * Release any PCU resources before becoming a zombie. 1288 * Release any PCU resources before becoming a zombie.
1291 */ 1289 */
1292 pcu_discard_all(l); 1290 pcu_discard_all(l);
1293 1291
1294 lwp_lock(l); 1292 lwp_lock(l);
1295 l->l_stat = LSZOMB; 1293 l->l_stat = LSZOMB;
1296 if (l->l_name != NULL) { 1294 if (l->l_name != NULL) {
1297 strcpy(l->l_name, "(zombie)"); 1295 strcpy(l->l_name, "(zombie)");
1298 } 1296 }
1299 lwp_unlock(l); 1297 lwp_unlock(l);
1300 p->p_nrlwps--; 1298 p->p_nrlwps--;
1301 cv_broadcast(&p->p_lwpcv); 1299 cv_broadcast(&p->p_lwpcv);
1302 if (l->l_lwpctl != NULL) 1300 if (l->l_lwpctl != NULL)
1303 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; 1301 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
1304 mutex_exit(p->p_lock); 1302 mutex_exit(p->p_lock);
1305 1303
1306 /* 1304 /*
1307 * We can no longer block. At this point, lwp_free() may already 1305 * We can no longer block. At this point, lwp_free() may already
1308 * be gunning for us. On a multi-CPU system, we may be off p_lwps. 1306 * be gunning for us. On a multi-CPU system, we may be off p_lwps.
1309 * 1307 *
1310 * Free MD LWP resources. 1308 * Free MD LWP resources.
1311 */ 1309 */
1312 cpu_lwp_free(l, 0); 1310 cpu_lwp_free(l, 0);
1313 1311
1314 if (current) { 1312 if (current) {
1315 /* Switch away into oblivion. */ 1313 /* Switch away into oblivion. */
1316 lwp_lock(l); 1314 lwp_lock(l);
1317 spc_lock(l->l_cpu); 1315 spc_lock(l->l_cpu);
1318 mi_switch(l); 1316 mi_switch(l);
1319 panic("lwp_exit"); 1317 panic("lwp_exit");
1320 } 1318 }
1321} 1319}
1322 1320
1323/* 1321/*
1324 * Free a dead LWP's remaining resources. 1322 * Free a dead LWP's remaining resources.
1325 * 1323 *
1326 * XXXLWP limits. 1324 * XXXLWP limits.
1327 */ 1325 */
1328void 1326void
1329lwp_free(struct lwp *l, bool recycle, bool last) 1327lwp_free(struct lwp *l, bool recycle, bool last)
1330{ 1328{
1331 struct proc *p = l->l_proc; 1329 struct proc *p = l->l_proc;
1332 struct rusage *ru; 1330 struct rusage *ru;
1333 struct lwp *l2 __diagused; 1331 struct lwp *l2 __diagused;
1334 ksiginfoq_t kq; 1332 ksiginfoq_t kq;
1335 1333
1336 KASSERT(l != curlwp); 1334 KASSERT(l != curlwp);
1337 KASSERT(last || mutex_owned(p->p_lock)); 1335 KASSERT(last || mutex_owned(p->p_lock));
1338 1336
1339 /* 1337 /*
1340 * We use the process credentials instead of the lwp credentials here 1338 * We use the process credentials instead of the lwp credentials here
1341 * because the lwp credentials may be cached (just after a setuid call) 1339 * because the lwp credentials may be cached (just after a setuid call)
1342 * and we don't want to pay for syncing, since the lwp is going away 1340 * and we don't want to pay for syncing, since the lwp is going away
1343 * anyway 1341 * anyway
1344 */ 1342 */
1345 if (p != &proc0 && p->p_nlwps != 1) 1343 if (p != &proc0 && p->p_nlwps != 1)
1346 (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); 1344 (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
1347 1345
1348 /* 1346 /*
1349 * If this was not the last LWP in the process, then adjust counters 1347 * If this was not the last LWP in the process, then adjust counters
1350 * and unlock. This is done differently for the last LWP in exit1(). 1348 * and unlock. This is done differently for the last LWP in exit1().
1351 */ 1349 */
1352 if (!last) { 1350 if (!last) {
1353 /* 1351 /*
1354 * Add the LWP's run time to the process' base value. 1352 * Add the LWP's run time to the process' base value.
1355 * This needs to coincide with coming off p_lwps. 1353 * This needs to coincide with coming off p_lwps.
1356 */ 1354 */
1357 bintime_add(&p->p_rtime, &l->l_rtime); 1355 bintime_add(&p->p_rtime, &l->l_rtime);
1358 p->p_pctcpu += l->l_pctcpu; 1356 p->p_pctcpu += l->l_pctcpu;
1359 ru = &p->p_stats->p_ru; 1357 ru = &p->p_stats->p_ru;
1360 ruadd(ru, &l->l_ru); 1358 ruadd(ru, &l->l_ru);
1361 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 1359 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
1362 ru->ru_nivcsw += l->l_nivcsw; 1360 ru->ru_nivcsw += l->l_nivcsw;
1363 LIST_REMOVE(l, l_sibling); 1361 LIST_REMOVE(l, l_sibling);
1364 p->p_nlwps--; 1362 p->p_nlwps--;
1365 p->p_nzlwps--; 1363 p->p_nzlwps--;
1366 if ((l->l_prflag & LPR_DETACHED) != 0) 1364 if ((l->l_prflag & LPR_DETACHED) != 0)
1367 p->p_ndlwps--; 1365 p->p_ndlwps--;
1368 1366
1369 /* Make note of the LID being free, and remove from tree. */ 1367 /* Make note of the LID being free, and remove from tree. */
1370 if (l->l_lid < p->p_nlwpid) 1368 if (l->l_lid < p->p_nlwpid)
1371 p->p_nlwpid = l->l_lid; 1369 p->p_nlwpid = l->l_lid;
1372 rw_enter(&p->p_treelock, RW_WRITER); 1370 rw_enter(&p->p_treelock, RW_WRITER);
1373 l2 = radix_tree_remove_node(&p->p_lwptree, 1371 l2 = radix_tree_remove_node(&p->p_lwptree,
1374 (uint64_t)(l->l_lid - 1)); 1372 (uint64_t)(l->l_lid - 1));
1375 KASSERT(l2 == l); 1373 KASSERT(l2 == l);
1376 rw_exit(&p->p_treelock); 1374 rw_exit(&p->p_treelock);
1377 1375
1378 /* 1376 /*
1379 * Have any LWPs sleeping in lwp_wait() recheck for 1377 * Have any LWPs sleeping in lwp_wait() recheck for
1380 * deadlock. 1378 * deadlock.
1381 */ 1379 */
1382 cv_broadcast(&p->p_lwpcv); 1380 cv_broadcast(&p->p_lwpcv);
1383 mutex_exit(p->p_lock); 1381 mutex_exit(p->p_lock);
1384 } 1382 }
1385 1383
1386 /* 1384 /*
1387 * In the unlikely event that the LWP is still on the CPU, 1385 * In the unlikely event that the LWP is still on the CPU,
1388 * then spin until it has switched away. 1386 * then spin until it has switched away.
1389 */ 1387 */
1390 membar_consumer(); 1388 membar_consumer();
1391 while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) { 1389 while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
1392 SPINLOCK_BACKOFF_HOOK; 1390 SPINLOCK_BACKOFF_HOOK;
1393 } 1391 }
1394 1392
1395 /* 1393 /*
1396 * Destroy the LWP's remaining signal information. 1394 * Destroy the LWP's remaining signal information.
1397 */ 1395 */
1398 ksiginfo_queue_init(&kq); 1396 ksiginfo_queue_init(&kq);
1399 sigclear(&l->l_sigpend, NULL, &kq); 1397 sigclear(&l->l_sigpend, NULL, &kq);
1400 ksiginfo_queue_drain(&kq); 1398 ksiginfo_queue_drain(&kq);
1401 cv_destroy(&l->l_sigcv); 1399 cv_destroy(&l->l_sigcv);
1402 cv_destroy(&l->l_waitcv); 1400 cv_destroy(&l->l_waitcv);
1403 1401
1404 /* 1402 /*
1405 * Free lwpctl structure and affinity. 1403 * Free lwpctl structure and affinity.
1406 */ 1404 */
1407 if (l->l_lwpctl) { 1405 if (l->l_lwpctl) {
1408 lwp_ctl_free(l); 1406 lwp_ctl_free(l);
1409 } 1407 }
1410 if (l->l_affinity) { 1408 if (l->l_affinity) {
1411 kcpuset_unuse(l->l_affinity, NULL); 1409 kcpuset_unuse(l->l_affinity, NULL);
1412 l->l_affinity = NULL; 1410 l->l_affinity = NULL;
1413 } 1411 }
1414 1412
1415 /* 1413 /*
1416 * Free the LWP's turnstile and the LWP structure itself unless the 1414 * Free the LWP's turnstile and the LWP structure itself unless the
1417 * caller wants to recycle them. Also, free the scheduler specific 1415 * caller wants to recycle them. Also, free the scheduler specific
1418 * data. 1416 * data.
1419 * 1417 *
1420 * We can't return turnstile0 to the pool (it didn't come from it), 1418 * We can't return turnstile0 to the pool (it didn't come from it),
1421 * so if it comes up just drop it quietly and move on. 1419 * so if it comes up just drop it quietly and move on.
1422 * 1420 *
1423 * We don't recycle the VM resources at this time. 1421 * We don't recycle the VM resources at this time.
1424 */ 1422 */
1425 1423
1426 if (!recycle && l->l_ts != &turnstile0) 1424 if (!recycle && l->l_ts != &turnstile0)
1427 pool_cache_put(turnstile_cache, l->l_ts); 1425 pool_cache_put(turnstile_cache, l->l_ts);
1428 if (l->l_name != NULL) 1426 if (l->l_name != NULL)
1429 kmem_free(l->l_name, MAXCOMLEN); 1427 kmem_free(l->l_name, MAXCOMLEN);
1430 1428
1431 kmsan_lwp_free(l); 1429 kmsan_lwp_free(l);
1432 kcov_lwp_free(l); 1430 kcov_lwp_free(l);
1433 cpu_lwp_free2(l); 1431 cpu_lwp_free2(l);
1434 uvm_lwp_exit(l); 1432 uvm_lwp_exit(l);
1435 1433
1436 KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); 1434 KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
1437 KASSERT(l->l_inheritedprio == -1); 1435 KASSERT(l->l_inheritedprio == -1);
1438 KASSERT(l->l_blcnt == 0); 1436 KASSERT(l->l_blcnt == 0);
1439 kdtrace_thread_dtor(NULL, l); 1437 kdtrace_thread_dtor(NULL, l);
1440 if (!recycle) 1438 if (!recycle)
1441 pool_cache_put(lwp_cache, l); 1439 pool_cache_put(lwp_cache, l);
1442} 1440}
1443 1441
1444/* 1442/*
1445 * Migrate the LWP to another CPU. Unlocks the LWP. 1443 * Migrate the LWP to another CPU. Unlocks the LWP.
1446 */ 1444 */
1447void 1445void
1448lwp_migrate(lwp_t *l, struct cpu_info *tci) 1446lwp_migrate(lwp_t *l, struct cpu_info *tci)
1449{ 1447{
1450 struct schedstate_percpu *tspc; 1448 struct schedstate_percpu *tspc;
1451 int lstat = l->l_stat; 1449 int lstat = l->l_stat;
1452 1450
1453 KASSERT(lwp_locked(l, NULL)); 1451 KASSERT(lwp_locked(l, NULL));
1454 KASSERT(tci != NULL); 1452 KASSERT(tci != NULL);
1455 1453
1456 /* If LWP is still on the CPU, it must be handled like LSONPROC */ 1454 /* If LWP is still on the CPU, it must be handled like LSONPROC */
1457 if ((l->l_pflag & LP_RUNNING) != 0) { 1455 if ((l->l_pflag & LP_RUNNING) != 0) {
1458 lstat = LSONPROC; 1456 lstat = LSONPROC;
1459 } 1457 }
1460 1458
1461 /* 1459 /*
1462 * The destination CPU could be changed while previous migration 1460 * The destination CPU could be changed while previous migration
1463 * was not finished. 1461 * was not finished.
1464 */ 1462 */
1465 if (l->l_target_cpu != NULL) { 1463 if (l->l_target_cpu != NULL) {
1466 l->l_target_cpu = tci; 1464 l->l_target_cpu = tci;
1467 lwp_unlock(l); 1465 lwp_unlock(l);
1468 return; 1466 return;
1469 } 1467 }
1470 1468
1471 /* Nothing to do if trying to migrate to the same CPU */ 1469 /* Nothing to do if trying to migrate to the same CPU */
1472 if (l->l_cpu == tci) { 1470 if (l->l_cpu == tci) {
1473 lwp_unlock(l); 1471 lwp_unlock(l);
1474 return; 1472 return;
1475 } 1473 }
1476 1474
1477 KASSERT(l->l_target_cpu == NULL); 1475 KASSERT(l->l_target_cpu == NULL);
1478 tspc = &tci->ci_schedstate; 1476 tspc = &tci->ci_schedstate;
1479 switch (lstat) { 1477 switch (lstat) {
1480 case LSRUN: 1478 case LSRUN:
1481 l->l_target_cpu = tci; 1479 l->l_target_cpu = tci;
1482 break; 1480 break;
1483 case LSSLEEP: 1481 case LSSLEEP:
1484 l->l_cpu = tci; 1482 l->l_cpu = tci;
1485 break; 1483 break;
1486 case LSIDL: 1484 case LSIDL:
1487 case LSSTOP: 1485 case LSSTOP:
1488 case LSSUSPENDED: 1486 case LSSUSPENDED:
1489 l->l_cpu = tci; 1487 l->l_cpu = tci;
1490 if (l->l_wchan == NULL) { 1488 if (l->l_wchan == NULL) {
1491 lwp_unlock_to(l, tspc->spc_lwplock); 1489 lwp_unlock_to(l, tspc->spc_lwplock);
1492 return; 1490 return;
1493 } 1491 }
1494 break; 1492 break;
1495 case LSONPROC: 1493 case LSONPROC:
1496 l->l_target_cpu = tci; 1494 l->l_target_cpu = tci;
1497 spc_lock(l->l_cpu); 1495 spc_lock(l->l_cpu);
1498 sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); 1496 sched_resched_cpu(l->l_cpu, PRI_USER_RT, true);
1499 /* spc now unlocked */ 1497 /* spc now unlocked */
1500 break; 1498 break;
1501 } 1499 }
1502 lwp_unlock(l); 1500 lwp_unlock(l);
1503} 1501}
1504 1502
1505/* 1503/*
1506 * Find the LWP in the process. Arguments may be zero, in which case 1504 * Find the LWP in the process. Arguments may be zero, in which case
1507 * the calling process and first LWP in the list will be used. 1505 * the calling process and first LWP in the list will be used.
1508 * On success - returns proc locked. 1506 * On success - returns proc locked.
1509 */ 1507 */
1510struct lwp * 1508struct lwp *
1511lwp_find2(pid_t pid, lwpid_t lid) 1509lwp_find2(pid_t pid, lwpid_t lid)
1512{ 1510{
1513 proc_t *p; 1511 proc_t *p;
1514 lwp_t *l; 1512 lwp_t *l;
1515 1513
1516 /* Find the process. */ 1514 /* Find the process. */
1517 if (pid != 0) { 1515 if (pid != 0) {
1518 mutex_enter(proc_lock); 1516 mutex_enter(proc_lock);
1519 p = proc_find(pid); 1517 p = proc_find(pid);
1520 if (p == NULL) { 1518 if (p == NULL) {
1521 mutex_exit(proc_lock); 1519 mutex_exit(proc_lock);
1522 return NULL; 1520 return NULL;
1523 } 1521 }
1524 mutex_enter(p->p_lock); 1522 mutex_enter(p->p_lock);
1525 mutex_exit(proc_lock); 1523 mutex_exit(proc_lock);
1526 } else { 1524 } else {
1527 p = curlwp->l_proc; 1525 p = curlwp->l_proc;
1528 mutex_enter(p->p_lock); 1526 mutex_enter(p->p_lock);
1529 } 1527 }
1530 /* Find the thread. */ 1528 /* Find the thread. */
1531 if (lid != 0) { 1529 if (lid != 0) {
1532 l = lwp_find(p, lid); 1530 l = lwp_find(p, lid);
1533 } else { 1531 } else {
1534 l = LIST_FIRST(&p->p_lwps); 1532 l = LIST_FIRST(&p->p_lwps);
1535 } 1533 }
1536 if (l == NULL) { 1534 if (l == NULL) {
1537 mutex_exit(p->p_lock); 1535 mutex_exit(p->p_lock);
1538 } 1536 }
1539 return l; 1537 return l;
1540} 1538}
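/*
 * Illustrative sketch, not part of the kern_lwp.c source: the locking
 * contract of lwp_find2().  On success the target process is returned
 * with p_lock held, so the caller must drop it when done; on failure
 * nothing is held.  The helper name is hypothetical.
 */
static int
lwp_find2_usage_sketch(pid_t pid, lwpid_t lid)
{
	struct lwp *l;

	/* pid == 0 means the calling process, lid == 0 means its first LWP. */
	l = lwp_find2(pid, lid);
	if (l == NULL)
		return ESRCH;

	/* ... inspect "l" while l->l_proc->p_lock is held ... */

	mutex_exit(l->l_proc->p_lock);
	return 0;
}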
1541 1539
1542/* 1540/*
1543 * Look up a live LWP within the specified process. 1541 * Look up a live LWP within the specified process.
1544 * 1542 *
1545 * Must be called with p->p_lock held (as it looks at the radix tree, 1543 * Must be called with p->p_lock held (as it looks at the radix tree,
1546 * and also wants to exclude idle and zombie LWPs). 1544 * and also wants to exclude idle and zombie LWPs).
1547 */ 1545 */
1548struct lwp * 1546struct lwp *
1549lwp_find(struct proc *p, lwpid_t id) 1547lwp_find(struct proc *p, lwpid_t id)
1550{ 1548{
1551 struct lwp *l; 1549 struct lwp *l;
1552 1550
1553 KASSERT(mutex_owned(p->p_lock)); 1551 KASSERT(mutex_owned(p->p_lock));
1554 1552
1555 l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1)); 1553 l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1));
1556 KASSERT(l == NULL || l->l_lid == id); 1554 KASSERT(l == NULL || l->l_lid == id);
1557 1555
1558 /* 1556 /*
1559 * No need to lock - all of these conditions will 1557 * No need to lock - all of these conditions will
1560 * be visible with the process level mutex held. 1558 * be visible with the process level mutex held.
1561 */ 1559 */
1562 if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) 1560 if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB))
1563 l = NULL; 1561 l = NULL;
1564 1562
1565 return l; 1563 return l;
1566} 1564}
1567 1565
1568/* 1566/*
1569 * Update an LWP's cached credentials to mirror the process' master copy. 1567 * Update an LWP's cached credentials to mirror the process' master copy.
1570 * 1568 *
1571 * This happens early in the syscall path, on user trap, and on LWP 1569 * This happens early in the syscall path, on user trap, and on LWP
1572 * creation. A long-running LWP can also voluntarily choose to update 1570 * creation. A long-running LWP can also voluntarily choose to update
1573 * its credentials by calling this routine. This may be called from 1571 * its credentials by calling this routine. This may be called from
1574 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. 1572 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
1575 */ 1573 */
1576void 1574void
1577lwp_update_creds(struct lwp *l) 1575lwp_update_creds(struct lwp *l)
1578{ 1576{
1579 kauth_cred_t oc; 1577 kauth_cred_t oc;
1580 struct proc *p; 1578 struct proc *p;
1581 1579
1582 p = l->l_proc; 1580 p = l->l_proc;
1583 oc = l->l_cred; 1581 oc = l->l_cred;
1584 1582
1585 mutex_enter(p->p_lock); 1583 mutex_enter(p->p_lock);
1586 kauth_cred_hold(p->p_cred); 1584 kauth_cred_hold(p->p_cred);
1587 l->l_cred = p->p_cred; 1585 l->l_cred = p->p_cred;
1588 l->l_prflag &= ~LPR_CRMOD; 1586 l->l_prflag &= ~LPR_CRMOD;
1589 mutex_exit(p->p_lock); 1587 mutex_exit(p->p_lock);
1590 if (oc != NULL) 1588 if (oc != NULL)
1591 kauth_cred_free(oc); 1589 kauth_cred_free(oc);
1592} 1590}
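/*
 * Illustrative sketch, not part of the kern_lwp.c source: the cheap
 * unlocked test that the comment above describes LWP_CACHE_CREDS() as
 * performing before paying for lwp_update_creds().  The helper name is
 * hypothetical.
 */
static inline void
cache_creds_sketch(struct lwp *l, struct proc *p)
{

	/* Only take p_lock and re-hold credentials if the master copy changed. */
	if (__predict_false(l->l_cred != p->p_cred))
		lwp_update_creds(l);
}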
1593 1591
1594/* 1592/*
1595 * Verify that an LWP is locked, and optionally verify that the lock matches 1593 * Verify that an LWP is locked, and optionally verify that the lock matches
1596 * one we specify. 1594 * one we specify.
1597 */ 1595 */
1598int 1596int
1599lwp_locked(struct lwp *l, kmutex_t *mtx) 1597lwp_locked(struct lwp *l, kmutex_t *mtx)
1600{ 1598{
1601 kmutex_t *cur = l->l_mutex; 1599 kmutex_t *cur = l->l_mutex;
1602 1600
1603 return mutex_owned(cur) && (mtx == cur || mtx == NULL); 1601 return mutex_owned(cur) && (mtx == cur || mtx == NULL);
1604} 1602}
1605 1603
1606/* 1604/*
1607 * Lend a new mutex to an LWP. The old mutex must be held. 1605 * Lend a new mutex to an LWP. The old mutex must be held.
1608 */ 1606 */
1609kmutex_t * 1607kmutex_t *
1610lwp_setlock(struct lwp *l, kmutex_t *mtx) 1608lwp_setlock(struct lwp *l, kmutex_t *mtx)
1611{ 1609{
1612 kmutex_t *oldmtx = l->l_mutex; 1610 kmutex_t *oldmtx = l->l_mutex;
1613 1611
1614 KASSERT(mutex_owned(oldmtx)); 1612 KASSERT(mutex_owned(oldmtx));
1615 1613
1616 membar_exit(); 1614 membar_exit();
1617 l->l_mutex = mtx; 1615 l->l_mutex = mtx;
1618 return oldmtx; 1616 return oldmtx;
1619} 1617}
1620 1618
1621/* 1619/*
1622 * Lend a new mutex to an LWP, and release the old mutex. The old mutex 1620 * Lend a new mutex to an LWP, and release the old mutex. The old mutex
1623 * must be held. 1621 * must be held.
1624 */ 1622 */
1625void 1623void
1626lwp_unlock_to(struct lwp *l, kmutex_t *mtx) 1624lwp_unlock_to(struct lwp *l, kmutex_t *mtx)
1627{ 1625{
1628 kmutex_t *old; 1626 kmutex_t *old;
1629 1627
1630 KASSERT(lwp_locked(l, NULL)); 1628 KASSERT(lwp_locked(l, NULL));
1631 1629
1632 old = l->l_mutex; 1630 old = l->l_mutex;
1633 membar_exit(); 1631 membar_exit();
1634 l->l_mutex = mtx; 1632 l->l_mutex = mtx;
1635 mutex_spin_exit(old); 1633 mutex_spin_exit(old);
1636} 1634}
1637 1635
1638int 1636int
1639lwp_trylock(struct lwp *l) 1637lwp_trylock(struct lwp *l)
1640{ 1638{
1641 kmutex_t *old; 1639 kmutex_t *old;
1642 1640
1643 for (;;) { 1641 for (;;) {
1644 if (!mutex_tryenter(old = l->l_mutex)) 1642 if (!mutex_tryenter(old = l->l_mutex))
1645 return 0; 1643 return 0;
1646 if (__predict_true(l->l_mutex == old)) 1644 if (__predict_true(l->l_mutex == old))
1647 return 1; 1645 return 1;
1648 mutex_spin_exit(old); 1646 mutex_spin_exit(old);
1649 } 1647 }
1650} 1648}
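/*
 * Illustrative sketch, not part of the kern_lwp.c source: why
 * lwp_trylock() re-checks l->l_mutex after mutex_tryenter().  The lock
 * protecting an LWP can be changed underneath the caller by
 * lwp_setlock()/lwp_unlock_to(), so success means the caller holds
 * whatever is *currently* l->l_mutex.  The helper name is hypothetical.
 */
static bool
lwp_trylock_usage_sketch(struct lwp *l)
{

	if (!lwp_trylock(l))
		return false;	/* lock contended; caller should retry later */

	/* ... operate on "l" with its current lock held ... */

	lwp_unlock(l);
	return true;
}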
1651 1649
1652void 1650void
1653lwp_unsleep(lwp_t *l, bool unlock) 1651lwp_unsleep(lwp_t *l, bool unlock)
1654{ 1652{
1655 1653
1656 KASSERT(mutex_owned(l->l_mutex)); 1654 KASSERT(mutex_owned(l->l_mutex));
1657 (*l->l_syncobj->sobj_unsleep)(l, unlock); 1655 (*l->l_syncobj->sobj_unsleep)(l, unlock);
1658} 1656}
1659 1657
1660/* 1658/*
1661 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is 1659 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is
1662 * set. 1660 * set.
1663 */ 1661 */
1664void 1662void
1665lwp_userret(struct lwp *l) 1663lwp_userret(struct lwp *l)
1666{ 1664{
1667 struct proc *p; 1665 struct proc *p;
1668 int sig; 1666 int sig;
1669 1667
1670 KASSERT(l == curlwp); 1668 KASSERT(l == curlwp);
1671 KASSERT(l->l_stat == LSONPROC); 1669 KASSERT(l->l_stat == LSONPROC);
1672 p = l->l_proc; 1670 p = l->l_proc;
1673 1671
1674 /* 1672 /*
1675 * It is safe to do this read unlocked on an MP system. 1673 * It is safe to do this read unlocked on an MP system.
1676 */ 1674 */
1677 while ((l->l_flag & LW_USERRET) != 0) { 1675 while ((l->l_flag & LW_USERRET) != 0) {
1678 /* 1676 /*
1679 * Process pending signals first, unless the process 1677 * Process pending signals first, unless the process
1680 * is dumping core or exiting, where we will instead 1678 * is dumping core or exiting, where we will instead
1681 * enter the LW_WSUSPEND case below. 1679 * enter the LW_WSUSPEND case below.
1682 */ 1680 */
1683 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == 1681 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
1684 LW_PENDSIG) { 1682 LW_PENDSIG) {
1685 mutex_enter(p->p_lock); 1683 mutex_enter(p->p_lock);
1686 while ((sig = issignal(l)) != 0) 1684 while ((sig = issignal(l)) != 0)
1687 postsig(sig); 1685 postsig(sig);
1688 mutex_exit(p->p_lock); 1686 mutex_exit(p->p_lock);
1689 } 1687 }
1690 1688
1691 /* 1689 /*
1692 * Core-dump or suspend pending. 1690 * Core-dump or suspend pending.
1693 * 1691 *
1694 * In case of core dump, suspend ourselves, so that the kernel 1692 * In case of core dump, suspend ourselves, so that the kernel
1695 * stack and therefore the userland registers saved in the 1693 * stack and therefore the userland registers saved in the
1696 * trapframe are around for coredump() to write them out. 1694 * trapframe are around for coredump() to write them out.
1697 * We also need to save any PCU resources that we have so that 1695 * We also need to save any PCU resources that we have so that
1698 * they are accessible for coredump(). We issue a wakeup on 1696 * they are accessible for coredump(). We issue a wakeup on
1699 * p->p_lwpcv so that sigexit() will write the core file out 1697 * p->p_lwpcv so that sigexit() will write the core file out
1700 * once all other LWPs are suspended.  1698 * once all other LWPs are suspended.
1701 */ 1699 */
1702 if ((l->l_flag & LW_WSUSPEND) != 0) { 1700 if ((l->l_flag & LW_WSUSPEND) != 0) {
1703 pcu_save_all(l); 1701 pcu_save_all(l);
1704 mutex_enter(p->p_lock); 1702 mutex_enter(p->p_lock);
1705 p->p_nrlwps--; 1703 p->p_nrlwps--;
1706 cv_broadcast(&p->p_lwpcv); 1704 cv_broadcast(&p->p_lwpcv);
1707 lwp_lock(l); 1705 lwp_lock(l);
1708 l->l_stat = LSSUSPENDED; 1706 l->l_stat = LSSUSPENDED;
1709 lwp_unlock(l); 1707 lwp_unlock(l);
1710 mutex_exit(p->p_lock); 1708 mutex_exit(p->p_lock);
1711 lwp_lock(l); 1709 lwp_lock(l);
1712 spc_lock(l->l_cpu); 1710 spc_lock(l->l_cpu);
1713 mi_switch(l); 1711 mi_switch(l);
1714 } 1712 }
1715 1713
1716 /* Process is exiting. */ 1714 /* Process is exiting. */
1717 if ((l->l_flag & LW_WEXIT) != 0) { 1715 if ((l->l_flag & LW_WEXIT) != 0) {
1718 lwp_exit(l); 1716 lwp_exit(l);
1719 KASSERT(0); 1717 KASSERT(0);
1720 /* NOTREACHED */ 1718 /* NOTREACHED */
1721 } 1719 }
1722 1720
1723 /* update lwpctl processor (for vfork child_return) */ 1721 /* update lwpctl processor (for vfork child_return) */
1724 if (l->l_flag & LW_LWPCTL) { 1722 if (l->l_flag & LW_LWPCTL) {
1725 lwp_lock(l); 1723 lwp_lock(l);
1726 KASSERT(kpreempt_disabled()); 1724 KASSERT(kpreempt_disabled());
1727 l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); 1725 l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu);
1728 l->l_lwpctl->lc_pctr++; 1726 l->l_lwpctl->lc_pctr++;
1729 l->l_flag &= ~LW_LWPCTL; 1727 l->l_flag &= ~LW_LWPCTL;
1730 lwp_unlock(l); 1728 lwp_unlock(l);
1731 } 1729 }
1732 } 1730 }
1733} 1731}
1734 1732
1735/* 1733/*
1736 * Force an LWP to enter the kernel, to take a trip through lwp_userret(). 1734 * Force an LWP to enter the kernel, to take a trip through lwp_userret().
1737 */ 1735 */
1738void 1736void
1739lwp_need_userret(struct lwp *l) 1737lwp_need_userret(struct lwp *l)
1740{ 1738{
1741 1739
1742 KASSERT(!cpu_intr_p()); 1740 KASSERT(!cpu_intr_p());
1743 KASSERT(lwp_locked(l, NULL)); 1741 KASSERT(lwp_locked(l, NULL));
1744 1742
1745 /* 1743 /*
1746 * If the LWP is in any state other than LSONPROC, we know that it 1744 * If the LWP is in any state other than LSONPROC, we know that it
1747 * is executing in-kernel and will hit userret() on the way out.  1745 * is executing in-kernel and will hit userret() on the way out.
1748 * 1746 *