| @@ -1,1748 +1,1746 @@ | | | @@ -1,1748 +1,1746 @@ |
1 | /* $NetBSD: kern_lwp.c,v 1.233 2020/04/04 20:20:12 thorpej Exp $ */ | | 1 | /* $NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 | | 4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 |
5 | * The NetBSD Foundation, Inc. | | 5 | * The NetBSD Foundation, Inc. |
6 | * All rights reserved. | | 6 | * All rights reserved. |
7 | * | | 7 | * |
8 | * This code is derived from software contributed to The NetBSD Foundation | | 8 | * This code is derived from software contributed to The NetBSD Foundation |
9 | * by Nathan J. Williams, and Andrew Doran. | | 9 | * by Nathan J. Williams, and Andrew Doran. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. | | 15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright | | 16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the | | 17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. | | 18 | * documentation and/or other materials provided with the distribution. |
19 | * | | 19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. | | 30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ | | 31 | */ |
32 | | | 32 | |
33 | /* | | 33 | /* |
34 | * Overview | | 34 | * Overview |
35 | * | | 35 | * |
36 | * Lightweight processes (LWPs) are the basic unit or thread of | | 36 | * Lightweight processes (LWPs) are the basic unit or thread of |
37 | * execution within the kernel. The core state of an LWP is described | | 37 | * execution within the kernel. The core state of an LWP is described |
38 | * by "struct lwp", also known as lwp_t. | | 38 | * by "struct lwp", also known as lwp_t. |
39 | * | | 39 | * |
40 | * Each LWP is contained within a process (described by "struct proc"). | | 40 | * Each LWP is contained within a process (described by "struct proc"). |
41 | * Every process contains at least one LWP, but may contain more. The | | 41 | * Every process contains at least one LWP, but may contain more. The |
42 | * process describes attributes shared among all of its LWPs such as a | | 42 | * process describes attributes shared among all of its LWPs such as a |
43 | * private address space, global execution state (stopped, active, | | 43 | * private address space, global execution state (stopped, active, |
44 | * zombie, ...), signal disposition and so on. On a multiprocessor | | 44 | * zombie, ...), signal disposition and so on. On a multiprocessor |
45 | * machine, multiple LWPs may be executing concurrently in the kernel. | | 45 | * machine, multiple LWPs may be executing concurrently in the kernel. |
46 | * | | 46 | * |
47 | * Execution states | | 47 | * Execution states |
48 | * | | 48 | * |
49 | * At any given time, an LWP has overall state that is described by | | 49 | * At any given time, an LWP has overall state that is described by |
50 | * lwp::l_stat. The states are broken into two sets below. The first | | 50 | * lwp::l_stat. The states are broken into two sets below. The first |
51 | * set is guaranteed to represent the absolute, current state of the | | 51 | * set is guaranteed to represent the absolute, current state of the |
52 | * LWP: | | 52 | * LWP: |
53 | * | | 53 | * |
54 | * LSONPROC | | 54 | * LSONPROC |
55 | * | | 55 | * |
56 | * On processor: the LWP is executing on a CPU, either in the | | 56 | * On processor: the LWP is executing on a CPU, either in the |
57 | * kernel or in user space. | | 57 | * kernel or in user space. |
58 | * | | 58 | * |
59 | * LSRUN | | 59 | * LSRUN |
60 | * | | 60 | * |
61 | * Runnable: the LWP is parked on a run queue, and may soon be | | 61 | * Runnable: the LWP is parked on a run queue, and may soon be |
62 | * chosen to run by an idle processor, or by a processor that | | 62 | * chosen to run by an idle processor, or by a processor that |
63 | * has been asked to preempt a currently running but lower | | 63 | * has been asked to preempt a currently running but lower |
64 | * priority LWP. | | 64 | * priority LWP. |
65 | * | | 65 | * |
66 | * LSIDL | | 66 | * LSIDL |
67 | * | | 67 | * |
68 | * Idle: the LWP has been created but has not yet executed, | | 68 | * Idle: the LWP has been created but has not yet executed, |
69 | * or it has ceased executing a unit of work and is waiting | | 69 | * or it has ceased executing a unit of work and is waiting |
70 | * to be started again. | | 70 | * to be started again. |
71 | * | | 71 | * |
72 | * LSSUSPENDED: | | 72 | * LSSUSPENDED: |
73 | * | | 73 | * |
74 | * Suspended: the LWP has had its execution suspended by | | 74 | * Suspended: the LWP has had its execution suspended by |
75 | * another LWP in the same process using the _lwp_suspend() | | 75 | * another LWP in the same process using the _lwp_suspend() |
76 | * system call. User-level LWPs also enter the suspended | | 76 | * system call. User-level LWPs also enter the suspended |
77 | * state when the system is shutting down. | | 77 | * state when the system is shutting down. |
78 | * | | 78 | * |
79 | * The second set represents a "statement of intent" on behalf of the | | 79 | * The second set represents a "statement of intent" on behalf of the |
80 | * LWP. The LWP may in fact be executing on a processor, or may be | | 80 | * LWP. The LWP may in fact be executing on a processor, or may be |
81 | * sleeping or idle. It is expected to take the necessary action to | | 81 | * sleeping or idle. It is expected to take the necessary action to |
82 | * stop executing or become "running" again within a short timeframe. | | 82 | * stop executing or become "running" again within a short timeframe. |
83 | * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. | | 83 | * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. |
84 | * Importantly, it indicates that its state is tied to a CPU. | | 84 | * Importantly, it indicates that its state is tied to a CPU. |
85 | * | | 85 | * |
86 | * LSZOMB: | | 86 | * LSZOMB: |
87 | * | | 87 | * |
88 | * Dead or dying: the LWP has released most of its resources | | 88 | * Dead or dying: the LWP has released most of its resources |
89 | * and is about to switch away into oblivion, or has already | | 89 | * and is about to switch away into oblivion, or has already |
90 | * switched away. When it switches away, its few remaining | | 90 | * switched away. When it switches away, its few remaining |
91 | * resources can be collected. | | 91 | * resources can be collected. |
92 | * | | 92 | * |
93 | * LSSLEEP: | | 93 | * LSSLEEP: |
94 | * | | 94 | * |
95 | * Sleeping: the LWP has entered itself onto a sleep queue, and | | 95 | * Sleeping: the LWP has entered itself onto a sleep queue, and |
96 | * has switched away or will switch away shortly to allow other | | 96 | * has switched away or will switch away shortly to allow other |
97 | * LWPs to run on the CPU. | | 97 | * LWPs to run on the CPU. |
98 | * | | 98 | * |
99 | * LSSTOP: | | 99 | * LSSTOP: |
100 | * | | 100 | * |
101 | * Stopped: the LWP has been stopped as a result of a job | | 101 | * Stopped: the LWP has been stopped as a result of a job |
102 | * control signal, or as a result of the ptrace() interface. | | 102 | * control signal, or as a result of the ptrace() interface. |
103 | * | | 103 | * |
104 | * Stopped LWPs may run briefly within the kernel to handle | | 104 | * Stopped LWPs may run briefly within the kernel to handle |
105 | * signals that they receive, but will not return to user space | | 105 | * signals that they receive, but will not return to user space |
106 | * until their process' state is changed away from stopped. | | 106 | * until their process' state is changed away from stopped. |
107 | * | | 107 | * |
108 | * Single LWPs within a process cannot be set stopped | | 108 | * Single LWPs within a process cannot be set stopped |
109 | * selectively: all actions that can stop or continue LWPs | | 109 | * selectively: all actions that can stop or continue LWPs |
110 | * occur at the process level. | | 110 | * occur at the process level. |
111 | * | | 111 | * |
112 | * State transitions | | 112 | * State transitions |
113 | * | | 113 | * |
114 | * Note that the LSSTOP state may only be set when returning to | | 114 | * Note that the LSSTOP state may only be set when returning to |
115 | * user space in userret(), or when sleeping interruptibly. The | | 115 | * user space in userret(), or when sleeping interruptibly. The |
116 | * LSSUSPENDED state may only be set in userret(). Before setting | | 116 | * LSSUSPENDED state may only be set in userret(). Before setting |
117 | * those states, we try to ensure that the LWPs will release all | | 117 | * those states, we try to ensure that the LWPs will release all |
118 | * locks that they hold, and at a minimum try to ensure that the | | 118 | * locks that they hold, and at a minimum try to ensure that the |
119 | * LWP can be set runnable again by a signal. | | 119 | * LWP can be set runnable again by a signal. |
120 | * | | 120 | * |
121 | * LWPs may transition states in the following ways: | | 121 | * LWPs may transition states in the following ways: |
122 | * | | 122 | * |
123 | * RUN -------> ONPROC ONPROC -----> RUN | | 123 | * RUN -------> ONPROC ONPROC -----> RUN |
124 | * > SLEEP | | 124 | * > SLEEP |
125 | * > STOPPED | | 125 | * > STOPPED |
126 | * > SUSPENDED | | 126 | * > SUSPENDED |
127 | * > ZOMB | | 127 | * > ZOMB |
128 | * > IDL (special cases) | | 128 | * > IDL (special cases) |
129 | * | | 129 | * |
130 | * STOPPED ---> RUN SUSPENDED --> RUN | | 130 | * STOPPED ---> RUN SUSPENDED --> RUN |
131 | * > SLEEP | | 131 | * > SLEEP |
132 | * | | 132 | * |
133 | * SLEEP -----> ONPROC IDL --------> RUN | | 133 | * SLEEP -----> ONPROC IDL --------> RUN |
134 | * > RUN > SUSPENDED | | 134 | * > RUN > SUSPENDED |
135 | * > STOPPED > STOPPED | | 135 | * > STOPPED > STOPPED |
136 | * > ONPROC (special cases) | | 136 | * > ONPROC (special cases) |
137 | * | | 137 | * |
138 | * Some state transitions are only possible with kernel threads (eg | | 138 | * Some state transitions are only possible with kernel threads (eg |
139 | * ONPROC -> IDL) and happen under tightly controlled circumstances | | 139 | * ONPROC -> IDL) and happen under tightly controlled circumstances |
140 | * free of unwanted side effects. | | 140 | * free of unwanted side effects. |
141 | * | | 141 | * |
142 | * Migration | | 142 | * Migration |
143 | * | | 143 | * |
144 | * Migration of threads from one CPU to another may be performed | | 144 | * Migration of threads from one CPU to another may be performed |
145 | * internally by the scheduler via the sched_takecpu() or sched_catchlwp() | | 145 | * internally by the scheduler via the sched_takecpu() or sched_catchlwp() |
146 | * functions. The universal lwp_migrate() function should be used for | | 146 | * functions. The universal lwp_migrate() function should be used for |
147 | * any other cases. Subsystems in the kernel must be aware that the CPU | | 147 | * any other cases. Subsystems in the kernel must be aware that the CPU |
148 | * of an LWP may change while it is not locked. | | 148 | * of an LWP may change while it is not locked. |
149 | * | | 149 | * |
150 | * Locking | | 150 | * Locking |
151 | * | | 151 | * |
152 | * The majority of fields in 'struct lwp' are covered by a single, | | 152 | * The majority of fields in 'struct lwp' are covered by a single, |
153 | * general spin lock pointed to by lwp::l_mutex. The locks covering | | 153 | * general spin lock pointed to by lwp::l_mutex. The locks covering |
154 | * each field are documented in sys/lwp.h. | | 154 | * each field are documented in sys/lwp.h. |
155 | * | | 155 | * |
156 | * State transitions must be made with the LWP's general lock held, | | 156 | * State transitions must be made with the LWP's general lock held, |
157 | * and may cause the LWP's lock pointer to change. Manipulation of | | 157 | * and may cause the LWP's lock pointer to change. Manipulation of |
158 | * the general lock is not performed directly, but through calls to | | 158 | * the general lock is not performed directly, but through calls to |
159 | * lwp_lock(), lwp_unlock() and others. It should be noted that the | | 159 | * lwp_lock(), lwp_unlock() and others. It should be noted that the |
160 | * adaptive locks are not allowed to be released while the LWP's lock | | 160 | * adaptive locks are not allowed to be released while the LWP's lock |
161 | * is being held (unlike for other spin-locks). | | 161 | * is being held (unlike for other spin-locks). |
162 | * | | 162 | * |
163 | * States and their associated locks: | | 163 | * States and their associated locks: |
164 | * | | 164 | * |
165 | * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: | | 165 | * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: |
166 | * | | 166 | * |
167 | * Always covered by spc_lwplock, which protects LWPs not | | 167 | * Always covered by spc_lwplock, which protects LWPs not |
168 | * associated with any other sync object. This is a per-CPU | | 168 | * associated with any other sync object. This is a per-CPU |
169 | * lock and matches lwp::l_cpu. | | 169 | * lock and matches lwp::l_cpu. |
170 | * | | 170 | * |
171 | * LSRUN: | | 171 | * LSRUN: |
172 | * | | 172 | * |
173 | * Always covered by spc_mutex, which protects the run queues. | | 173 | * Always covered by spc_mutex, which protects the run queues. |
174 | * This is a per-CPU lock and matches lwp::l_cpu. | | 174 | * This is a per-CPU lock and matches lwp::l_cpu. |
175 | * | | 175 | * |
176 | * LSSLEEP: | | 176 | * LSSLEEP: |
177 | * | | 177 | * |
178 | * Covered by a lock associated with the sleep queue (sometimes | | 178 | * Covered by a lock associated with the sleep queue (sometimes |
179 | * a turnstile sleep queue) that the LWP resides on. This can | | 179 | * a turnstile sleep queue) that the LWP resides on. This can |
180 | * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). | | 180 | * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). |
181 | * | | 181 | * |
182 | * LSSTOP: | | 182 | * LSSTOP: |
183 | * | | 183 | * |
184 | * If the LWP was previously sleeping (l_wchan != NULL), then | | 184 | * If the LWP was previously sleeping (l_wchan != NULL), then |
185 | * l_mutex references the sleep queue lock. If the LWP was | | 185 | * l_mutex references the sleep queue lock. If the LWP was |
186 | * runnable or on the CPU when halted, or has been removed from | | 186 | * runnable or on the CPU when halted, or has been removed from |
187 | * the sleep queue since halted, then the lock is spc_lwplock. | | 187 | * the sleep queue since halted, then the lock is spc_lwplock. |
188 | * | | 188 | * |
189 | * The lock order is as follows: | | 189 | * The lock order is as follows: |
190 | * | | 190 | * |
191 | * sleepq -> turnstile -> spc_lwplock -> spc_mutex | | 191 | * sleepq -> turnstile -> spc_lwplock -> spc_mutex |
192 | * | | 192 | * |
193 | * Each process has a scheduler state lock (proc::p_lock), and a | | 193 | * Each process has a scheduler state lock (proc::p_lock), and a |
194 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and | | 194 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and |
195 | * so on. When an LWP is to be entered into or removed from one of the | | 195 | * so on. When an LWP is to be entered into or removed from one of the |
196 | * following states, p_lock must be held and the process-wide counters | | 196 | * following states, p_lock must be held and the process-wide counters |
197 | * adjusted: | | 197 | * adjusted: |
198 | * | | 198 | * |
199 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED | | 199 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED |
200 | * | | 200 | * |
201 | * (But not always for kernel threads. There are some special cases | | 201 | * (But not always for kernel threads. There are some special cases |
202 | * as mentioned above: soft interrupts, and the idle loops.) | | 202 | * as mentioned above: soft interrupts, and the idle loops.) |
203 | * | | 203 | * |
204 | * Note that an LWP is considered running or likely to run soon if in | | 204 | * Note that an LWP is considered running or likely to run soon if in |
205 | * one of the following states. This affects the value of p_nrlwps: | | 205 | * one of the following states. This affects the value of p_nrlwps: |
206 | * | | 206 | * |
207 | * LSRUN, LSONPROC, LSSLEEP | | 207 | * LSRUN, LSONPROC, LSSLEEP |
208 | * | | 208 | * |
209 | * p_lock does not need to be held when transitioning among these | | 209 | * p_lock does not need to be held when transitioning among these |
210 | * three states, hence p_lock is rarely taken for state transitions. | | 210 | * three states, hence p_lock is rarely taken for state transitions. |
211 | */ | | 211 | */ |
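To make the locking rules above concrete, here is a minimal sketch of the documented idiom. It is not code from this file: suspend_one_sketch() is a hypothetical helper that records a suspend request against a running LWP, mirroring the LSRUN/LSONPROC case of lwp_suspend() further down. p_lock is taken first for the process-wide state, then the LWP's general lock through lwp_lock(); lwp_unlock() must be used to release it because a state transition may have re-pointed l_mutex.

/*
 * Editor's sketch (hypothetical helper, not part of kern_lwp.c):
 * request suspension of a running LWP following the locking rules
 * documented above.
 */
static void
suspend_one_sketch(struct lwp *t)
{
	struct proc *p = t->l_proc;

	mutex_enter(p->p_lock);		/* process-wide state and counters */
	lwp_lock(t);			/* LWP's general lock: l_mutex */
	t->l_flag |= LW_WSUSPEND;	/* statement of intent ... */
	lwp_need_userret(t);		/* ... acted upon in userret() */
	lwp_unlock(t);			/* l_mutex may have been re-pointed */
	mutex_exit(p->p_lock);
}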
212 | | | 212 | |
213 | #include <sys/cdefs.h> | | 213 | #include <sys/cdefs.h> |
214 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.233 2020/04/04 20:20:12 thorpej Exp $"); | | 214 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $"); |
215 | | | 215 | |
216 | #include "opt_ddb.h" | | 216 | #include "opt_ddb.h" |
217 | #include "opt_lockdebug.h" | | 217 | #include "opt_lockdebug.h" |
218 | #include "opt_dtrace.h" | | 218 | #include "opt_dtrace.h" |
219 | | | 219 | |
220 | #define _LWP_API_PRIVATE | | 220 | #define _LWP_API_PRIVATE |
221 | | | 221 | |
222 | #include <sys/param.h> | | 222 | #include <sys/param.h> |
223 | #include <sys/systm.h> | | 223 | #include <sys/systm.h> |
224 | #include <sys/cpu.h> | | 224 | #include <sys/cpu.h> |
225 | #include <sys/pool.h> | | 225 | #include <sys/pool.h> |
226 | #include <sys/proc.h> | | 226 | #include <sys/proc.h> |
227 | #include <sys/syscallargs.h> | | 227 | #include <sys/syscallargs.h> |
228 | #include <sys/syscall_stats.h> | | 228 | #include <sys/syscall_stats.h> |
229 | #include <sys/kauth.h> | | 229 | #include <sys/kauth.h> |
230 | #include <sys/sleepq.h> | | 230 | #include <sys/sleepq.h> |
231 | #include <sys/lockdebug.h> | | 231 | #include <sys/lockdebug.h> |
232 | #include <sys/kmem.h> | | 232 | #include <sys/kmem.h> |
233 | #include <sys/pset.h> | | 233 | #include <sys/pset.h> |
234 | #include <sys/intr.h> | | 234 | #include <sys/intr.h> |
235 | #include <sys/lwpctl.h> | | 235 | #include <sys/lwpctl.h> |
236 | #include <sys/atomic.h> | | 236 | #include <sys/atomic.h> |
237 | #include <sys/filedesc.h> | | 237 | #include <sys/filedesc.h> |
238 | #include <sys/fstrans.h> | | 238 | #include <sys/fstrans.h> |
239 | #include <sys/dtrace_bsd.h> | | 239 | #include <sys/dtrace_bsd.h> |
240 | #include <sys/sdt.h> | | 240 | #include <sys/sdt.h> |
241 | #include <sys/ptrace.h> | | 241 | #include <sys/ptrace.h> |
242 | #include <sys/xcall.h> | | 242 | #include <sys/xcall.h> |
243 | #include <sys/uidinfo.h> | | 243 | #include <sys/uidinfo.h> |
244 | #include <sys/sysctl.h> | | 244 | #include <sys/sysctl.h> |
245 | #include <sys/psref.h> | | 245 | #include <sys/psref.h> |
246 | #include <sys/msan.h> | | 246 | #include <sys/msan.h> |
247 | #include <sys/kcov.h> | | 247 | #include <sys/kcov.h> |
248 | #include <sys/thmap.h> | | 248 | #include <sys/thmap.h> |
249 | #include <sys/cprng.h> | | 249 | #include <sys/cprng.h> |
250 | | | 250 | |
251 | #include <uvm/uvm_extern.h> | | 251 | #include <uvm/uvm_extern.h> |
252 | #include <uvm/uvm_object.h> | | 252 | #include <uvm/uvm_object.h> |
253 | | | 253 | |
254 | static pool_cache_t lwp_cache __read_mostly; | | 254 | static pool_cache_t lwp_cache __read_mostly; |
255 | struct lwplist alllwp __cacheline_aligned; | | 255 | struct lwplist alllwp __cacheline_aligned; |
256 | | | 256 | |
257 | /* | | 257 | /* |
258 | * Lookups by global thread ID operate outside of the normal LWP | | 258 | * Lookups by global thread ID operate outside of the normal LWP |
259 | * locking protocol. | | 259 | * locking protocol. |
260 | * | | 260 | * |
261 | * We are using a thmap, which internally can perform lookups lock-free. | | 261 | * We are using a thmap, which internally can perform lookups lock-free. |
262 | * However, we still need to serialize lookups against LWP exit. We | | 262 | * However, we still need to serialize lookups against LWP exit. We |
263 | * achieve this as follows: | | 263 | * achieve this as follows: |
264 | * | | 264 | * |
265 | * => Assignment of TID is performed lazily by the LWP itself, when it | | 265 | * => Assignment of TID is performed lazily by the LWP itself, when it |
266 | * is first requested. Insertion into the thmap is done completely | | 266 | * is first requested. Insertion into the thmap is done completely |
267 | * lock-free (other than the internal locking performed by thmap itself). | | 267 | * lock-free (other than the internal locking performed by thmap itself). |
268 | * Once the TID is published in the map, the l___tid field in the LWP | | 268 | * Once the TID is published in the map, the l___tid field in the LWP |
269 | * is protected by p_lock. | | 269 | * is protected by p_lock. |
270 | * | | 270 | * |
271 | * => When we look up an LWP in the thmap, we take lwp_threadid_lock as | | 271 | * => When we look up an LWP in the thmap, we take lwp_threadid_lock as |
272 | * a READER. While still holding the lock, we add a reference to | | 272 | * a READER. While still holding the lock, we add a reference to |
273 | * the LWP (using atomics). After adding the reference, we drop the | | 273 | * the LWP (using atomics). After adding the reference, we drop the |
274 | * lwp_threadid_lock. We now take p_lock and check the state of the | | 274 | * lwp_threadid_lock. We now take p_lock and check the state of the |
275 | * LWP. If the LWP is draining its references or if the l___tid field | | 275 | * LWP. If the LWP is draining its references or if the l___tid field |
276 | * has been invalidated, we drop the reference we took and return NULL. | | 276 | * has been invalidated, we drop the reference we took and return NULL. |
277 | * Otherwise, the lookup has succeeded and the LWP is returned with a | | 277 | * Otherwise, the lookup has succeeded and the LWP is returned with a |
278 | * reference count that the caller is responsible for dropping. | | 278 | * reference count that the caller is responsible for dropping. |
279 | * | | 279 | * |
280 | * => When an LWP is exiting, it releases its TID. While holding the | | 280 | * => When an LWP is exiting, it releases its TID. While holding the |
281 | * p_lock, the entry is deleted from the thmap and the l___tid field | | 281 | * p_lock, the entry is deleted from the thmap and the l___tid field |
282 | * invalidated. Once the field is invalidated, p_lock is released. | | 282 | * invalidated. Once the field is invalidated, p_lock is released. |
283 | * It is done in this sequence because the l___tid field is used as | | 283 | * It is done in this sequence because the l___tid field is used as |
284 | * the lookup key storage in the thmap in order to conserve memory. | | 284 | * the lookup key storage in the thmap in order to conserve memory. |
285 | * Even if a lookup races with this process and succeeds only to have | | 285 | * Even if a lookup races with this process and succeeds only to have |
286 | * the TID invalidated, it's OK because it also results in a reference | | 286 | * the TID invalidated, it's OK because it also results in a reference |
287 | * that will be drained later. | | 287 | * that will be drained later. |
288 | * | | 288 | * |
289 | * => Deleting a node also requires GC of now-unused thmap nodes. The | | 289 | * => Deleting a node also requires GC of now-unused thmap nodes. The |
290 | * serialization between stage_gc and gc is achieved by simply | | 290 | * serialization between stage_gc and gc is achieved by simply |
291 | * taking the lwp_threadid_lock as a WRITER and immediately releasing | | 291 | * taking the lwp_threadid_lock as a WRITER and immediately releasing |
292 | * it. By doing this, we know that any busy readers will have drained. | | 292 | * it. By doing this, we know that any busy readers will have drained. |
293 | * | | 293 | * |
294 | * => When an LWP is exiting, it also drains off any references being | | 294 | * => When an LWP is exiting, it also drains off any references being |
295 | * held by others. However, the reference in the lookup path is taken | | 295 | * held by others. However, the reference in the lookup path is taken |
296 | * outside the normal locking protocol. There needs to be additional | | 296 | * outside the normal locking protocol. There needs to be additional |
297 | * serialization so that EITHER lwp_drainrefs() sees the incremented | | 297 | * serialization so that EITHER lwp_drainrefs() sees the incremented |
298 | * reference count so that it knows to wait, OR lwp_getref_tid() sees | | 298 | * reference count so that it knows to wait, OR lwp_getref_tid() sees |
299 | * that the LWP is waiting to drain and thus drops the reference | | 299 | * that the LWP is waiting to drain and thus drops the reference |
300 | * immediately. This is achieved by taking lwp_threadid_lock as a | | 300 | * immediately. This is achieved by taking lwp_threadid_lock as a |
301 | * WRITER when setting LPR_DRAINING. Note the locking order: | | 301 | * WRITER when setting LPR_DRAINING. Note the locking order: |
302 | * | | 302 | * |
303 | * p_lock -> lwp_threadid_lock | | 303 | * p_lock -> lwp_threadid_lock |
304 | * | | 304 | * |
305 | * Note that this scheme could easily use pserialize(9) in place of the | | 305 | * Note that this scheme could easily use pserialize(9) in place of the |
306 | * lwp_threadid_lock rwlock. However, this would require placing a | | 306 | * lwp_threadid_lock rwlock. However, this would require placing a |
307 | * pserialize_perform() call in the LWP exit path, which is arguably more | | 307 | * pserialize_perform() call in the LWP exit path, which is arguably more |
308 | * expensive than briefly taking a global lock that should be relatively | | 308 | * expensive than briefly taking a global lock that should be relatively |
309 | * uncontended. This issue can be revisited if the rwlock proves to be | | 309 | * uncontended. This issue can be revisited if the rwlock proves to be |
310 | * a performance problem. | | 310 | * a performance problem. |
311 | */ | | 311 | */ |
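As a rough illustration of the reader-side protocol described above, the following hedged sketch approximates what a lookup by TID does. It is not the routine from this file: the function name tid_lookup_sketch(), the key encoding, and the exact reference-count manipulation are assumptions; lwp_threadid_lock, lwp_threadid_map and LPR_DRAINING come from the surrounding code, and lwp_delref()/lwp_delref2() are the standard LWP reference helpers.

/*
 * Editor's sketch of the lookup path (illustrative only; the real
 * lookup routine lives further down in this file).
 */
static struct lwp *
tid_lookup_sketch(uint64_t tid)
{
	struct lwp *l;

	/* Lock-free thmap lookup, serialized against GC by the rwlock. */
	rw_enter(&lwp_threadid_lock, RW_READER);
	l = thmap_get(lwp_threadid_map, &tid, sizeof(tid));
	if (l != NULL) {
		/* Pin the LWP with a reference before dropping the lock. */
		atomic_inc_uint(&l->l_refcnt);
	}
	rw_exit(&lwp_threadid_lock);
	if (l == NULL)
		return NULL;

	/* Re-validate under p_lock: the LWP may be on its way out. */
	mutex_enter(l->l_proc->p_lock);
	if ((l->l_prflag & LPR_DRAINING) != 0) {
		/* Racing with exit: drop our reference and fail. */
		lwp_delref2(l);
		mutex_exit(l->l_proc->p_lock);
		return NULL;
	}
	mutex_exit(l->l_proc->p_lock);
	return l;	/* caller must lwp_delref() when done */
}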
312 | static krwlock_t lwp_threadid_lock __cacheline_aligned; | | 312 | static krwlock_t lwp_threadid_lock __cacheline_aligned; |
313 | static thmap_t * lwp_threadid_map __read_mostly; | | 313 | static thmap_t * lwp_threadid_map __read_mostly; |
314 | | | 314 | |
315 | static void lwp_dtor(void *, void *); | | 315 | static void lwp_dtor(void *, void *); |
316 | | | 316 | |
317 | /* DTrace proc provider probes */ | | 317 | /* DTrace proc provider probes */ |
318 | SDT_PROVIDER_DEFINE(proc); | | 318 | SDT_PROVIDER_DEFINE(proc); |
319 | | | 319 | |
320 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); | | 320 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); |
321 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); | | 321 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); |
322 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); | | 322 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); |
323 | | | 323 | |
324 | struct turnstile turnstile0 __cacheline_aligned; | | 324 | struct turnstile turnstile0 __cacheline_aligned; |
325 | struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { | | 325 | struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { |
326 | #ifdef LWP0_CPU_INFO | | 326 | #ifdef LWP0_CPU_INFO |
327 | .l_cpu = LWP0_CPU_INFO, | | 327 | .l_cpu = LWP0_CPU_INFO, |
328 | #endif | | 328 | #endif |
329 | #ifdef LWP0_MD_INITIALIZER | | 329 | #ifdef LWP0_MD_INITIALIZER |
330 | .l_md = LWP0_MD_INITIALIZER, | | 330 | .l_md = LWP0_MD_INITIALIZER, |
331 | #endif | | 331 | #endif |
332 | .l_proc = &proc0, | | 332 | .l_proc = &proc0, |
333 | .l_lid = 1, | | 333 | .l_lid = 1, |
334 | .l_flag = LW_SYSTEM, | | 334 | .l_flag = LW_SYSTEM, |
335 | .l_stat = LSONPROC, | | 335 | .l_stat = LSONPROC, |
336 | .l_ts = &turnstile0, | | 336 | .l_ts = &turnstile0, |
337 | .l_syncobj = &sched_syncobj, | | 337 | .l_syncobj = &sched_syncobj, |
338 | .l_refcnt = 0, | | 338 | .l_refcnt = 0, |
339 | .l_priority = PRI_USER + NPRI_USER - 1, | | 339 | .l_priority = PRI_USER + NPRI_USER - 1, |
340 | .l_inheritedprio = -1, | | 340 | .l_inheritedprio = -1, |
341 | .l_class = SCHED_OTHER, | | 341 | .l_class = SCHED_OTHER, |
342 | .l_psid = PS_NONE, | | 342 | .l_psid = PS_NONE, |
343 | .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), | | 343 | .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), |
344 | .l_name = __UNCONST("swapper"), | | 344 | .l_name = __UNCONST("swapper"), |
345 | .l_fd = &filedesc0, | | 345 | .l_fd = &filedesc0, |
346 | }; | | 346 | }; |
347 | | | 347 | |
348 | static void lwp_threadid_init(void); | | 348 | static void lwp_threadid_init(void); |
349 | static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); | | 349 | static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); |
350 | | | 350 | |
351 | /* | | 351 | /* |
352 | * sysctl helper routine for kern.maxlwp. Ensures that the new | | 352 | * sysctl helper routine for kern.maxlwp. Ensures that the new |
353 | * value is not too low or too high. | | 353 | * value is not too low or too high. |
354 | */ | | 354 | */ |
355 | static int | | 355 | static int |
356 | sysctl_kern_maxlwp(SYSCTLFN_ARGS) | | 356 | sysctl_kern_maxlwp(SYSCTLFN_ARGS) |
357 | { | | 357 | { |
358 | int error, nmaxlwp; | | 358 | int error, nmaxlwp; |
359 | struct sysctlnode node; | | 359 | struct sysctlnode node; |
360 | | | 360 | |
361 | nmaxlwp = maxlwp; | | 361 | nmaxlwp = maxlwp; |
362 | node = *rnode; | | 362 | node = *rnode; |
363 | node.sysctl_data = &nmaxlwp; | | 363 | node.sysctl_data = &nmaxlwp; |
364 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); | | 364 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
365 | if (error || newp == NULL) | | 365 | if (error || newp == NULL) |
366 | return error; | | 366 | return error; |
367 | | | 367 | |
368 | if (nmaxlwp < 0 || nmaxlwp >= 65536) | | 368 | if (nmaxlwp < 0 || nmaxlwp >= 65536) |
369 | return EINVAL; | | 369 | return EINVAL; |
370 | if (nmaxlwp > cpu_maxlwp()) | | 370 | if (nmaxlwp > cpu_maxlwp()) |
371 | return EINVAL; | | 371 | return EINVAL; |
372 | maxlwp = nmaxlwp; | | 372 | maxlwp = nmaxlwp; |
373 | | | 373 | |
374 | return 0; | | 374 | return 0; |
375 | } | | 375 | } |
376 | | | 376 | |
377 | static void | | 377 | static void |
378 | sysctl_kern_lwp_setup(void) | | 378 | sysctl_kern_lwp_setup(void) |
379 | { | | 379 | { |
380 | struct sysctllog *clog = NULL; | | 380 | struct sysctllog *clog = NULL; |
381 | | | 381 | |
382 | sysctl_createv(&clog, 0, NULL, NULL, | | 382 | sysctl_createv(&clog, 0, NULL, NULL, |
383 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 383 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
384 | CTLTYPE_INT, "maxlwp", | | 384 | CTLTYPE_INT, "maxlwp", |
385 | SYSCTL_DESCR("Maximum number of simultaneous threads"), | | 385 | SYSCTL_DESCR("Maximum number of simultaneous threads"), |
386 | sysctl_kern_maxlwp, 0, NULL, 0, | | 386 | sysctl_kern_maxlwp, 0, NULL, 0, |
387 | CTL_KERN, CTL_CREATE, CTL_EOL); | | 387 | CTL_KERN, CTL_CREATE, CTL_EOL); |
388 | } | | 388 | } |
389 | | | 389 | |
390 | void | | 390 | void |
391 | lwpinit(void) | | 391 | lwpinit(void) |
392 | { | | 392 | { |
393 | | | 393 | |
394 | LIST_INIT(&alllwp); | | 394 | LIST_INIT(&alllwp); |
395 | lwpinit_specificdata(); | | 395 | lwpinit_specificdata(); |
396 | lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, | | 396 | lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, |
397 | "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); | | 397 | "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); |
398 | | | 398 | |
399 | maxlwp = cpu_maxlwp(); | | 399 | maxlwp = cpu_maxlwp(); |
400 | sysctl_kern_lwp_setup(); | | 400 | sysctl_kern_lwp_setup(); |
401 | lwp_threadid_init(); | | 401 | lwp_threadid_init(); |
402 | } | | 402 | } |
403 | | | 403 | |
404 | void | | 404 | void |
405 | lwp0_init(void) | | 405 | lwp0_init(void) |
406 | { | | 406 | { |
407 | struct lwp *l = &lwp0; | | 407 | struct lwp *l = &lwp0; |
408 | | | 408 | |
409 | KASSERT((void *)uvm_lwp_getuarea(l) != NULL); | | 409 | KASSERT((void *)uvm_lwp_getuarea(l) != NULL); |
410 | KASSERT(l->l_lid == proc0.p_nlwpid); | | 410 | KASSERT(l->l_lid == proc0.p_nlwpid); |
411 | | | 411 | |
412 | LIST_INSERT_HEAD(&alllwp, l, l_list); | | 412 | LIST_INSERT_HEAD(&alllwp, l, l_list); |
413 | | | 413 | |
414 | callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); | | 414 | callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); |
415 | callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); | | 415 | callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); |
416 | cv_init(&l->l_sigcv, "sigwait"); | | 416 | cv_init(&l->l_sigcv, "sigwait"); |
417 | cv_init(&l->l_waitcv, "vfork"); | | 417 | cv_init(&l->l_waitcv, "vfork"); |
418 | | | 418 | |
419 | kauth_cred_hold(proc0.p_cred); | | 419 | kauth_cred_hold(proc0.p_cred); |
420 | l->l_cred = proc0.p_cred; | | 420 | l->l_cred = proc0.p_cred; |
421 | | | 421 | |
422 | kdtrace_thread_ctor(NULL, l); | | 422 | kdtrace_thread_ctor(NULL, l); |
423 | lwp_initspecific(l); | | 423 | lwp_initspecific(l); |
424 | | | 424 | |
425 | SYSCALL_TIME_LWP_INIT(l); | | 425 | SYSCALL_TIME_LWP_INIT(l); |
426 | } | | 426 | } |
427 | | | 427 | |
428 | static void | | 428 | static void |
429 | lwp_dtor(void *arg, void *obj) | | 429 | lwp_dtor(void *arg, void *obj) |
430 | { | | 430 | { |
431 | lwp_t *l = obj; | | 431 | lwp_t *l = obj; |
432 | (void)l; | | 432 | (void)l; |
433 | | | 433 | |
434 | /* | | 434 | /* |
435 | * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() | | 435 | * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() |
436 | * calls will exit before the memory of the LWP is returned to the pool, | | 436 | * calls will exit before the memory of the LWP is returned to the pool, |
437 | * where the KVA of the LWP structure might be freed and re-used for | | 437 | * where the KVA of the LWP structure might be freed and re-used for |
438 | * other purposes. Kernel preemption is disabled around mutex_oncpu() | | 438 | * other purposes. Kernel preemption is disabled around mutex_oncpu() |
439 | * and rw_oncpu() callers, therefore a cross-call to all CPUs will do | | 439 | * and rw_oncpu() callers, therefore a cross-call to all CPUs will do |
440 | * the job. Also, the value of l->l_cpu must still be valid at this point. | | 440 | * the job. Also, the value of l->l_cpu must still be valid at this point. |
441 | */ | | 441 | */ |
442 | KASSERT(l->l_cpu != NULL); | | 442 | KASSERT(l->l_cpu != NULL); |
443 | xc_barrier(0); | | 443 | xc_barrier(0); |
444 | } | | 444 | } |
445 | | | 445 | |
446 | /* | | 446 | /* |
447 | * Set an LWP suspended. | | 447 | * Set an LWP suspended. |
448 | * | | 448 | * |
449 | * Must be called with p_lock held, and the LWP locked. Will unlock the | | 449 | * Must be called with p_lock held, and the LWP locked. Will unlock the |
450 | * LWP before return. | | 450 | * LWP before return. |
451 | */ | | 451 | */ |
452 | int | | 452 | int |
453 | lwp_suspend(struct lwp *curl, struct lwp *t) | | 453 | lwp_suspend(struct lwp *curl, struct lwp *t) |
454 | { | | 454 | { |
455 | int error; | | 455 | int error; |
456 | | | 456 | |
457 | KASSERT(mutex_owned(t->l_proc->p_lock)); | | 457 | KASSERT(mutex_owned(t->l_proc->p_lock)); |
458 | KASSERT(lwp_locked(t, NULL)); | | 458 | KASSERT(lwp_locked(t, NULL)); |
459 | | | 459 | |
460 | KASSERT(curl != t || curl->l_stat == LSONPROC); | | 460 | KASSERT(curl != t || curl->l_stat == LSONPROC); |
461 | | | 461 | |
462 | /* | | 462 | /* |
463 | * If the current LWP has been told to exit, we must not suspend anyone | | 463 | * If the current LWP has been told to exit, we must not suspend anyone |
464 | * else or deadlock could occur. We won't return to userspace. | | 464 | * else or deadlock could occur. We won't return to userspace. |
465 | */ | | 465 | */ |
466 | if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { | | 466 | if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { |
467 | lwp_unlock(t); | | 467 | lwp_unlock(t); |
468 | return (EDEADLK); | | 468 | return (EDEADLK); |
469 | } | | 469 | } |
470 | | | 470 | |
471 | if ((t->l_flag & LW_DBGSUSPEND) != 0) { | | 471 | if ((t->l_flag & LW_DBGSUSPEND) != 0) { |
472 | lwp_unlock(t); | | 472 | lwp_unlock(t); |
473 | return 0; | | 473 | return 0; |
474 | } | | 474 | } |
475 | | | 475 | |
476 | error = 0; | | 476 | error = 0; |
477 | | | 477 | |
478 | switch (t->l_stat) { | | 478 | switch (t->l_stat) { |
479 | case LSRUN: | | 479 | case LSRUN: |
480 | case LSONPROC: | | 480 | case LSONPROC: |
481 | t->l_flag |= LW_WSUSPEND; | | 481 | t->l_flag |= LW_WSUSPEND; |
482 | lwp_need_userret(t); | | 482 | lwp_need_userret(t); |
483 | lwp_unlock(t); | | 483 | lwp_unlock(t); |
484 | break; | | 484 | break; |
485 | | | 485 | |
486 | case LSSLEEP: | | 486 | case LSSLEEP: |
487 | t->l_flag |= LW_WSUSPEND; | | 487 | t->l_flag |= LW_WSUSPEND; |
488 | | | 488 | |
489 | /* | | 489 | /* |
490 | * Kick the LWP and try to get it to the kernel boundary | | 490 | * Kick the LWP and try to get it to the kernel boundary |
491 | * so that it will release any locks that it holds. | | 491 | * so that it will release any locks that it holds. |
492 | * setrunnable() will release the lock. | | 492 | * setrunnable() will release the lock. |
493 | */ | | 493 | */ |
494 | if ((t->l_flag & LW_SINTR) != 0) | | 494 | if ((t->l_flag & LW_SINTR) != 0) |
495 | setrunnable(t); | | 495 | setrunnable(t); |
496 | else | | 496 | else |
497 | lwp_unlock(t); | | 497 | lwp_unlock(t); |
498 | break; | | 498 | break; |
499 | | | 499 | |
500 | case LSSUSPENDED: | | 500 | case LSSUSPENDED: |
501 | lwp_unlock(t); | | 501 | lwp_unlock(t); |
502 | break; | | 502 | break; |
503 | | | 503 | |
504 | case LSSTOP: | | 504 | case LSSTOP: |
505 | t->l_flag |= LW_WSUSPEND; | | 505 | t->l_flag |= LW_WSUSPEND; |
506 | setrunnable(t); | | 506 | setrunnable(t); |
507 | break; | | 507 | break; |
508 | | | 508 | |
509 | case LSIDL: | | 509 | case LSIDL: |
510 | case LSZOMB: | | 510 | case LSZOMB: |
511 | error = EINTR; /* It's what Solaris does..... */ | | 511 | error = EINTR; /* It's what Solaris does..... */ |
512 | lwp_unlock(t); | | 512 | lwp_unlock(t); |
513 | break; | | 513 | break; |
514 | } | | 514 | } |
515 | | | 515 | |
516 | return (error); | | 516 | return (error); |
517 | } | | 517 | } |
518 | | | 518 | |
519 | /* | | 519 | /* |
520 | * Restart a suspended LWP. | | 520 | * Restart a suspended LWP. |
521 | * | | 521 | * |
522 | * Must be called with p_lock held, and the LWP locked. Will unlock the | | 522 | * Must be called with p_lock held, and the LWP locked. Will unlock the |
523 | * LWP before return. | | 523 | * LWP before return. |
524 | */ | | 524 | */ |
525 | void | | 525 | void |
526 | lwp_continue(struct lwp *l) | | 526 | lwp_continue(struct lwp *l) |
527 | { | | 527 | { |
528 | | | 528 | |
529 | KASSERT(mutex_owned(l->l_proc->p_lock)); | | 529 | KASSERT(mutex_owned(l->l_proc->p_lock)); |
530 | KASSERT(lwp_locked(l, NULL)); | | 530 | KASSERT(lwp_locked(l, NULL)); |
531 | | | 531 | |
532 | /* If rebooting or not suspended, then just bail out. */ | | 532 | /* If rebooting or not suspended, then just bail out. */ |
533 | if ((l->l_flag & LW_WREBOOT) != 0) { | | 533 | if ((l->l_flag & LW_WREBOOT) != 0) { |
534 | lwp_unlock(l); | | 534 | lwp_unlock(l); |
535 | return; | | 535 | return; |
536 | } | | 536 | } |
537 | | | 537 | |
538 | l->l_flag &= ~LW_WSUSPEND; | | 538 | l->l_flag &= ~LW_WSUSPEND; |
539 | | | 539 | |
540 | if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { | | 540 | if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { |
541 | lwp_unlock(l); | | 541 | lwp_unlock(l); |
542 | return; | | 542 | return; |
543 | } | | 543 | } |
544 | | | 544 | |
545 | /* setrunnable() will release the lock. */ | | 545 | /* setrunnable() will release the lock. */ |
546 | setrunnable(l); | | 546 | setrunnable(l); |
547 | } | | 547 | } |
548 | | | 548 | |
549 | /* | | 549 | /* |
550 | * Restart a stopped LWP. | | 550 | * Restart a stopped LWP. |
551 | * | | 551 | * |
552 | * Must be called with p_lock held, and the LWP NOT locked. Will unlock the | | 552 | * Must be called with p_lock held, and the LWP NOT locked. Will unlock the |
553 | * LWP before return. | | 553 | * LWP before return. |
554 | */ | | 554 | */ |
555 | void | | 555 | void |
556 | lwp_unstop(struct lwp *l) | | 556 | lwp_unstop(struct lwp *l) |
557 | { | | 557 | { |
558 | struct proc *p = l->l_proc; | | 558 | struct proc *p = l->l_proc; |
559 | | | 559 | |
560 | KASSERT(mutex_owned(proc_lock)); | | 560 | KASSERT(mutex_owned(proc_lock)); |
561 | KASSERT(mutex_owned(p->p_lock)); | | 561 | KASSERT(mutex_owned(p->p_lock)); |
562 | | | 562 | |
563 | lwp_lock(l); | | 563 | lwp_lock(l); |
564 | | | 564 | |
565 | KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); | | 565 | KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); |
566 | | | 566 | |
567 | /* If not stopped, then just bail out. */ | | 567 | /* If not stopped, then just bail out. */ |
568 | if (l->l_stat != LSSTOP) { | | 568 | if (l->l_stat != LSSTOP) { |
569 | lwp_unlock(l); | | 569 | lwp_unlock(l); |
570 | return; | | 570 | return; |
571 | } | | 571 | } |
572 | | | 572 | |
573 | p->p_stat = SACTIVE; | | 573 | p->p_stat = SACTIVE; |
574 | p->p_sflag &= ~PS_STOPPING; | | 574 | p->p_sflag &= ~PS_STOPPING; |
575 | | | 575 | |
576 | if (!p->p_waited) | | 576 | if (!p->p_waited) |
577 | p->p_pptr->p_nstopchild--; | | 577 | p->p_pptr->p_nstopchild--; |
578 | | | 578 | |
579 | if (l->l_wchan == NULL) { | | 579 | if (l->l_wchan == NULL) { |
580 | /* setrunnable() will release the lock. */ | | 580 | /* setrunnable() will release the lock. */ |
581 | setrunnable(l); | | 581 | setrunnable(l); |
582 | } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { | | 582 | } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { |
583 | /* setrunnable() so we can receive the signal */ | | 583 | /* setrunnable() so we can receive the signal */ |
584 | setrunnable(l); | | 584 | setrunnable(l); |
585 | } else { | | 585 | } else { |
586 | l->l_stat = LSSLEEP; | | 586 | l->l_stat = LSSLEEP; |
587 | p->p_nrlwps++; | | 587 | p->p_nrlwps++; |
588 | lwp_unlock(l); | | 588 | lwp_unlock(l); |
589 | } | | 589 | } |
590 | } | | 590 | } |
591 | | | 591 | |
592 | /* | | 592 | /* |
593 | * Wait for an LWP within the current process to exit. If 'lid' is | | 593 | * Wait for an LWP within the current process to exit. If 'lid' is |
594 | * non-zero, we are waiting for a specific LWP. | | 594 | * non-zero, we are waiting for a specific LWP. |
595 | * | | 595 | * |
596 | * Must be called with p->p_lock held. | | 596 | * Must be called with p->p_lock held. |
597 | */ | | 597 | */ |
598 | int | | 598 | int |
599 | lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) | | 599 | lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) |
600 | { | | 600 | { |
601 | const lwpid_t curlid = l->l_lid; | | 601 | const lwpid_t curlid = l->l_lid; |
602 | proc_t *p = l->l_proc; | | 602 | proc_t *p = l->l_proc; |
603 | lwp_t *l2, *next; | | 603 | lwp_t *l2, *next; |
604 | int error; | | 604 | int error; |
605 | | | 605 | |
606 | KASSERT(mutex_owned(p->p_lock)); | | 606 | KASSERT(mutex_owned(p->p_lock)); |
607 | | | 607 | |
608 | p->p_nlwpwait++; | | 608 | p->p_nlwpwait++; |
609 | l->l_waitingfor = lid; | | 609 | l->l_waitingfor = lid; |
610 | | | 610 | |
611 | for (;;) { | | 611 | for (;;) { |
612 | int nfound; | | 612 | int nfound; |
613 | | | 613 | |
614 | /* | | 614 | /* |
615 | * Avoid a race between exit1() and sigexit(): if the | | 615 | * Avoid a race between exit1() and sigexit(): if the |
616 | * process is dumping core, then we need to bail out: call | | 616 | * process is dumping core, then we need to bail out: call |
617 | * into lwp_userret() where we will be suspended until the | | 617 | * into lwp_userret() where we will be suspended until the |
618 | * deed is done. | | 618 | * deed is done. |
619 | */ | | 619 | */ |
620 | if ((p->p_sflag & PS_WCORE) != 0) { | | 620 | if ((p->p_sflag & PS_WCORE) != 0) { |
621 | mutex_exit(p->p_lock); | | 621 | mutex_exit(p->p_lock); |
622 | lwp_userret(l); | | 622 | lwp_userret(l); |
623 | KASSERT(false); | | 623 | KASSERT(false); |
624 | } | | 624 | } |
625 | | | 625 | |
626 | /* | | 626 | /* |
627 | * First off, drain any detached LWP that is waiting to be | | 627 | * First off, drain any detached LWP that is waiting to be |
628 | * reaped. | | 628 | * reaped. |
629 | */ | | 629 | */ |
630 | while ((l2 = p->p_zomblwp) != NULL) { | | 630 | while ((l2 = p->p_zomblwp) != NULL) { |
631 | p->p_zomblwp = NULL; | | 631 | p->p_zomblwp = NULL; |
632 | lwp_free(l2, false, false);/* releases proc mutex */ | | 632 | lwp_free(l2, false, false);/* releases proc mutex */ |
633 | mutex_enter(p->p_lock); | | 633 | mutex_enter(p->p_lock); |
634 | } | | 634 | } |
635 | | | 635 | |
636 | /* | | 636 | /* |
637 | * Now look for an LWP to collect. If the whole process is | | 637 | * Now look for an LWP to collect. If the whole process is |
638 | * exiting, count detached LWPs as eligible to be collected, | | 638 | * exiting, count detached LWPs as eligible to be collected, |
639 | * but don't drain them here. | | 639 | * but don't drain them here. |
640 | */ | | 640 | */ |
641 | nfound = 0; | | 641 | nfound = 0; |
642 | error = 0; | | 642 | error = 0; |
643 | | | 643 | |
644 | /* | | 644 | /* |
645 | * If given a specific LID, go via the tree and make sure | | 645 | * If given a specific LID, go via the tree and make sure |
646 | * it's not detached. | | 646 | * it's not detached. |
647 | */ | | 647 | */ |
648 | if (lid != 0) { | | 648 | if (lid != 0) { |
649 | l2 = radix_tree_lookup_node(&p->p_lwptree, | | 649 | l2 = radix_tree_lookup_node(&p->p_lwptree, |
650 | (uint64_t)(lid - 1)); | | 650 | (uint64_t)(lid - 1)); |
651 | if (l2 == NULL) { | | 651 | if (l2 == NULL) { |
652 | error = ESRCH; | | 652 | error = ESRCH; |
653 | break; | | 653 | break; |
654 | } | | 654 | } |
655 | KASSERT(l2->l_lid == lid); | | 655 | KASSERT(l2->l_lid == lid); |
656 | if ((l2->l_prflag & LPR_DETACHED) != 0) { | | 656 | if ((l2->l_prflag & LPR_DETACHED) != 0) { |
657 | error = EINVAL; | | 657 | error = EINVAL; |
658 | break; | | 658 | break; |
659 | } | | 659 | } |
660 | } else { | | 660 | } else { |
661 | l2 = LIST_FIRST(&p->p_lwps); | | 661 | l2 = LIST_FIRST(&p->p_lwps); |
662 | } | | 662 | } |
663 | for (; l2 != NULL; l2 = next) { | | 663 | for (; l2 != NULL; l2 = next) { |
664 | next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling)); | | 664 | next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling)); |
665 | | | 665 | |
666 | /* | | 666 | /* |
667 | * If a specific wait and the target is waiting on | | 667 | * If a specific wait and the target is waiting on |
668 | * us, then avoid deadlock. This also traps LWPs | | 668 | * us, then avoid deadlock. This also traps LWPs |
669 | * that try to wait on themselves. | | 669 | * that try to wait on themselves. |
670 | * | | 670 | * |
671 | * Note that this does not handle more complicated | | 671 | * Note that this does not handle more complicated |
672 | * cycles, like: t1 -> t2 -> t3 -> t1. The process | | 672 | * cycles, like: t1 -> t2 -> t3 -> t1. The process |
673 | * can still be killed so it is not a major problem. | | 673 | * can still be killed so it is not a major problem. |
674 | */ | | 674 | */ |
675 | if (l2->l_lid == lid && l2->l_waitingfor == curlid) { | | 675 | if (l2->l_lid == lid && l2->l_waitingfor == curlid) { |
676 | error = EDEADLK; | | 676 | error = EDEADLK; |
677 | break; | | 677 | break; |
678 | } | | 678 | } |
679 | if (l2 == l) | | 679 | if (l2 == l) |
680 | continue; | | 680 | continue; |
681 | if ((l2->l_prflag & LPR_DETACHED) != 0) { | | 681 | if ((l2->l_prflag & LPR_DETACHED) != 0) { |
682 | nfound += exiting; | | 682 | nfound += exiting; |
683 | continue; | | 683 | continue; |
684 | } | | 684 | } |
685 | if (lid != 0) { | | 685 | if (lid != 0) { |
686 | /* | | 686 | /* |
687 | * Mark this LWP as the first waiter, if there | | 687 | * Mark this LWP as the first waiter, if there |
688 | * is no other. | | 688 | * is no other. |
689 | */ | | 689 | */ |
690 | if (l2->l_waiter == 0) | | 690 | if (l2->l_waiter == 0) |
691 | l2->l_waiter = curlid; | | 691 | l2->l_waiter = curlid; |
692 | } else if (l2->l_waiter != 0) { | | 692 | } else if (l2->l_waiter != 0) { |
693 | /* | | 693 | /* |
694 | * It already has a waiter - so don't | | 694 | * It already has a waiter - so don't |
695 | * collect it. If the waiter doesn't | | 695 | * collect it. If the waiter doesn't |
696 | * grab it we'll get another chance | | 696 | * grab it we'll get another chance |
697 | * later. | | 697 | * later. |
698 | */ | | 698 | */ |
699 | nfound++; | | 699 | nfound++; |
700 | continue; | | 700 | continue; |
701 | } | | 701 | } |
702 | nfound++; | | 702 | nfound++; |
703 | | | 703 | |
704 | /* No need to lock the LWP in order to see LSZOMB. */ | | 704 | /* No need to lock the LWP in order to see LSZOMB. */ |
705 | if (l2->l_stat != LSZOMB) | | 705 | if (l2->l_stat != LSZOMB) |
706 | continue; | | 706 | continue; |
707 | | | 707 | |
708 | /* | | 708 | /* |
709 | * We're no longer waiting. Reset the "first waiter" | | 709 | * We're no longer waiting. Reset the "first waiter" |
710 | * pointer on the target, in case it was us. | | 710 | * pointer on the target, in case it was us. |
711 | */ | | 711 | */ |
712 | l->l_waitingfor = 0; | | 712 | l->l_waitingfor = 0; |
713 | l2->l_waiter = 0; | | 713 | l2->l_waiter = 0; |
714 | p->p_nlwpwait--; | | 714 | p->p_nlwpwait--; |
715 | if (departed) | | 715 | if (departed) |
716 | *departed = l2->l_lid; | | 716 | *departed = l2->l_lid; |
717 | sched_lwp_collect(l2); | | 717 | sched_lwp_collect(l2); |
718 | | | 718 | |
719 | /* lwp_free() releases the proc lock. */ | | 719 | /* lwp_free() releases the proc lock. */ |
720 | lwp_free(l2, false, false); | | 720 | lwp_free(l2, false, false); |
721 | mutex_enter(p->p_lock); | | 721 | mutex_enter(p->p_lock); |
722 | return 0; | | 722 | return 0; |
723 | } | | 723 | } |
724 | | | 724 | |
725 | if (error != 0) | | 725 | if (error != 0) |
726 | break; | | 726 | break; |
727 | if (nfound == 0) { | | 727 | if (nfound == 0) { |
728 | error = ESRCH; | | 728 | error = ESRCH; |
729 | break; | | 729 | break; |
730 | } | | 730 | } |
731 | | | 731 | |
732 | /* | | 732 | /* |
733 | * Note: since the lock will be dropped, need to restart on | | 733 | * Note: since the lock will be dropped, need to restart on |
734 | * wakeup to run all LWPs again, e.g. there may be new LWPs. | | 734 | * wakeup to run all LWPs again, e.g. there may be new LWPs. |
735 | */ | | 735 | */ |
736 | if (exiting) { | | 736 | if (exiting) { |
737 | KASSERT(p->p_nlwps > 1); | | 737 | KASSERT(p->p_nlwps > 1); |
738 | error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); | | 738 | error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); |
739 | break; | | 739 | break; |
740 | } | | 740 | } |
741 | | | 741 | |
742 | /* | | 742 | /* |
743 | * Break out if the process is exiting, or if all LWPs are | | 743 | * Break out if all LWPs are in _lwp_wait(). There are |
744 | * in _lwp_wait(). There are other ways to hang the process | | 744 | * other ways to hang the process with _lwp_wait(), but the |
745 | * with _lwp_wait(), but the sleep is interruptible so | | 745 | * sleep is interruptible so little point checking for them. |
746 | * little point checking for them. | | | |
747 | */ | | 746 | */ |
748 | if ((p->p_sflag & PS_WEXIT) != 0 || | | 747 | if (p->p_nlwpwait == p->p_nlwps) { |
749 | p->p_nlwpwait == p->p_nlwps) { | | | |
750 | error = EDEADLK; | | 748 | error = EDEADLK; |
751 | break; | | 749 | break; |
752 | } | | 750 | } |
753 | | | 751 | |
754 | /* | | 752 | /* |
755 | * Sit around and wait for something to happen. We'll be | | 753 | * Sit around and wait for something to happen. We'll be |
756 | * awoken if any of the conditions examined change: if an | | 754 | * awoken if any of the conditions examined change: if an |
757 | * LWP exits, is collected, or is detached. | | 755 | * LWP exits, is collected, or is detached. |
758 | */ | | 756 | */ |
759 | if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) | | 757 | if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) |
760 | break; | | 758 | break; |
761 | } | | 759 | } |
762 | | | 760 | |
763 | /* | | 761 | /* |
764 | * We didn't find any LWPs to collect, we may have received a | | 762 | * We didn't find any LWPs to collect, we may have received a |
765 | * signal, or some other condition has caused us to bail out. | | 763 | * signal, or some other condition has caused us to bail out. |
766 | * | | 764 | * |
767 | * If waiting on a specific LWP, clear the waiters marker: some | | 765 | * If waiting on a specific LWP, clear the waiters marker: some |
768 | * other LWP may want it. Then, kick all the remaining waiters | | 766 | * other LWP may want it. Then, kick all the remaining waiters |
769 | * so that they can re-check for zombies and for deadlock. | | 767 | * so that they can re-check for zombies and for deadlock. |
770 | */ | | 768 | */ |
771 | if (lid != 0) { | | 769 | if (lid != 0) { |
772 | l2 = radix_tree_lookup_node(&p->p_lwptree, | | 770 | l2 = radix_tree_lookup_node(&p->p_lwptree, |
773 | (uint64_t)(lid - 1)); | | 771 | (uint64_t)(lid - 1)); |
774 | KASSERT(l2 == NULL || l2->l_lid == lid); | | 772 | KASSERT(l2 == NULL || l2->l_lid == lid); |
775 | | | 773 | |
776 | if (l2 != NULL && l2->l_waiter == curlid) | | 774 | if (l2 != NULL && l2->l_waiter == curlid) |
777 | l2->l_waiter = 0; | | 775 | l2->l_waiter = 0; |
778 | } | | 776 | } |
779 | p->p_nlwpwait--; | | 777 | p->p_nlwpwait--; |
780 | l->l_waitingfor = 0; | | 778 | l->l_waitingfor = 0; |
781 | cv_broadcast(&p->p_lwpcv); | | 779 | cv_broadcast(&p->p_lwpcv); |
782 | | | 780 | |
783 | return error; | | 781 | return error; |
784 | } | | 782 | } |
785 | | | 783 | |
786 | /* | | 784 | /* |
787 | * Find an unused LID for a new LWP. | | 785 | * Find an unused LID for a new LWP. |
788 | */ | | 786 | */ |
789 | static lwpid_t | | 787 | static lwpid_t |
790 | lwp_find_free_lid(struct proc *p) | | 788 | lwp_find_free_lid(struct proc *p) |
791 | { | | 789 | { |
792 | struct lwp *gang[32]; | | 790 | struct lwp *gang[32]; |
793 | lwpid_t lid; | | 791 | lwpid_t lid; |
794 | unsigned n; | | 792 | unsigned n; |
795 | | | 793 | |
796 | KASSERT(mutex_owned(p->p_lock)); | | 794 | KASSERT(mutex_owned(p->p_lock)); |
797 | KASSERT(p->p_nlwpid > 0); | | 795 | KASSERT(p->p_nlwpid > 0); |
798 | | | 796 | |
799 | /* | | 797 | /* |
800 | * Scoot forward through the tree in blocks of LIDs doing gang | | 798 | * Scoot forward through the tree in blocks of LIDs doing gang |
801 | * lookup with dense=true, meaning the lookup will terminate the | | 799 | * lookup with dense=true, meaning the lookup will terminate the |
802 | * instant a hole is encountered. Most of the time the first entry | | 800 | * instant a hole is encountered. Most of the time the first entry |
803 | * (p->p_nlwpid) is free and the lookup fails fast. | | 801 | * (p->p_nlwpid) is free and the lookup fails fast. |
804 | */ | | 802 | */ |
805 | for (lid = p->p_nlwpid;;) { | | 803 | for (lid = p->p_nlwpid;;) { |
806 | n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1, | | 804 | n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1, |
807 | (void **)gang, __arraycount(gang), true); | | 805 | (void **)gang, __arraycount(gang), true); |
808 | if (n == 0) { | | 806 | if (n == 0) { |
809 | /* Start point was empty. */ | | 807 | /* Start point was empty. */ |
810 | break; | | 808 | break; |
811 | } | | 809 | } |
812 | KASSERT(gang[0]->l_lid == lid); | | 810 | KASSERT(gang[0]->l_lid == lid); |
813 | lid = gang[n - 1]->l_lid + 1; | | 811 | lid = gang[n - 1]->l_lid + 1; |
814 | if (n < __arraycount(gang)) { | | 812 | if (n < __arraycount(gang)) { |
815 | /* Scan encountered a hole. */ | | 813 | /* Scan encountered a hole. */ |
816 | break; | | 814 | break; |
817 | } | | 815 | } |
818 | } | | 816 | } |
819 | | | 817 | |
820 | return (lwpid_t)lid; | | 818 | return (lwpid_t)lid; |
821 | } | | 819 | } |
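/*
 * A minimal userspace sketch of the scan above (illustrative only):
 * walk a sorted set of in-use IDs in fixed-size chunks and stop as
 * soon as a chunk comes back short or empty, i.e. at the first hole.
 * The sorted array and gang_lookup() stand in for the radix tree's
 * dense gang lookup; they are not the kernel API.
 */
#include <stdio.h>

#define CHUNK 4

/* Return how many consecutive entries of used[] start exactly at id. */
static unsigned
gang_lookup(const unsigned *used, unsigned nused, unsigned id,
    unsigned *out, unsigned max)
{
	unsigned n = 0;

	for (unsigned i = 0; i < nused && n < max; i++) {
		if (used[i] == id + n)
			out[n++] = used[i];
	}
	return n;
}

int
main(void)
{
	const unsigned used[] = { 1, 2, 3, 5, 6 };	/* LIDs in use */
	unsigned gang[CHUNK], id = 1, n;

	for (;;) {
		n = gang_lookup(used, 5, id, gang, CHUNK);
		if (n == 0)
			break;		/* start point itself was free */
		id = gang[n - 1] + 1;	/* skip past the dense run */
		if (n < CHUNK)
			break;		/* short gang => hole found */
	}
	printf("first free LID: %u\n", id);	/* prints 4 */
	return 0;
}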
822 | | | 820 | |
823 | /* | | 821 | /* |
824 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. | | 822 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. |
825 | * The new LWP is created in state LSIDL and must be set running, | | 823 | * The new LWP is created in state LSIDL and must be set running, |
826 | * suspended, or stopped by the caller. | | 824 | * suspended, or stopped by the caller. |
827 | */ | | 825 | */ |
828 | int | | 826 | int |
829 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, | | 827 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, |
830 | void *stack, size_t stacksize, void (*func)(void *), void *arg, | | 828 | void *stack, size_t stacksize, void (*func)(void *), void *arg, |
831 | lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, | | 829 | lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, |
832 | const stack_t *sigstk) | | 830 | const stack_t *sigstk) |
833 | { | | 831 | { |
834 | struct lwp *l2; | | 832 | struct lwp *l2; |
835 | turnstile_t *ts; | | 833 | turnstile_t *ts; |
836 | lwpid_t lid; | | 834 | lwpid_t lid; |
837 | | | 835 | |
838 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); | | 836 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); |
839 | | | 837 | |
840 | /* | | 838 | /* |
841 | * Enforce limits, excluding the first lwp and kthreads. We must | | 839 | * Enforce limits, excluding the first lwp and kthreads. We must |
842 | * use the process credentials here when adjusting the limit, as | | 840 | * use the process credentials here when adjusting the limit, as |
843 | * they are what's tied to the accounting entity. However for | | 841 | * they are what's tied to the accounting entity. However for |
844 | * authorizing the action, we'll use the LWP's credentials. | | 842 | * authorizing the action, we'll use the LWP's credentials. |
845 | */ | | 843 | */ |
846 | mutex_enter(p2->p_lock); | | 844 | mutex_enter(p2->p_lock); |
847 | if (p2->p_nlwps != 0 && p2 != &proc0) { | | 845 | if (p2->p_nlwps != 0 && p2 != &proc0) { |
848 | uid_t uid = kauth_cred_getuid(p2->p_cred); | | 846 | uid_t uid = kauth_cred_getuid(p2->p_cred); |
849 | int count = chglwpcnt(uid, 1); | | 847 | int count = chglwpcnt(uid, 1); |
850 | if (__predict_false(count > | | 848 | if (__predict_false(count > |
851 | p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { | | 849 | p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { |
852 | if (kauth_authorize_process(l1->l_cred, | | 850 | if (kauth_authorize_process(l1->l_cred, |
853 | KAUTH_PROCESS_RLIMIT, p2, | | 851 | KAUTH_PROCESS_RLIMIT, p2, |
854 | KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), | | 852 | KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), |
855 | &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) | | 853 | &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) |
856 | != 0) { | | 854 | != 0) { |
857 | (void)chglwpcnt(uid, -1); | | 855 | (void)chglwpcnt(uid, -1); |
858 | mutex_exit(p2->p_lock); | | 856 | mutex_exit(p2->p_lock); |
859 | return EAGAIN; | | 857 | return EAGAIN; |
860 | } | | 858 | } |
861 | } | | 859 | } |
862 | } | | 860 | } |
863 | | | 861 | |
864 | /* | | 862 | /* |
865 | * First off, reap any detached LWP waiting to be collected. | | 863 | * First off, reap any detached LWP waiting to be collected. |
866 | * We can re-use its LWP structure and turnstile. | | 864 | * We can re-use its LWP structure and turnstile. |
867 | */ | | 865 | */ |
868 | if ((l2 = p2->p_zomblwp) != NULL) { | | 866 | if ((l2 = p2->p_zomblwp) != NULL) { |
869 | p2->p_zomblwp = NULL; | | 867 | p2->p_zomblwp = NULL; |
870 | lwp_free(l2, true, false); | | 868 | lwp_free(l2, true, false); |
871 | /* p2 now unlocked by lwp_free() */ | | 869 | /* p2 now unlocked by lwp_free() */ |
872 | ts = l2->l_ts; | | 870 | ts = l2->l_ts; |
873 | KASSERT(l2->l_inheritedprio == -1); | | 871 | KASSERT(l2->l_inheritedprio == -1); |
874 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); | | 872 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); |
875 | memset(l2, 0, sizeof(*l2)); | | 873 | memset(l2, 0, sizeof(*l2)); |
876 | l2->l_ts = ts; | | 874 | l2->l_ts = ts; |
877 | } else { | | 875 | } else { |
878 | mutex_exit(p2->p_lock); | | 876 | mutex_exit(p2->p_lock); |
879 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); | | 877 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); |
880 | memset(l2, 0, sizeof(*l2)); | | 878 | memset(l2, 0, sizeof(*l2)); |
881 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); | | 879 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); |
882 | SLIST_INIT(&l2->l_pi_lenders); | | 880 | SLIST_INIT(&l2->l_pi_lenders); |
883 | } | | 881 | } |
884 | | | 882 | |
885 | l2->l_stat = LSIDL; | | 883 | l2->l_stat = LSIDL; |
886 | l2->l_proc = p2; | | 884 | l2->l_proc = p2; |
887 | l2->l_refcnt = 0; | | 885 | l2->l_refcnt = 0; |
888 | l2->l_class = sclass; | | 886 | l2->l_class = sclass; |
889 | | | 887 | |
890 | /* | | 888 | /* |
891 | * If vfork(), we want the LWP to run fast and on the same CPU | | 889 | * If vfork(), we want the LWP to run fast and on the same CPU |
892 | * as its parent, so that it can reuse the VM context and cache | | 890 | * as its parent, so that it can reuse the VM context and cache |
893 | * footprint on the local CPU. | | 891 | * footprint on the local CPU. |
894 | */ | | 892 | */ |
895 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); | | 893 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); |
896 | l2->l_kpribase = PRI_KERNEL; | | 894 | l2->l_kpribase = PRI_KERNEL; |
897 | l2->l_priority = l1->l_priority; | | 895 | l2->l_priority = l1->l_priority; |
898 | l2->l_inheritedprio = -1; | | 896 | l2->l_inheritedprio = -1; |
899 | l2->l_protectprio = -1; | | 897 | l2->l_protectprio = -1; |
900 | l2->l_auxprio = -1; | | 898 | l2->l_auxprio = -1; |
901 | l2->l_flag = 0; | | 899 | l2->l_flag = 0; |
902 | l2->l_pflag = LP_MPSAFE; | | 900 | l2->l_pflag = LP_MPSAFE; |
903 | TAILQ_INIT(&l2->l_ld_locks); | | 901 | TAILQ_INIT(&l2->l_ld_locks); |
904 | l2->l_psrefs = 0; | | 902 | l2->l_psrefs = 0; |
905 | kmsan_lwp_alloc(l2); | | 903 | kmsan_lwp_alloc(l2); |
906 | | | 904 | |
907 | /* | | 905 | /* |
908 | * For vfork, borrow parent's lwpctl context if it exists. | | 906 | * For vfork, borrow parent's lwpctl context if it exists. |
909 | * This also causes us to return via lwp_userret. | | 907 | * This also causes us to return via lwp_userret. |
910 | */ | | 908 | */ |
911 | if (flags & LWP_VFORK && l1->l_lwpctl) { | | 909 | if (flags & LWP_VFORK && l1->l_lwpctl) { |
912 | l2->l_lwpctl = l1->l_lwpctl; | | 910 | l2->l_lwpctl = l1->l_lwpctl; |
913 | l2->l_flag |= LW_LWPCTL; | | 911 | l2->l_flag |= LW_LWPCTL; |
914 | } | | 912 | } |
915 | | | 913 | |
916 | /* | | 914 | /* |
917 | * If not the first LWP in the process, grab a reference to the | | 915 | * If not the first LWP in the process, grab a reference to the |
918 | * descriptor table. | | 916 | * descriptor table. |
919 | */ | | 917 | */ |
920 | l2->l_fd = p2->p_fd; | | 918 | l2->l_fd = p2->p_fd; |
921 | if (p2->p_nlwps != 0) { | | 919 | if (p2->p_nlwps != 0) { |
922 | KASSERT(l1->l_proc == p2); | | 920 | KASSERT(l1->l_proc == p2); |
923 | fd_hold(l2); | | 921 | fd_hold(l2); |
924 | } else { | | 922 | } else { |
925 | KASSERT(l1->l_proc != p2); | | 923 | KASSERT(l1->l_proc != p2); |
926 | } | | 924 | } |
927 | | | 925 | |
928 | if (p2->p_flag & PK_SYSTEM) { | | 926 | if (p2->p_flag & PK_SYSTEM) { |
929 | /* Mark it as a system LWP. */ | | 927 | /* Mark it as a system LWP. */ |
930 | l2->l_flag |= LW_SYSTEM; | | 928 | l2->l_flag |= LW_SYSTEM; |
931 | } | | 929 | } |
932 | | | 930 | |
933 | kpreempt_disable(); | | 931 | kpreempt_disable(); |
934 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; | | 932 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; |
935 | l2->l_cpu = l1->l_cpu; | | 933 | l2->l_cpu = l1->l_cpu; |
936 | kpreempt_enable(); | | 934 | kpreempt_enable(); |
937 | | | 935 | |
938 | kdtrace_thread_ctor(NULL, l2); | | 936 | kdtrace_thread_ctor(NULL, l2); |
939 | lwp_initspecific(l2); | | 937 | lwp_initspecific(l2); |
940 | sched_lwp_fork(l1, l2); | | 938 | sched_lwp_fork(l1, l2); |
941 | lwp_update_creds(l2); | | 939 | lwp_update_creds(l2); |
942 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); | | 940 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); |
943 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); | | 941 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); |
944 | cv_init(&l2->l_sigcv, "sigwait"); | | 942 | cv_init(&l2->l_sigcv, "sigwait"); |
945 | cv_init(&l2->l_waitcv, "vfork"); | | 943 | cv_init(&l2->l_waitcv, "vfork"); |
946 | l2->l_syncobj = &sched_syncobj; | | 944 | l2->l_syncobj = &sched_syncobj; |
947 | PSREF_DEBUG_INIT_LWP(l2); | | 945 | PSREF_DEBUG_INIT_LWP(l2); |
948 | | | 946 | |
949 | if (rnewlwpp != NULL) | | 947 | if (rnewlwpp != NULL) |
950 | *rnewlwpp = l2; | | 948 | *rnewlwpp = l2; |
951 | | | 949 | |
952 | /* | | 950 | /* |
953 | * PCU state needs to be saved before calling uvm_lwp_fork() so that | | 951 | * PCU state needs to be saved before calling uvm_lwp_fork() so that |
954 | * the MD cpu_lwp_fork() can copy the saved state to the new LWP. | | 952 | * the MD cpu_lwp_fork() can copy the saved state to the new LWP. |
955 | */ | | 953 | */ |
956 | pcu_save_all(l1); | | 954 | pcu_save_all(l1); |
957 | #if PCU_UNIT_COUNT > 0 | | 955 | #if PCU_UNIT_COUNT > 0 |
958 | l2->l_pcu_valid = l1->l_pcu_valid; | | 956 | l2->l_pcu_valid = l1->l_pcu_valid; |
959 | #endif | | 957 | #endif |
960 | | | 958 | |
961 | uvm_lwp_setuarea(l2, uaddr); | | 959 | uvm_lwp_setuarea(l2, uaddr); |
962 | uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); | | 960 | uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); |
963 | | | 961 | |
964 | if ((flags & LWP_PIDLID) != 0) { | | 962 | if ((flags & LWP_PIDLID) != 0) { |
965 | /* Linux threads: use a PID. */ | | 963 | /* Linux threads: use a PID. */ |
966 | lid = proc_alloc_pid(p2); | | 964 | lid = proc_alloc_pid(p2); |
967 | l2->l_pflag |= LP_PIDLID; | | 965 | l2->l_pflag |= LP_PIDLID; |
968 | } else if (p2->p_nlwps == 0) { | | 966 | } else if (p2->p_nlwps == 0) { |
969 | /* | | 967 | /* |
970 | * First LWP in process. Copy the parent's LID to avoid | | 968 | * First LWP in process. Copy the parent's LID to avoid |
971 | * causing problems for fork() + threads. Don't give | | 969 | * causing problems for fork() + threads. Don't give |
972 | * subsequent threads the distinction of using LID 1. | | 970 | * subsequent threads the distinction of using LID 1. |
973 | */ | | 971 | */ |
974 | lid = l1->l_lid; | | 972 | lid = l1->l_lid; |
975 | p2->p_nlwpid = 2; | | 973 | p2->p_nlwpid = 2; |
976 | } else { | | 974 | } else { |
977 | /* Scan the radix tree for a free LID. */ | | 975 | /* Scan the radix tree for a free LID. */ |
978 | lid = 0; | | 976 | lid = 0; |
979 | } | | 977 | } |
980 | | | 978 | |
981 | /* | | 979 | /* |
982 | * Allocate LID if needed, and insert into the radix tree. The | | 980 | * Allocate LID if needed, and insert into the radix tree. The |
983 | * first LWP in most processes has a LID of 1. It turns out that if | | 981 | * first LWP in most processes has a LID of 1. It turns out that if |
984 | * you insert an item with a key of zero into a radix tree, it's stored | | 982 | * you insert an item with a key of zero into a radix tree, it's stored |
985 | * directly in the root (p_lwptree) and no extra memory is | | 983 | * directly in the root (p_lwptree) and no extra memory is |
986 | * allocated. We therefore always subtract 1 from the LID, which | | 984 | * allocated. We therefore always subtract 1 from the LID, which |
987 | * means no memory is allocated for the tree unless the program is | | 985 | * means no memory is allocated for the tree unless the program is |
988 | * using threads. NB: the allocation and insert must take place | | 986 | * using threads. NB: the allocation and insert must take place |
989 | * under the same hold of p_lock. | | 987 | * under the same hold of p_lock. |
990 | */ | | 988 | */ |
991 | mutex_enter(p2->p_lock); | | 989 | mutex_enter(p2->p_lock); |
992 | for (;;) { | | 990 | for (;;) { |
993 | int error; | | 991 | int error; |
994 | | | 992 | |
995 | l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid); | | 993 | l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid); |
996 | | | 994 | |
997 | rw_enter(&p2->p_treelock, RW_WRITER); | | 995 | rw_enter(&p2->p_treelock, RW_WRITER); |
998 | error = radix_tree_insert_node(&p2->p_lwptree, | | 996 | error = radix_tree_insert_node(&p2->p_lwptree, |
999 | (uint64_t)(l2->l_lid - 1), l2); | | 997 | (uint64_t)(l2->l_lid - 1), l2); |
1000 | rw_exit(&p2->p_treelock); | | 998 | rw_exit(&p2->p_treelock); |
1001 | | | 999 | |
1002 | if (__predict_true(error == 0)) { | | 1000 | if (__predict_true(error == 0)) { |
1003 | if (lid == 0) | | 1001 | if (lid == 0) |
1004 | p2->p_nlwpid = l2->l_lid + 1; | | 1002 | p2->p_nlwpid = l2->l_lid + 1; |
1005 | break; | | 1003 | break; |
1006 | } | | 1004 | } |
1007 | | | 1005 | |
1008 | KASSERT(error == ENOMEM); | | 1006 | KASSERT(error == ENOMEM); |
1009 | mutex_exit(p2->p_lock); | | 1007 | mutex_exit(p2->p_lock); |
1010 | radix_tree_await_memory(); | | 1008 | radix_tree_await_memory(); |
1011 | mutex_enter(p2->p_lock); | | 1009 | mutex_enter(p2->p_lock); |
1012 | } | | 1010 | } |
1013 | | | 1011 | |
1014 | if ((flags & LWP_DETACHED) != 0) { | | 1012 | if ((flags & LWP_DETACHED) != 0) { |
1015 | l2->l_prflag = LPR_DETACHED; | | 1013 | l2->l_prflag = LPR_DETACHED; |
1016 | p2->p_ndlwps++; | | 1014 | p2->p_ndlwps++; |
1017 | } else | | 1015 | } else |
1018 | l2->l_prflag = 0; | | 1016 | l2->l_prflag = 0; |
1019 | | | 1017 | |
1020 | if (l1->l_proc == p2) { | | 1018 | if (l1->l_proc == p2) { |
1021 | /* | | 1019 | /* |
1022 | * These flags are set while p_lock is held. Copy with | | 1020 | * These flags are set while p_lock is held. Copy with |
1023 | * p_lock held too, so the LWP doesn't sneak into the | | 1021 | * p_lock held too, so the LWP doesn't sneak into the |
1024 | * process without them being set. | | 1022 | * process without them being set. |
1025 | */ | | 1023 | */ |
1026 | l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); | | 1024 | l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); |
1027 | } else { | | 1025 | } else { |
1028 | /* fork(): pending core/exit doesn't apply to child. */ | | 1026 | /* fork(): pending core/exit doesn't apply to child. */ |
1029 | l2->l_flag |= (l1->l_flag & LW_WREBOOT); | | 1027 | l2->l_flag |= (l1->l_flag & LW_WREBOOT); |
1030 | } | | 1028 | } |
1031 | | | 1029 | |
1032 | l2->l_sigstk = *sigstk; | | 1030 | l2->l_sigstk = *sigstk; |
1033 | l2->l_sigmask = *sigmask; | | 1031 | l2->l_sigmask = *sigmask; |
1034 | TAILQ_INIT(&l2->l_sigpend.sp_info); | | 1032 | TAILQ_INIT(&l2->l_sigpend.sp_info); |
1035 | sigemptyset(&l2->l_sigpend.sp_set); | | 1033 | sigemptyset(&l2->l_sigpend.sp_set); |
1036 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); | | 1034 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); |
1037 | p2->p_nlwps++; | | 1035 | p2->p_nlwps++; |
1038 | p2->p_nrlwps++; | | 1036 | p2->p_nrlwps++; |
1039 | | | 1037 | |
1040 | KASSERT(l2->l_affinity == NULL); | | 1038 | KASSERT(l2->l_affinity == NULL); |
1041 | | | 1039 | |
1042 | /* Inherit the affinity mask. */ | | 1040 | /* Inherit the affinity mask. */ |
1043 | if (l1->l_affinity) { | | 1041 | if (l1->l_affinity) { |
1044 | /* | | 1042 | /* |
1045 | * Note that we hold the state lock while inheriting | | 1043 | * Note that we hold the state lock while inheriting |
1046 | * the affinity to avoid race with sched_setaffinity(). | | 1044 | * the affinity to avoid race with sched_setaffinity(). |
1047 | */ | | 1045 | */ |
1048 | lwp_lock(l1); | | 1046 | lwp_lock(l1); |
1049 | if (l1->l_affinity) { | | 1047 | if (l1->l_affinity) { |
1050 | kcpuset_use(l1->l_affinity); | | 1048 | kcpuset_use(l1->l_affinity); |
1051 | l2->l_affinity = l1->l_affinity; | | 1049 | l2->l_affinity = l1->l_affinity; |
1052 | } | | 1050 | } |
1053 | lwp_unlock(l1); | | 1051 | lwp_unlock(l1); |
1054 | } | | 1052 | } |
1055 | | | 1053 | |
1056 | /* This marks the end of the "must be atomic" section. */ | | 1054 | /* This marks the end of the "must be atomic" section. */ |
1057 | mutex_exit(p2->p_lock); | | 1055 | mutex_exit(p2->p_lock); |
1058 | | | 1056 | |
1059 | SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); | | 1057 | SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); |
1060 | | | 1058 | |
1061 | mutex_enter(proc_lock); | | 1059 | mutex_enter(proc_lock); |
1062 | LIST_INSERT_HEAD(&alllwp, l2, l_list); | | 1060 | LIST_INSERT_HEAD(&alllwp, l2, l_list); |
1063 | /* Inherit a processor-set */ | | 1061 | /* Inherit a processor-set */ |
1064 | l2->l_psid = l1->l_psid; | | 1062 | l2->l_psid = l1->l_psid; |
1065 | mutex_exit(proc_lock); | | 1063 | mutex_exit(proc_lock); |
1066 | | | 1064 | |
1067 | SYSCALL_TIME_LWP_INIT(l2); | | 1065 | SYSCALL_TIME_LWP_INIT(l2); |
1068 | | | 1066 | |
1069 | if (p2->p_emul->e_lwp_fork) | | 1067 | if (p2->p_emul->e_lwp_fork) |
1070 | (*p2->p_emul->e_lwp_fork)(l1, l2); | | 1068 | (*p2->p_emul->e_lwp_fork)(l1, l2); |
1071 | | | 1069 | |
1072 | return (0); | | 1070 | return (0); |
1073 | } | | 1071 | } |
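/*
 * Worked example of the key offset described above, assuming the
 * radix tree behaves as the comment says: the first LWP normally has
 * LID 1 and is inserted at key 0, which the tree stores directly in
 * its root, so a single-threaded process never allocates interior
 * nodes.  Memory is only spent once a second LWP appears:
 *
 *	radix_tree_insert_node(&p->p_lwptree, 1 - 1, l);   key 0: in root
 *	radix_tree_insert_node(&p->p_lwptree, 2 - 1, l2);  key 1: allocates
 */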
1074 | | | 1072 | |
1075 | /* | | 1073 | /* |
1076 | * Set a new LWP running. If the process is stopping, then the LWP is | | 1074 | * Set a new LWP running. If the process is stopping, then the LWP is |
1077 | * created stopped. | | 1075 | * created stopped. |
1078 | */ | | 1076 | */ |
1079 | void | | 1077 | void |
1080 | lwp_start(lwp_t *l, int flags) | | 1078 | lwp_start(lwp_t *l, int flags) |
1081 | { | | 1079 | { |
1082 | proc_t *p = l->l_proc; | | 1080 | proc_t *p = l->l_proc; |
1083 | | | 1081 | |
1084 | mutex_enter(p->p_lock); | | 1082 | mutex_enter(p->p_lock); |
1085 | lwp_lock(l); | | 1083 | lwp_lock(l); |
1086 | KASSERT(l->l_stat == LSIDL); | | 1084 | KASSERT(l->l_stat == LSIDL); |
1087 | if ((flags & LWP_SUSPENDED) != 0) { | | 1085 | if ((flags & LWP_SUSPENDED) != 0) { |
1088 | /* It'll suspend itself in lwp_userret(). */ | | 1086 | /* It'll suspend itself in lwp_userret(). */ |
1089 | l->l_flag |= LW_WSUSPEND; | | 1087 | l->l_flag |= LW_WSUSPEND; |
1090 | } | | 1088 | } |
1091 | if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { | | 1089 | if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { |
1092 | KASSERT(l->l_wchan == NULL); | | 1090 | KASSERT(l->l_wchan == NULL); |
1093 | l->l_stat = LSSTOP; | | 1091 | l->l_stat = LSSTOP; |
1094 | p->p_nrlwps--; | | 1092 | p->p_nrlwps--; |
1095 | lwp_unlock(l); | | 1093 | lwp_unlock(l); |
1096 | } else { | | 1094 | } else { |
1097 | setrunnable(l); | | 1095 | setrunnable(l); |
1098 | /* LWP now unlocked */ | | 1096 | /* LWP now unlocked */ |
1099 | } | | 1097 | } |
1100 | mutex_exit(p->p_lock); | | 1098 | mutex_exit(p->p_lock); |
1101 | } | | 1099 | } |
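/*
 * Hedged usage sketch tying lwp_create() and lwp_start() together: the
 * creator gets the LWP back in LSIDL and must start it explicitly.
 * uaddr, start_func and start_arg are illustrative placeholders only.
 */
	lwp_t *l2;
	int error;

	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, start_func,
	    start_arg, &l2, SCHED_OTHER, &curlwp->l_sigmask,
	    &curlwp->l_sigstk);
	if (error == 0)
		lwp_start(l2, 0);	/* LSIDL -> runnable (or stopped) */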
1102 | | | 1100 | |
1103 | /* | | 1101 | /* |
1104 | * Called by MD code when a new LWP begins execution. Must be called | | 1102 | * Called by MD code when a new LWP begins execution. Must be called |
1105 | * with the previous LWP locked (and hence at splsched), or, if there | | 1103 | * with the previous LWP locked (and hence at splsched), or, if there |
1106 | * is no previous LWP, at splsched. | | 1104 | * is no previous LWP, at splsched. |
1107 | */ | | 1105 | */ |
1108 | void | | 1106 | void |
1109 | lwp_startup(struct lwp *prev, struct lwp *new_lwp) | | 1107 | lwp_startup(struct lwp *prev, struct lwp *new_lwp) |
1110 | { | | 1108 | { |
1111 | kmutex_t *lock; | | 1109 | kmutex_t *lock; |
1112 | | | 1110 | |
1113 | KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); | | 1111 | KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); |
1114 | KASSERT(kpreempt_disabled()); | | 1112 | KASSERT(kpreempt_disabled()); |
1115 | KASSERT(prev != NULL); | | 1113 | KASSERT(prev != NULL); |
1116 | KASSERT((prev->l_pflag & LP_RUNNING) != 0); | | 1114 | KASSERT((prev->l_pflag & LP_RUNNING) != 0); |
1117 | KASSERT(curcpu()->ci_mtx_count == -2); | | 1115 | KASSERT(curcpu()->ci_mtx_count == -2); |
1118 | | | 1116 | |
1119 | /* | | 1117 | /* |
1120 | * Immediately mark the previous LWP as no longer running and unlock | | 1118 | * Immediately mark the previous LWP as no longer running and unlock |
1121 | * (to keep lock wait times as short as possible). If a zombie, don't | | 1119 | * (to keep lock wait times as short as possible). If a zombie, don't |
1122 | * touch after clearing LP_RUNNING as it could be reaped by another | | 1120 | * touch after clearing LP_RUNNING as it could be reaped by another |
1123 | * CPU. Issue a memory barrier to ensure this. | | 1121 | * CPU. Issue a memory barrier to ensure this. |
1124 | */ | | 1122 | */ |
1125 | lock = prev->l_mutex; | | 1123 | lock = prev->l_mutex; |
1126 | if (__predict_false(prev->l_stat == LSZOMB)) { | | 1124 | if (__predict_false(prev->l_stat == LSZOMB)) { |
1127 | membar_sync(); | | 1125 | membar_sync(); |
1128 | } | | 1126 | } |
1129 | prev->l_pflag &= ~LP_RUNNING; | | 1127 | prev->l_pflag &= ~LP_RUNNING; |
1130 | mutex_spin_exit(lock); | | 1128 | mutex_spin_exit(lock); |
1131 | | | 1129 | |
1132 | /* Correct spin mutex count after mi_switch(). */ | | 1130 | /* Correct spin mutex count after mi_switch(). */ |
1133 | curcpu()->ci_mtx_count = 0; | | 1131 | curcpu()->ci_mtx_count = 0; |
1134 | | | 1132 | |
1135 | /* Install new VM context. */ | | 1133 | /* Install new VM context. */ |
1136 | if (__predict_true(new_lwp->l_proc->p_vmspace)) { | | 1134 | if (__predict_true(new_lwp->l_proc->p_vmspace)) { |
1137 | pmap_activate(new_lwp); | | 1135 | pmap_activate(new_lwp); |
1138 | } | | 1136 | } |
1139 | | | 1137 | |
1140 | /* We remain at IPL_SCHED from mi_switch() - reset it. */ | | 1138 | /* We remain at IPL_SCHED from mi_switch() - reset it. */ |
1141 | spl0(); | | 1139 | spl0(); |
1142 | | | 1140 | |
1143 | LOCKDEBUG_BARRIER(NULL, 0); | | 1141 | LOCKDEBUG_BARRIER(NULL, 0); |
1144 | SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); | | 1142 | SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); |
1145 | | | 1143 | |
1146 | /* For kthreads, acquire kernel lock if not MPSAFE. */ | | 1144 | /* For kthreads, acquire kernel lock if not MPSAFE. */ |
1147 | if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { | | 1145 | if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { |
1148 | KERNEL_LOCK(1, new_lwp); | | 1146 | KERNEL_LOCK(1, new_lwp); |
1149 | } | | 1147 | } |
1150 | } | | 1148 | } |
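/*
 * A rough userspace analogue (C11 atomics) of the LP_RUNNING handoff
 * above: the switching CPU publishes "no longer running" with release
 * semantics, and the reaper spins with acquire semantics before
 * touching the dead LWP.  This illustrates the idea only; it is not
 * the kernel's actual membar_sync()/membar_consumer() pairing.
 */
#include <stdatomic.h>

static _Atomic int lp_running = 1;

static void
switch_away(void)		/* cf. lwp_startup() clearing LP_RUNNING */
{
	/* All stores to the dead LWP happen before this release. */
	atomic_store_explicit(&lp_running, 0, memory_order_release);
}

static void
reap(void)			/* cf. the spin loop in lwp_free() */
{
	while (atomic_load_explicit(&lp_running, memory_order_acquire))
		;		/* spin until it has switched away */
	/* Safe to free the LWP's resources now. */
}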
1151 | | | 1149 | |
1152 | /* | | 1150 | /* |
1153 | * Exit an LWP. | | 1151 | * Exit an LWP. |
1154 | */ | | 1152 | */ |
1155 | void | | 1153 | void |
1156 | lwp_exit(struct lwp *l) | | 1154 | lwp_exit(struct lwp *l) |
1157 | { | | 1155 | { |
1158 | struct proc *p = l->l_proc; | | 1156 | struct proc *p = l->l_proc; |
1159 | struct lwp *l2; | | 1157 | struct lwp *l2; |
1160 | bool current; | | 1158 | bool current; |
1161 | | | 1159 | |
1162 | current = (l == curlwp); | | 1160 | current = (l == curlwp); |
1163 | | | 1161 | |
1164 | KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); | | 1162 | KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); |
1165 | KASSERT(p == curproc); | | 1163 | KASSERT(p == curproc); |
1166 | | | 1164 | |
1167 | SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); | | 1165 | SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); |
1168 | | | 1166 | |
1169 | /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ | | 1167 | /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ |
1170 | LOCKDEBUG_BARRIER(NULL, 0); | | 1168 | LOCKDEBUG_BARRIER(NULL, 0); |
1171 | KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); | | 1169 | KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); |
1172 | | | 1170 | |
1173 | /* | | 1171 | /* |
1174 | * If we are the last live LWP in a process, we need to exit the | | 1172 | * If we are the last live LWP in a process, we need to exit the |
1175 | * entire process. We do so with an exit status of zero, because | | 1173 | * entire process. We do so with an exit status of zero, because |
1176 | * it's a "controlled" exit, and because that's what Solaris does. | | 1174 | * it's a "controlled" exit, and because that's what Solaris does. |
1177 | * | | 1175 | * |
1178 | * We are not quite a zombie yet, but for accounting purposes we | | 1176 | * We are not quite a zombie yet, but for accounting purposes we |
1179 | * must increment the count of zombies here. | | 1177 | * must increment the count of zombies here. |
1180 | * | | 1178 | * |
1181 | * Note: the last LWP's specificdata will be deleted here. | | 1179 | * Note: the last LWP's specificdata will be deleted here. |
1182 | */ | | 1180 | */ |
1183 | mutex_enter(p->p_lock); | | 1181 | mutex_enter(p->p_lock); |
1184 | if (p->p_nlwps - p->p_nzlwps == 1) { | | 1182 | if (p->p_nlwps - p->p_nzlwps == 1) { |
1185 | KASSERT(current == true); | | 1183 | KASSERT(current == true); |
1186 | KASSERT(p != &proc0); | | 1184 | KASSERT(p != &proc0); |
1187 | exit1(l, 0, 0); | | 1185 | exit1(l, 0, 0); |
1188 | /* NOTREACHED */ | | 1186 | /* NOTREACHED */ |
1189 | } | | 1187 | } |
1190 | p->p_nzlwps++; | | 1188 | p->p_nzlwps++; |
1191 | | | 1189 | |
1192 | /* | | 1190 | /* |
1193 | * Perform any required thread cleanup. Do this early so | | 1191 | * Perform any required thread cleanup. Do this early so |
1194 | * anyone wanting to look us up by our global thread ID | | 1192 | * anyone wanting to look us up by our global thread ID |
1195 | * will fail to find us. | | 1193 | * will fail to find us. |
1196 | * | | 1194 | * |
1197 | * N.B. this will unlock p->p_lock on our behalf. | | 1195 | * N.B. this will unlock p->p_lock on our behalf. |
1198 | */ | | 1196 | */ |
1199 | lwp_thread_cleanup(l); | | 1197 | lwp_thread_cleanup(l); |
1200 | | | 1198 | |
1201 | if (p->p_emul->e_lwp_exit) | | 1199 | if (p->p_emul->e_lwp_exit) |
1202 | (*p->p_emul->e_lwp_exit)(l); | | 1200 | (*p->p_emul->e_lwp_exit)(l); |
1203 | | | 1201 | |
1204 | /* Drop filedesc reference. */ | | 1202 | /* Drop filedesc reference. */ |
1205 | fd_free(); | | 1203 | fd_free(); |
1206 | | | 1204 | |
1207 | /* Release fstrans private data. */ | | 1205 | /* Release fstrans private data. */ |
1208 | fstrans_lwp_dtor(l); | | 1206 | fstrans_lwp_dtor(l); |
1209 | | | 1207 | |
1210 | /* Delete the specificdata while it's still safe to sleep. */ | | 1208 | /* Delete the specificdata while it's still safe to sleep. */ |
1211 | lwp_finispecific(l); | | 1209 | lwp_finispecific(l); |
1212 | | | 1210 | |
1213 | /* | | 1211 | /* |
1214 | * Release our cached credentials. | | 1212 | * Release our cached credentials. |
1215 | */ | | 1213 | */ |
1216 | kauth_cred_free(l->l_cred); | | 1214 | kauth_cred_free(l->l_cred); |
1217 | callout_destroy(&l->l_timeout_ch); | | 1215 | callout_destroy(&l->l_timeout_ch); |
1218 | | | 1216 | |
1219 | /* | | 1217 | /* |
1220 | * If traced, report LWP exit event to the debugger. | | 1218 | * If traced, report LWP exit event to the debugger. |
1221 | * | | 1219 | * |
1222 | * Remove the LWP from the global list. | | 1220 | * Remove the LWP from the global list. |
1223 | * Free its LID from the PID namespace if needed. | | 1221 | * Free its LID from the PID namespace if needed. |
1224 | */ | | 1222 | */ |
1225 | mutex_enter(proc_lock); | | 1223 | mutex_enter(proc_lock); |
1226 | | | 1224 | |
1227 | if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == | | 1225 | if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == |
1228 | (PSL_TRACED|PSL_TRACELWP_EXIT)) { | | 1226 | (PSL_TRACED|PSL_TRACELWP_EXIT)) { |
1229 | mutex_enter(p->p_lock); | | 1227 | mutex_enter(p->p_lock); |
1230 | if (ISSET(p->p_sflag, PS_WEXIT)) { | | 1228 | if (ISSET(p->p_sflag, PS_WEXIT)) { |
1231 | mutex_exit(p->p_lock); | | 1229 | mutex_exit(p->p_lock); |
1232 | /* | | 1230 | /* |
1233 | * We are exiting, bail out without informing parent | | 1231 | * We are exiting, bail out without informing parent |
1234 | * about a terminating LWP as it would deadlock. | | 1232 | * about a terminating LWP as it would deadlock. |
1235 | */ | | 1233 | */ |
1236 | } else { | | 1234 | } else { |
1237 | eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); | | 1235 | eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); |
1238 | mutex_enter(proc_lock); | | 1236 | mutex_enter(proc_lock); |
1239 | } | | 1237 | } |
1240 | } | | 1238 | } |
1241 | | | 1239 | |
1242 | LIST_REMOVE(l, l_list); | | 1240 | LIST_REMOVE(l, l_list); |
1243 | if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { | | 1241 | if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { |
1244 | proc_free_pid(l->l_lid); | | 1242 | proc_free_pid(l->l_lid); |
1245 | } | | 1243 | } |
1246 | mutex_exit(proc_lock); | | 1244 | mutex_exit(proc_lock); |
1247 | | | 1245 | |
1248 | /* | | 1246 | /* |
1249 | * Get rid of all references to the LWP that others (e.g. procfs) | | 1247 | * Get rid of all references to the LWP that others (e.g. procfs) |
1250 | * may have, and mark the LWP as a zombie. If the LWP is detached, | | 1248 | * may have, and mark the LWP as a zombie. If the LWP is detached, |
1251 | * mark it waiting for collection in the proc structure. Note that | | 1249 | * mark it waiting for collection in the proc structure. Note that |
1252 | * before we can do that, we need to free any other dead, detached | | 1250 | * before we can do that, we need to free any other dead, detached |
1253 | * LWP waiting to meet its maker. | | 1251 | * LWP waiting to meet its maker. |
1254 | * | | 1252 | * |
1255 | * All conditions need to be observed under the same hold of | | 1253 | * All conditions need to be observed under the same hold of |
1256 | * p_lock, because if the lock is dropped any of them can change. | | 1254 | * p_lock, because if the lock is dropped any of them can change. |
1257 | */ | | 1255 | */ |
1258 | mutex_enter(p->p_lock); | | 1256 | mutex_enter(p->p_lock); |
1259 | for (;;) { | | 1257 | for (;;) { |
1260 | if (lwp_drainrefs(l)) | | 1258 | if (lwp_drainrefs(l)) |
1261 | continue; | | 1259 | continue; |
1262 | if ((l->l_prflag & LPR_DETACHED) != 0) { | | 1260 | if ((l->l_prflag & LPR_DETACHED) != 0) { |
1263 | if ((l2 = p->p_zomblwp) != NULL) { | | 1261 | if ((l2 = p->p_zomblwp) != NULL) { |
1264 | p->p_zomblwp = NULL; | | 1262 | p->p_zomblwp = NULL; |
1265 | lwp_free(l2, false, false); | | 1263 | lwp_free(l2, false, false); |
1266 | /* proc now unlocked */ | | 1264 | /* proc now unlocked */ |
1267 | mutex_enter(p->p_lock); | | 1265 | mutex_enter(p->p_lock); |
1268 | continue; | | 1266 | continue; |
1269 | } | | 1267 | } |
1270 | p->p_zomblwp = l; | | 1268 | p->p_zomblwp = l; |
1271 | } | | 1269 | } |
1272 | break; | | 1270 | break; |
1273 | } | | 1271 | } |
1274 | | | 1272 | |
1275 | /* | | 1273 | /* |
1276 | * If we find a pending signal for the process and we have been | | 1274 | * If we find a pending signal for the process and we have been |
1277 | * asked to check for signals, then we lose: arrange to have | | 1275 | * asked to check for signals, then we lose: arrange to have |
1278 | * all other LWPs in the process check for signals. | | 1276 | * all other LWPs in the process check for signals. |
1279 | */ | | 1277 | */ |
1280 | if ((l->l_flag & LW_PENDSIG) != 0 && | | 1278 | if ((l->l_flag & LW_PENDSIG) != 0 && |
1281 | firstsig(&p->p_sigpend.sp_set) != 0) { | | 1279 | firstsig(&p->p_sigpend.sp_set) != 0) { |
1282 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { | | 1280 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { |
1283 | lwp_lock(l2); | | 1281 | lwp_lock(l2); |
1284 | signotify(l2); | | 1282 | signotify(l2); |
1285 | lwp_unlock(l2); | | 1283 | lwp_unlock(l2); |
1286 | } | | 1284 | } |
1287 | } | | 1285 | } |
1288 | | | 1286 | |
1289 | /* | | 1287 | /* |
1290 | * Release any PCU resources before becoming a zombie. | | 1288 | * Release any PCU resources before becoming a zombie. |
1291 | */ | | 1289 | */ |
1292 | pcu_discard_all(l); | | 1290 | pcu_discard_all(l); |
1293 | | | 1291 | |
1294 | lwp_lock(l); | | 1292 | lwp_lock(l); |
1295 | l->l_stat = LSZOMB; | | 1293 | l->l_stat = LSZOMB; |
1296 | if (l->l_name != NULL) { | | 1294 | if (l->l_name != NULL) { |
1297 | strcpy(l->l_name, "(zombie)"); | | 1295 | strcpy(l->l_name, "(zombie)"); |
1298 | } | | 1296 | } |
1299 | lwp_unlock(l); | | 1297 | lwp_unlock(l); |
1300 | p->p_nrlwps--; | | 1298 | p->p_nrlwps--; |
1301 | cv_broadcast(&p->p_lwpcv); | | 1299 | cv_broadcast(&p->p_lwpcv); |
1302 | if (l->l_lwpctl != NULL) | | 1300 | if (l->l_lwpctl != NULL) |
1303 | l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; | | 1301 | l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; |
1304 | mutex_exit(p->p_lock); | | 1302 | mutex_exit(p->p_lock); |
1305 | | | 1303 | |
1306 | /* | | 1304 | /* |
1307 | * We can no longer block. At this point, lwp_free() may already | | 1305 | * We can no longer block. At this point, lwp_free() may already |
1308 | * be gunning for us. On a multi-CPU system, we may be off p_lwps. | | 1306 | * be gunning for us. On a multi-CPU system, we may be off p_lwps. |
1309 | * | | 1307 | * |
1310 | * Free MD LWP resources. | | 1308 | * Free MD LWP resources. |
1311 | */ | | 1309 | */ |
1312 | cpu_lwp_free(l, 0); | | 1310 | cpu_lwp_free(l, 0); |
1313 | | | 1311 | |
1314 | if (current) { | | 1312 | if (current) { |
1315 | /* Switch away into oblivion. */ | | 1313 | /* Switch away into oblivion. */ |
1316 | lwp_lock(l); | | 1314 | lwp_lock(l); |
1317 | spc_lock(l->l_cpu); | | 1315 | spc_lock(l->l_cpu); |
1318 | mi_switch(l); | | 1316 | mi_switch(l); |
1319 | panic("lwp_exit"); | | 1317 | panic("lwp_exit"); |
1320 | } | | 1318 | } |
1321 | } | | 1319 | } |
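/*
 * Worked example of the p_zomblwp protocol above: at most one dead,
 * detached LWP is parked in p->p_zomblwp awaiting collection.  A
 * second detached LWP exiting first frees the current occupant with
 * lwp_free() (which drops p_lock, hence the retry loop) and then
 * parks itself in the now-empty slot.
 */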
1322 | | | 1320 | |
1323 | /* | | 1321 | /* |
1324 | * Free a dead LWP's remaining resources. | | 1322 | * Free a dead LWP's remaining resources. |
1325 | * | | 1323 | * |
1326 | * XXXLWP limits. | | 1324 | * XXXLWP limits. |
1327 | */ | | 1325 | */ |
1328 | void | | 1326 | void |
1329 | lwp_free(struct lwp *l, bool recycle, bool last) | | 1327 | lwp_free(struct lwp *l, bool recycle, bool last) |
1330 | { | | 1328 | { |
1331 | struct proc *p = l->l_proc; | | 1329 | struct proc *p = l->l_proc; |
1332 | struct rusage *ru; | | 1330 | struct rusage *ru; |
1333 | struct lwp *l2 __diagused; | | 1331 | struct lwp *l2 __diagused; |
1334 | ksiginfoq_t kq; | | 1332 | ksiginfoq_t kq; |
1335 | | | 1333 | |
1336 | KASSERT(l != curlwp); | | 1334 | KASSERT(l != curlwp); |
1337 | KASSERT(last || mutex_owned(p->p_lock)); | | 1335 | KASSERT(last || mutex_owned(p->p_lock)); |
1338 | | | 1336 | |
1339 | /* | | 1337 | /* |
1340 | * We use the process credentials instead of the lwp credentials here | | 1338 | * We use the process credentials instead of the lwp credentials here |
1341 | * because the lwp credentials may be cached (just after a setuid call) | | 1339 | * because the lwp credentials may be cached (just after a setuid call) |
1342 | * and we don't want to pay for syncing, since the lwp is going away | | 1340 | * and we don't want to pay for syncing, since the lwp is going away |
1343 | * anyway. | | 1341 | * anyway. |
1344 | */ | | 1342 | */ |
1345 | if (p != &proc0 && p->p_nlwps != 1) | | 1343 | if (p != &proc0 && p->p_nlwps != 1) |
1346 | (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); | | 1344 | (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); |
1347 | | | 1345 | |
1348 | /* | | 1346 | /* |
1349 | * If this was not the last LWP in the process, then adjust counters | | 1347 | * If this was not the last LWP in the process, then adjust counters |
1350 | * and unlock. This is done differently for the last LWP in exit1(). | | 1348 | * and unlock. This is done differently for the last LWP in exit1(). |
1351 | */ | | 1349 | */ |
1352 | if (!last) { | | 1350 | if (!last) { |
1353 | /* | | 1351 | /* |
1354 | * Add the LWP's run time to the process' base value. | | 1352 | * Add the LWP's run time to the process' base value. |
1355 | * This needs to coincide with coming off p_lwps. | | 1353 | * This needs to coincide with coming off p_lwps. |
1356 | */ | | 1354 | */ |
1357 | bintime_add(&p->p_rtime, &l->l_rtime); | | 1355 | bintime_add(&p->p_rtime, &l->l_rtime); |
1358 | p->p_pctcpu += l->l_pctcpu; | | 1356 | p->p_pctcpu += l->l_pctcpu; |
1359 | ru = &p->p_stats->p_ru; | | 1357 | ru = &p->p_stats->p_ru; |
1360 | ruadd(ru, &l->l_ru); | | 1358 | ruadd(ru, &l->l_ru); |
1361 | ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); | | 1359 | ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); |
1362 | ru->ru_nivcsw += l->l_nivcsw; | | 1360 | ru->ru_nivcsw += l->l_nivcsw; |
1363 | LIST_REMOVE(l, l_sibling); | | 1361 | LIST_REMOVE(l, l_sibling); |
1364 | p->p_nlwps--; | | 1362 | p->p_nlwps--; |
1365 | p->p_nzlwps--; | | 1363 | p->p_nzlwps--; |
1366 | if ((l->l_prflag & LPR_DETACHED) != 0) | | 1364 | if ((l->l_prflag & LPR_DETACHED) != 0) |
1367 | p->p_ndlwps--; | | 1365 | p->p_ndlwps--; |
1368 | | | 1366 | |
1369 | /* Make note of the LID being free, and remove from tree. */ | | 1367 | /* Make note of the LID being free, and remove from tree. */ |
1370 | if (l->l_lid < p->p_nlwpid) | | 1368 | if (l->l_lid < p->p_nlwpid) |
1371 | p->p_nlwpid = l->l_lid; | | 1369 | p->p_nlwpid = l->l_lid; |
1372 | rw_enter(&p->p_treelock, RW_WRITER); | | 1370 | rw_enter(&p->p_treelock, RW_WRITER); |
1373 | l2 = radix_tree_remove_node(&p->p_lwptree, | | 1371 | l2 = radix_tree_remove_node(&p->p_lwptree, |
1374 | (uint64_t)(l->l_lid - 1)); | | 1372 | (uint64_t)(l->l_lid - 1)); |
1375 | KASSERT(l2 == l); | | 1373 | KASSERT(l2 == l); |
1376 | rw_exit(&p->p_treelock); | | 1374 | rw_exit(&p->p_treelock); |
1377 | | | 1375 | |
1378 | /* | | 1376 | /* |
1379 | * Have any LWPs sleeping in lwp_wait() recheck for | | 1377 | * Have any LWPs sleeping in lwp_wait() recheck for |
1380 | * deadlock. | | 1378 | * deadlock. |
1381 | */ | | 1379 | */ |
1382 | cv_broadcast(&p->p_lwpcv); | | 1380 | cv_broadcast(&p->p_lwpcv); |
1383 | mutex_exit(p->p_lock); | | 1381 | mutex_exit(p->p_lock); |
1384 | } | | 1382 | } |
1385 | | | 1383 | |
1386 | /* | | 1384 | /* |
1387 | * In the unlikely event that the LWP is still on the CPU, | | 1385 | * In the unlikely event that the LWP is still on the CPU, |
1388 | * then spin until it has switched away. | | 1386 | * then spin until it has switched away. |
1389 | */ | | 1387 | */ |
1390 | membar_consumer(); | | 1388 | membar_consumer(); |
1391 | while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) { | | 1389 | while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) { |
1392 | SPINLOCK_BACKOFF_HOOK; | | 1390 | SPINLOCK_BACKOFF_HOOK; |
1393 | } | | 1391 | } |
1394 | | | 1392 | |
1395 | /* | | 1393 | /* |
1396 | * Destroy the LWP's remaining signal information. | | 1394 | * Destroy the LWP's remaining signal information. |
1397 | */ | | 1395 | */ |
1398 | ksiginfo_queue_init(&kq); | | 1396 | ksiginfo_queue_init(&kq); |
1399 | sigclear(&l->l_sigpend, NULL, &kq); | | 1397 | sigclear(&l->l_sigpend, NULL, &kq); |
1400 | ksiginfo_queue_drain(&kq); | | 1398 | ksiginfo_queue_drain(&kq); |
1401 | cv_destroy(&l->l_sigcv); | | 1399 | cv_destroy(&l->l_sigcv); |
1402 | cv_destroy(&l->l_waitcv); | | 1400 | cv_destroy(&l->l_waitcv); |
1403 | | | 1401 | |
1404 | /* | | 1402 | /* |
1405 | * Free lwpctl structure and affinity. | | 1403 | * Free lwpctl structure and affinity. |
1406 | */ | | 1404 | */ |
1407 | if (l->l_lwpctl) { | | 1405 | if (l->l_lwpctl) { |
1408 | lwp_ctl_free(l); | | 1406 | lwp_ctl_free(l); |
1409 | } | | 1407 | } |
1410 | if (l->l_affinity) { | | 1408 | if (l->l_affinity) { |
1411 | kcpuset_unuse(l->l_affinity, NULL); | | 1409 | kcpuset_unuse(l->l_affinity, NULL); |
1412 | l->l_affinity = NULL; | | 1410 | l->l_affinity = NULL; |
1413 | } | | 1411 | } |
1414 | | | 1412 | |
1415 | /* | | 1413 | /* |
1416 | * Free the LWP's turnstile and the LWP structure itself unless the | | 1414 | * Free the LWP's turnstile and the LWP structure itself unless the |
1417 | * caller wants to recycle them. Also, free the scheduler specific | | 1415 | * caller wants to recycle them. Also, free the scheduler specific |
1418 | * data. | | 1416 | * data. |
1419 | * | | 1417 | * |
1420 | * We can't return turnstile0 to the pool (it didn't come from it), | | 1418 | * We can't return turnstile0 to the pool (it didn't come from it), |
1421 | * so if it comes up just drop it quietly and move on. | | 1419 | * so if it comes up just drop it quietly and move on. |
1422 | * | | 1420 | * |
1423 | * We don't recycle the VM resources at this time. | | 1421 | * We don't recycle the VM resources at this time. |
1424 | */ | | 1422 | */ |
1425 | | | 1423 | |
1426 | if (!recycle && l->l_ts != &turnstile0) | | 1424 | if (!recycle && l->l_ts != &turnstile0) |
1427 | pool_cache_put(turnstile_cache, l->l_ts); | | 1425 | pool_cache_put(turnstile_cache, l->l_ts); |
1428 | if (l->l_name != NULL) | | 1426 | if (l->l_name != NULL) |
1429 | kmem_free(l->l_name, MAXCOMLEN); | | 1427 | kmem_free(l->l_name, MAXCOMLEN); |
1430 | | | 1428 | |
1431 | kmsan_lwp_free(l); | | 1429 | kmsan_lwp_free(l); |
1432 | kcov_lwp_free(l); | | 1430 | kcov_lwp_free(l); |
1433 | cpu_lwp_free2(l); | | 1431 | cpu_lwp_free2(l); |
1434 | uvm_lwp_exit(l); | | 1432 | uvm_lwp_exit(l); |
1435 | | | 1433 | |
1436 | KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); | | 1434 | KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); |
1437 | KASSERT(l->l_inheritedprio == -1); | | 1435 | KASSERT(l->l_inheritedprio == -1); |
1438 | KASSERT(l->l_blcnt == 0); | | 1436 | KASSERT(l->l_blcnt == 0); |
1439 | kdtrace_thread_dtor(NULL, l); | | 1437 | kdtrace_thread_dtor(NULL, l); |
1440 | if (!recycle) | | 1438 | if (!recycle) |
1441 | pool_cache_put(lwp_cache, l); | | 1439 | pool_cache_put(lwp_cache, l); |
1442 | } | | 1440 | } |
1443 | | | 1441 | |
1444 | /* | | 1442 | /* |
1445 | * Migrate the LWP to another CPU. Unlocks the LWP. | | 1443 | * Migrate the LWP to another CPU. Unlocks the LWP. |
1446 | */ | | 1444 | */ |
1447 | void | | 1445 | void |
1448 | lwp_migrate(lwp_t *l, struct cpu_info *tci) | | 1446 | lwp_migrate(lwp_t *l, struct cpu_info *tci) |
1449 | { | | 1447 | { |
1450 | struct schedstate_percpu *tspc; | | 1448 | struct schedstate_percpu *tspc; |
1451 | int lstat = l->l_stat; | | 1449 | int lstat = l->l_stat; |
1452 | | | 1450 | |
1453 | KASSERT(lwp_locked(l, NULL)); | | 1451 | KASSERT(lwp_locked(l, NULL)); |
1454 | KASSERT(tci != NULL); | | 1452 | KASSERT(tci != NULL); |
1455 | | | 1453 | |
1456 | /* If LWP is still on the CPU, it must be handled like LSONPROC */ | | 1454 | /* If LWP is still on the CPU, it must be handled like LSONPROC */ |
1457 | if ((l->l_pflag & LP_RUNNING) != 0) { | | 1455 | if ((l->l_pflag & LP_RUNNING) != 0) { |
1458 | lstat = LSONPROC; | | 1456 | lstat = LSONPROC; |
1459 | } | | 1457 | } |
1460 | | | 1458 | |
1461 | /* | | 1459 | /* |
1462 | * A new destination CPU can be set while a previous migration | | 1460 | * A new destination CPU can be set while a previous migration |
1463 | * is still pending; just retarget it. | | 1461 | * is still pending; just retarget it. |
1464 | */ | | 1462 | */ |
1465 | if (l->l_target_cpu != NULL) { | | 1463 | if (l->l_target_cpu != NULL) { |
1466 | l->l_target_cpu = tci; | | 1464 | l->l_target_cpu = tci; |
1467 | lwp_unlock(l); | | 1465 | lwp_unlock(l); |
1468 | return; | | 1466 | return; |
1469 | } | | 1467 | } |
1470 | | | 1468 | |
1471 | /* Nothing to do if trying to migrate to the same CPU */ | | 1469 | /* Nothing to do if trying to migrate to the same CPU */ |
1472 | if (l->l_cpu == tci) { | | 1470 | if (l->l_cpu == tci) { |
1473 | lwp_unlock(l); | | 1471 | lwp_unlock(l); |
1474 | return; | | 1472 | return; |
1475 | } | | 1473 | } |
1476 | | | 1474 | |
1477 | KASSERT(l->l_target_cpu == NULL); | | 1475 | KASSERT(l->l_target_cpu == NULL); |
1478 | tspc = &tci->ci_schedstate; | | 1476 | tspc = &tci->ci_schedstate; |
1479 | switch (lstat) { | | 1477 | switch (lstat) { |
1480 | case LSRUN: | | 1478 | case LSRUN: |
1481 | l->l_target_cpu = tci; | | 1479 | l->l_target_cpu = tci; |
1482 | break; | | 1480 | break; |
1483 | case LSSLEEP: | | 1481 | case LSSLEEP: |
1484 | l->l_cpu = tci; | | 1482 | l->l_cpu = tci; |
1485 | break; | | 1483 | break; |
1486 | case LSIDL: | | 1484 | case LSIDL: |
1487 | case LSSTOP: | | 1485 | case LSSTOP: |
1488 | case LSSUSPENDED: | | 1486 | case LSSUSPENDED: |
1489 | l->l_cpu = tci; | | 1487 | l->l_cpu = tci; |
1490 | if (l->l_wchan == NULL) { | | 1488 | if (l->l_wchan == NULL) { |
1491 | lwp_unlock_to(l, tspc->spc_lwplock); | | 1489 | lwp_unlock_to(l, tspc->spc_lwplock); |
1492 | return; | | 1490 | return; |
1493 | } | | 1491 | } |
1494 | break; | | 1492 | break; |
1495 | case LSONPROC: | | 1493 | case LSONPROC: |
1496 | l->l_target_cpu = tci; | | 1494 | l->l_target_cpu = tci; |
1497 | spc_lock(l->l_cpu); | | 1495 | spc_lock(l->l_cpu); |
1498 | sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); | | 1496 | sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); |
1499 | /* spc now unlocked */ | | 1497 | /* spc now unlocked */ |
1500 | break; | | 1498 | break; |
1501 | } | | 1499 | } |
1502 | lwp_unlock(l); | | 1500 | lwp_unlock(l); |
1503 | } | | 1501 | } |
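/*
 * Summary of the per-state cases above (descriptive only):
 *
 *	LSRUN, LSONPROC           set l_target_cpu; the scheduler
 *	                          completes the move at the next switch
 *	                          (LSONPROC also kicks the current CPU).
 *	LSSLEEP                   retarget l_cpu directly; the LWP is
 *	                          not on a run queue.
 *	LSIDL/LSSTOP/LSSUSPENDED  retarget l_cpu; if not on a sleep
 *	                          queue, hand the LWP to the target
 *	                          CPU's spc_lwplock on the way out.
 */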
1504 | | | 1502 | |
1505 | /* | | 1503 | /* |
1506 | * Find the LWP in the process. Arguments may be zero, in which case | | 1504 | * Find the LWP in the process. Arguments may be zero, in which case |
1507 | * the calling process and the first LWP in the list will be used. | | 1505 | * the calling process and the first LWP in the list will be used. |
1508 | * On success, returns the proc locked. | | 1506 | * On success, returns the proc locked. |
1509 | */ | | 1507 | */ |
1510 | struct lwp * | | 1508 | struct lwp * |
1511 | lwp_find2(pid_t pid, lwpid_t lid) | | 1509 | lwp_find2(pid_t pid, lwpid_t lid) |
1512 | { | | 1510 | { |
1513 | proc_t *p; | | 1511 | proc_t *p; |
1514 | lwp_t *l; | | 1512 | lwp_t *l; |
1515 | | | 1513 | |
1516 | /* Find the process. */ | | 1514 | /* Find the process. */ |
1517 | if (pid != 0) { | | 1515 | if (pid != 0) { |
1518 | mutex_enter(proc_lock); | | 1516 | mutex_enter(proc_lock); |
1519 | p = proc_find(pid); | | 1517 | p = proc_find(pid); |
1520 | if (p == NULL) { | | 1518 | if (p == NULL) { |
1521 | mutex_exit(proc_lock); | | 1519 | mutex_exit(proc_lock); |
1522 | return NULL; | | 1520 | return NULL; |
1523 | } | | 1521 | } |
1524 | mutex_enter(p->p_lock); | | 1522 | mutex_enter(p->p_lock); |
1525 | mutex_exit(proc_lock); | | 1523 | mutex_exit(proc_lock); |
1526 | } else { | | 1524 | } else { |
1527 | p = curlwp->l_proc; | | 1525 | p = curlwp->l_proc; |
1528 | mutex_enter(p->p_lock); | | 1526 | mutex_enter(p->p_lock); |
1529 | } | | 1527 | } |
1530 | /* Find the thread. */ | | 1528 | /* Find the thread. */ |
1531 | if (lid != 0) { | | 1529 | if (lid != 0) { |
1532 | l = lwp_find(p, lid); | | 1530 | l = lwp_find(p, lid); |
1533 | } else { | | 1531 | } else { |
1534 | l = LIST_FIRST(&p->p_lwps); | | 1532 | l = LIST_FIRST(&p->p_lwps); |
1535 | } | | 1533 | } |
1536 | if (l == NULL) { | | 1534 | if (l == NULL) { |
1537 | mutex_exit(p->p_lock); | | 1535 | mutex_exit(p->p_lock); |
1538 | } | | 1536 | } |
1539 | return l; | | 1537 | return l; |
1540 | } | | 1538 | } |
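/*
 * Hedged usage sketch for lwp_find2(): on success the process comes
 * back locked, so the caller holds p_lock until it is done with the
 * thread and must drop it itself.
 */
	struct lwp *t;

	if ((t = lwp_find2(pid, lid)) != NULL) {
		/* ... inspect t while t->l_proc->p_lock is held ... */
		mutex_exit(t->l_proc->p_lock);
	}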
1541 | | | 1539 | |
1542 | /* | | 1540 | /* |
1543 | * Look up a live LWP within the specified process. | | 1541 | * Look up a live LWP within the specified process. |
1544 | * | | 1542 | * |
1545 | * Must be called with p->p_lock held (as it looks at the radix tree, | | 1543 | * Must be called with p->p_lock held (as it looks at the radix tree, |
1546 | * and also wants to exclude idle and zombie LWPs). | | 1544 | * and also wants to exclude idle and zombie LWPs). |
1547 | */ | | 1545 | */ |
1548 | struct lwp * | | 1546 | struct lwp * |
1549 | lwp_find(struct proc *p, lwpid_t id) | | 1547 | lwp_find(struct proc *p, lwpid_t id) |
1550 | { | | 1548 | { |
1551 | struct lwp *l; | | 1549 | struct lwp *l; |
1552 | | | 1550 | |
1553 | KASSERT(mutex_owned(p->p_lock)); | | 1551 | KASSERT(mutex_owned(p->p_lock)); |
1554 | | | 1552 | |
1555 | l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1)); | | 1553 | l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1)); |
1556 | KASSERT(l == NULL || l->l_lid == id); | | 1554 | KASSERT(l == NULL || l->l_lid == id); |
1557 | | | 1555 | |
1558 | /* | | 1556 | /* |
1559 | * No need to lock - all of these conditions will | | 1557 | * No need to lock - all of these conditions will |
1560 | * be visible with the process level mutex held. | | 1558 | * be visible with the process level mutex held. |
1561 | */ | | 1559 | */ |
1562 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) | | 1560 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) |
1563 | l = NULL; | | 1561 | l = NULL; |
1564 | | | 1562 | |
1565 | return l; | | 1563 | return l; |
1566 | } | | 1564 | } |
1567 | | | 1565 | |
1568 | /* | | 1566 | /* |
1569 | * Update an LWP's cached credentials to mirror the process' master copy. | | 1567 | * Update an LWP's cached credentials to mirror the process' master copy. |
1570 | * | | 1568 | * |
1571 | * This happens early in the syscall path, on user trap, and on LWP | | 1569 | * This happens early in the syscall path, on user trap, and on LWP |
1572 | * creation. A long-running LWP can also voluntarily choose to update | | 1570 | * creation. A long-running LWP can also voluntarily choose to update |
1573 | * its credentials by calling this routine. This may be called from | | 1571 | * its credentials by calling this routine. This may be called from |
1574 | * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. | | 1572 | * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. |
1575 | */ | | 1573 | */ |
1576 | void | | 1574 | void |
1577 | lwp_update_creds(struct lwp *l) | | 1575 | lwp_update_creds(struct lwp *l) |
1578 | { | | 1576 | { |
1579 | kauth_cred_t oc; | | 1577 | kauth_cred_t oc; |
1580 | struct proc *p; | | 1578 | struct proc *p; |
1581 | | | 1579 | |
1582 | p = l->l_proc; | | 1580 | p = l->l_proc; |
1583 | oc = l->l_cred; | | 1581 | oc = l->l_cred; |
1584 | | | 1582 | |
1585 | mutex_enter(p->p_lock); | | 1583 | mutex_enter(p->p_lock); |
1586 | kauth_cred_hold(p->p_cred); | | 1584 | kauth_cred_hold(p->p_cred); |
1587 | l->l_cred = p->p_cred; | | 1585 | l->l_cred = p->p_cred; |
1588 | l->l_prflag &= ~LPR_CRMOD; | | 1586 | l->l_prflag &= ~LPR_CRMOD; |
1589 | mutex_exit(p->p_lock); | | 1587 | mutex_exit(p->p_lock); |
1590 | if (oc != NULL) | | 1588 | if (oc != NULL) |
1591 | kauth_cred_free(oc); | | 1589 | kauth_cred_free(oc); |
1592 | } | | 1590 | } |
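/*
 * Per the comment above, the fast path in LWP_CACHE_CREDS()
 * presumably reduces to an unlocked pointer comparison, taking
 * p_lock only when the cached credentials are actually stale:
 *
 *	if (__predict_false(l->l_cred != p->p_cred))
 *		lwp_update_creds(l);
 */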
1593 | | | 1591 | |
1594 | /* | | 1592 | /* |
1595 | * Verify that an LWP is locked, and optionally verify that the lock matches | | 1593 | * Verify that an LWP is locked, and optionally verify that the lock matches |
1596 | * one we specify. | | 1594 | * one we specify. |
1597 | */ | | 1595 | */ |
1598 | int | | 1596 | int |
1599 | lwp_locked(struct lwp *l, kmutex_t *mtx) | | 1597 | lwp_locked(struct lwp *l, kmutex_t *mtx) |
1600 | { | | 1598 | { |
1601 | kmutex_t *cur = l->l_mutex; | | 1599 | kmutex_t *cur = l->l_mutex; |
1602 | | | 1600 | |
1603 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); | | 1601 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); |
1604 | } | | 1602 | } |
1605 | | | 1603 | |
1606 | /* | | 1604 | /* |
1607 | * Lend a new mutex to an LWP. The old mutex must be held. | | 1605 | * Lend a new mutex to an LWP. The old mutex must be held. |
1608 | */ | | 1606 | */ |
1609 | kmutex_t * | | 1607 | kmutex_t * |
1610 | lwp_setlock(struct lwp *l, kmutex_t *mtx) | | 1608 | lwp_setlock(struct lwp *l, kmutex_t *mtx) |
1611 | { | | 1609 | { |
1612 | kmutex_t *oldmtx = l->l_mutex; | | 1610 | kmutex_t *oldmtx = l->l_mutex; |
1613 | | | 1611 | |
1614 | KASSERT(mutex_owned(oldmtx)); | | 1612 | KASSERT(mutex_owned(oldmtx)); |
1615 | | | 1613 | |
1616 | membar_exit(); | | 1614 | membar_exit(); |
1617 | l->l_mutex = mtx; | | 1615 | l->l_mutex = mtx; |
1618 | return oldmtx; | | 1616 | return oldmtx; |
1619 | } | | 1617 | } |
1620 | | | 1618 | |
1621 | /* | | 1619 | /* |
1622 | * Lend a new mutex to an LWP, and release the old mutex. The old mutex | | 1620 | * Lend a new mutex to an LWP, and release the old mutex. The old mutex |
1623 | * must be held. | | 1621 | * must be held. |
1624 | */ | | 1622 | */ |
1625 | void | | 1623 | void |
1626 | lwp_unlock_to(struct lwp *l, kmutex_t *mtx) | | 1624 | lwp_unlock_to(struct lwp *l, kmutex_t *mtx) |
1627 | { | | 1625 | { |
1628 | kmutex_t *old; | | 1626 | kmutex_t *old; |
1629 | | | 1627 | |
1630 | KASSERT(lwp_locked(l, NULL)); | | 1628 | KASSERT(lwp_locked(l, NULL)); |
1631 | | | 1629 | |
1632 | old = l->l_mutex; | | 1630 | old = l->l_mutex; |
1633 | membar_exit(); | | 1631 | membar_exit(); |
1634 | l->l_mutex = mtx; | | 1632 | l->l_mutex = mtx; |
1635 | mutex_spin_exit(old); | | 1633 | mutex_spin_exit(old); |
1636 | } | | 1634 | } |
1637 | | | 1635 | |
1638 | int | | 1636 | int |
1639 | lwp_trylock(struct lwp *l) | | 1637 | lwp_trylock(struct lwp *l) |
1640 | { | | 1638 | { |
1641 | kmutex_t *old; | | 1639 | kmutex_t *old; |
1642 | | | 1640 | |
1643 | for (;;) { | | 1641 | for (;;) { |
1644 | if (!mutex_tryenter(old = l->l_mutex)) | | 1642 | if (!mutex_tryenter(old = l->l_mutex)) |
1645 | return 0; | | 1643 | return 0; |
1646 | if (__predict_true(l->l_mutex == old)) | | 1644 | if (__predict_true(l->l_mutex == old)) |
1647 | return 1; | | 1645 | return 1; |
1648 | mutex_spin_exit(old); | | 1646 | mutex_spin_exit(old); |
1649 | } | | 1647 | } |
1650 | } | | 1648 | } |
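/*
 * Worked example of the race lwp_trylock() guards against: the lock
 * lent by lwp_setlock() can be swapped out while we are acquiring the
 * old one, so the acquisition only counts if l_mutex still points at
 * the mutex we actually took.
 *
 *	thread A				thread B
 *	old = l->l_mutex;
 *						lwp_setlock(l, new);
 *	mutex_tryenter(old);   succeeds,
 *	but l->l_mutex != old  ->  drop old and retry
 */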
1651 | | | 1649 | |
1652 | void | | 1650 | void |
1653 | lwp_unsleep(lwp_t *l, bool unlock) | | 1651 | lwp_unsleep(lwp_t *l, bool unlock) |
1654 | { | | 1652 | { |
1655 | | | 1653 | |
1656 | KASSERT(mutex_owned(l->l_mutex)); | | 1654 | KASSERT(mutex_owned(l->l_mutex)); |
1657 | (*l->l_syncobj->sobj_unsleep)(l, unlock); | | 1655 | (*l->l_syncobj->sobj_unsleep)(l, unlock); |
1658 | } | | 1656 | } |
1659 | | | 1657 | |
1660 | /* | | 1658 | /* |
1661 | * Handle exceptions for mi_userret(). Called if any of the flags in | | 1659 | * Handle exceptions for mi_userret(). Called if any of the flags in |
1662 | * LW_USERRET is set. | | 1660 | * LW_USERRET is set. |
1663 | */ | | 1661 | */ |
1664 | void | | 1662 | void |
1665 | lwp_userret(struct lwp *l) | | 1663 | lwp_userret(struct lwp *l) |
1666 | { | | 1664 | { |
1667 | struct proc *p; | | 1665 | struct proc *p; |
1668 | int sig; | | 1666 | int sig; |
1669 | | | 1667 | |
1670 | KASSERT(l == curlwp); | | 1668 | KASSERT(l == curlwp); |
1671 | KASSERT(l->l_stat == LSONPROC); | | 1669 | KASSERT(l->l_stat == LSONPROC); |
1672 | p = l->l_proc; | | 1670 | p = l->l_proc; |
1673 | | | 1671 | |
1674 | /* | | 1672 | /* |
1675 | * It is safe to do this read unlocked on an MP system. | | 1673 | * It is safe to do this read unlocked on an MP system. |
1676 | */ | | 1674 | */ |
	while ((l->l_flag & LW_USERRET) != 0) {
		/*
		 * Process pending signals first, unless the process
		 * is dumping core or exiting, in which case we will
		 * instead enter the LW_WSUSPEND case below.
		 */
		if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
		    LW_PENDSIG) {
			mutex_enter(p->p_lock);
			while ((sig = issignal(l)) != 0)
				postsig(sig);
			mutex_exit(p->p_lock);
		}

		/*
		 * Core-dump or suspend pending.
		 *
		 * In case of core dump, suspend ourselves, so that the
		 * kernel stack and therefore the userland registers saved
		 * in the trapframe remain available for coredump() to
		 * write out.  We also need to save any PCU resources that
		 * we have, so that they are accessible for coredump().
		 * We issue a wakeup on p->p_lwpcv so that sigexit() will
		 * write the core file out once all other LWPs are
		 * suspended.
		 */
		if ((l->l_flag & LW_WSUSPEND) != 0) {
			pcu_save_all(l);
			mutex_enter(p->p_lock);
			p->p_nrlwps--;
			cv_broadcast(&p->p_lwpcv);
			lwp_lock(l);
			l->l_stat = LSSUSPENDED;
			lwp_unlock(l);
			mutex_exit(p->p_lock);
			lwp_lock(l);
			spc_lock(l->l_cpu);
			mi_switch(l);
		}

		/* Process is exiting. */
		if ((l->l_flag & LW_WEXIT) != 0) {
			lwp_exit(l);
			KASSERT(0);
			/* NOTREACHED */
		}

		/* update lwpctl processor (for vfork child_return) */
		if (l->l_flag & LW_LWPCTL) {
			lwp_lock(l);
			KASSERT(kpreempt_disabled());
			l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu);
			l->l_lwpctl->lc_pctr++;
			l->l_flag &= ~LW_LWPCTL;
			lwp_unlock(l);
		}
	}
}
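
/*
 * Sketch, not the actual MD code: on the way back to user mode, the
 * machine-dependent userret path checks LW_USERRET and diverts through
 * lwp_userret() only in the uncommon case.  The function name below is
 * illustrative.
 */
#if 0
static inline void
example_userret(struct lwp *l)
{
	if (__predict_false((l->l_flag & LW_USERRET) != 0))
		lwp_userret(l);
}
#endif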

/*
 * Force an LWP to enter the kernel, to take a trip through lwp_userret().
 */
void
lwp_need_userret(struct lwp *l)
{

	KASSERT(!cpu_intr_p());
	KASSERT(lwp_locked(l, NULL));

	/*
	 * If the LWP is in any state other than LSONPROC, we know that it
	 * is executing in-kernel and will hit userret() on the way out.
	 *