| @@ -1,1922 +1,1923 @@ | | | @@ -1,1922 +1,1923 @@ |
1 | /* $NetBSD: kern_lwp.c,v 1.221 2020/01/26 19:06:24 ad Exp $ */ | | 1 | /* $NetBSD: kern_lwp.c,v 1.222 2020/01/27 21:58:16 ad Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 | | 4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 |
5 | * The NetBSD Foundation, Inc. | | 5 | * The NetBSD Foundation, Inc. |
6 | * All rights reserved. | | 6 | * All rights reserved. |
7 | * | | 7 | * |
8 | * This code is derived from software contributed to The NetBSD Foundation | | 8 | * This code is derived from software contributed to The NetBSD Foundation |
9 | * by Nathan J. Williams, and Andrew Doran. | | 9 | * by Nathan J. Williams, and Andrew Doran. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. | | 15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright | | 16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the | | 17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. | | 18 | * documentation and/or other materials provided with the distribution. |
19 | * | | 19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. | | 30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ | | 31 | */ |
32 | | | 32 | |
33 | /* | | 33 | /* |
34 | * Overview | | 34 | * Overview |
35 | * | | 35 | * |
36 | * Lightweight processes (LWPs) are the basic unit or thread of | | 36 | * Lightweight processes (LWPs) are the basic unit or thread of |
37 | * execution within the kernel. The core state of an LWP is described | | 37 | * execution within the kernel. The core state of an LWP is described |
38 | * by "struct lwp", also known as lwp_t. | | 38 | * by "struct lwp", also known as lwp_t. |
39 | * | | 39 | * |
40 | * Each LWP is contained within a process (described by "struct proc"). | | 40 | * Each LWP is contained within a process (described by "struct proc"). |
41 | * Every process contains at least one LWP, but may contain more. The | | 41 | * Every process contains at least one LWP, but may contain more. The |
42 | * process describes attributes shared among all of its LWPs such as a | | 42 | * process describes attributes shared among all of its LWPs such as a |
43 | * private address space, global execution state (stopped, active, | | 43 | * private address space, global execution state (stopped, active, |
44 | * zombie, ...), signal disposition and so on. On a multiprocessor | | 44 | * zombie, ...), signal disposition and so on. On a multiprocessor |
45 | * machine, multiple LWPs may be executing concurrently in the kernel. | | 45 | * machine, multiple LWPs may be executing concurrently in the kernel. |
46 | * | | 46 | * |
47 | * Execution states | | 47 | * Execution states |
48 | * | | 48 | * |
49 | * At any given time, an LWP has overall state that is described by | | 49 | * At any given time, an LWP has overall state that is described by |
50 | * lwp::l_stat. The states are broken into two sets below. The first | | 50 | * lwp::l_stat. The states are broken into two sets below. The first |
51 | * set is guaranteed to represent the absolute, current state of the | | 51 | * set is guaranteed to represent the absolute, current state of the |
52 | * LWP: | | 52 | * LWP: |
53 | * | | 53 | * |
54 | * LSONPROC | | 54 | * LSONPROC |
55 | * | | 55 | * |
56 | * On processor: the LWP is executing on a CPU, either in the | | 56 | * On processor: the LWP is executing on a CPU, either in the |
57 | * kernel or in user space. | | 57 | * kernel or in user space. |
58 | * | | 58 | * |
59 | * LSRUN | | 59 | * LSRUN |
60 | * | | 60 | * |
61 | * Runnable: the LWP is parked on a run queue, and may soon be | | 61 | * Runnable: the LWP is parked on a run queue, and may soon be |
62 | * chosen to run by an idle processor, or by a processor that | | 62 | * chosen to run by an idle processor, or by a processor that |
63 | * has been asked to preempt a currently running but lower | | 63 | * has been asked to preempt a currently running but lower |
64 | * priority LWP. | | 64 | * priority LWP. |
65 | * | | 65 | * |
66 | * LSIDL | | 66 | * LSIDL |
67 | * | | 67 | * |
68 | * Idle: the LWP has been created but has not yet executed, | | 68 | * Idle: the LWP has been created but has not yet executed, |
69 | * or it has ceased executing a unit of work and is waiting | | 69 | * or it has ceased executing a unit of work and is waiting |
70 | * to be started again. | | 70 | * to be started again. |
71 | * | | 71 | * |
72 | * LSSUSPENDED | | 72 | * LSSUSPENDED |
73 | * | | 73 | * |
74 | * Suspended: the LWP has had its execution suspended by | | 74 | * Suspended: the LWP has had its execution suspended by |
75 | * another LWP in the same process using the _lwp_suspend() | | 75 | * another LWP in the same process using the _lwp_suspend() |
76 | * system call. User-level LWPs also enter the suspended | | 76 | * system call. User-level LWPs also enter the suspended |
77 | * state when the system is shutting down. | | 77 | * state when the system is shutting down. |
78 | * | | 78 | * |
79 | * The second set represents a "statement of intent" on behalf of the | | 79 | * The second set represents a "statement of intent" on behalf of the |
80 | * LWP. The LWP may in fact be executing on a processor, or may be | | 80 | * LWP. The LWP may in fact be executing on a processor, or may be |
81 | * sleeping or idle. It is expected to take the necessary action to | | 81 | * sleeping or idle. It is expected to take the necessary action to |
82 | * stop executing or become "running" again within a short timeframe. | | 82 | * stop executing or become "running" again within a short timeframe. |
83 | * The LW_RUNNING flag in lwp::l_flag indicates that an LWP is running. | | 83 | * The LW_RUNNING flag in lwp::l_flag indicates that an LWP is running. |
84 | * Importantly, it indicates that its state is tied to a CPU. | | 84 | * Importantly, it indicates that its state is tied to a CPU. |
85 | * | | 85 | * |
86 | * LSZOMB: | | 86 | * LSZOMB: |
87 | * | | 87 | * |
88 | * Dead or dying: the LWP has released most of its resources | | 88 | * Dead or dying: the LWP has released most of its resources |
89 | * and is about to switch away into oblivion, or has already | | 89 | * and is about to switch away into oblivion, or has already |
90 | * switched away. When it switches away, its few remaining | | 90 | * switched away. When it switches away, its few remaining |
91 | * resources can be collected. | | 91 | * resources can be collected. |
92 | * | | 92 | * |
93 | * LSSLEEP: | | 93 | * LSSLEEP: |
94 | * | | 94 | * |
95 | * Sleeping: the LWP has entered itself onto a sleep queue, and | | 95 | * Sleeping: the LWP has entered itself onto a sleep queue, and |
96 | * has switched away or will switch away shortly to allow other | | 96 | * has switched away or will switch away shortly to allow other |
97 | * LWPs to run on the CPU. | | 97 | * LWPs to run on the CPU. |
98 | * | | 98 | * |
99 | * LSSTOP: | | 99 | * LSSTOP: |
100 | * | | 100 | * |
101 | * Stopped: the LWP has been stopped as a result of a job | | 101 | * Stopped: the LWP has been stopped as a result of a job |
102 | * control signal, or as a result of the ptrace() interface. | | 102 | * control signal, or as a result of the ptrace() interface. |
103 | * | | 103 | * |
104 | * Stopped LWPs may run briefly within the kernel to handle | | 104 | * Stopped LWPs may run briefly within the kernel to handle |
105 | * signals that they receive, but will not return to user space | | 105 | * signals that they receive, but will not return to user space |
106 | * until their process' state is changed away from stopped. | | 106 | * until their process' state is changed away from stopped. |
107 | * | | 107 | * |
108 | * Single LWPs within a process cannot be set stopped | | 108 | * Single LWPs within a process cannot be set stopped |
109 | * selectively: all actions that can stop or continue LWPs | | 109 | * selectively: all actions that can stop or continue LWPs |
110 | * occur at the process level. | | 110 | * occur at the process level. |
111 | * | | 111 | * |
112 | * State transitions | | 112 | * State transitions |
113 | * | | 113 | * |
114 | * Note that the LSSTOP state may only be set when returning to | | 114 | * Note that the LSSTOP state may only be set when returning to |
115 | * user space in userret(), or when sleeping interruptibly. The | | 115 | * user space in userret(), or when sleeping interruptibly. The |
116 | * LSSUSPENDED state may only be set in userret(). Before setting | | 116 | * LSSUSPENDED state may only be set in userret(). Before setting |
117 | * those states, we try to ensure that the LWPs will release all | | 117 | * those states, we try to ensure that the LWPs will release all |
118 | * locks that they hold, and at a minimum try to ensure that the | | 118 | * locks that they hold, and at a minimum try to ensure that the |
119 | * LWP can be set runnable again by a signal. | | 119 | * LWP can be set runnable again by a signal. |
120 | * | | 120 | * |
121 | * LWPs may transition states in the following ways: | | 121 | * LWPs may transition states in the following ways: |
122 | * | | 122 | * |
123 | * RUN -------> ONPROC ONPROC -----> RUN | | 123 | * RUN -------> ONPROC ONPROC -----> RUN |
124 | * > SLEEP | | 124 | * > SLEEP |
125 | * > STOPPED | | 125 | * > STOPPED |
126 | * > SUSPENDED | | 126 | * > SUSPENDED |
127 | * > ZOMB | | 127 | * > ZOMB |
128 | * > IDL (special cases) | | 128 | * > IDL (special cases) |
129 | * | | 129 | * |
130 | * STOPPED ---> RUN SUSPENDED --> RUN | | 130 | * STOPPED ---> RUN SUSPENDED --> RUN |
131 | * > SLEEP | | 131 | * > SLEEP |
132 | * | | 132 | * |
133 | * SLEEP -----> ONPROC IDL --------> RUN | | 133 | * SLEEP -----> ONPROC IDL --------> RUN |
134 | * > RUN > SUSPENDED | | 134 | * > RUN > SUSPENDED |
135 | * > STOPPED > STOPPED | | 135 | * > STOPPED > STOPPED |
136 | * > ONPROC (special cases) | | 136 | * > ONPROC (special cases) |
137 | * | | 137 | * |
138 | * Some state transitions are only possible with kernel threads (e.g. | | 138 | * Some state transitions are only possible with kernel threads (e.g. |
139 | * ONPROC -> IDL) and happen under tightly controlled circumstances | | 139 | * ONPROC -> IDL) and happen under tightly controlled circumstances |
140 | * free of unwanted side effects. | | 140 | * free of unwanted side effects. |
141 | * | | 141 | * |
142 | * Migration | | 142 | * Migration |
143 | * | | 143 | * |
144 | * Migration of threads from one CPU to another may be performed | | 144 | * Migration of threads from one CPU to another may be performed |
145 | * internally by the scheduler via the sched_takecpu() or sched_catchlwp() | | 145 | * internally by the scheduler via the sched_takecpu() or sched_catchlwp() |
146 | * functions. The universal lwp_migrate() function should be used for | | 146 | * functions. The universal lwp_migrate() function should be used for |
147 | * any other cases. Subsystems in the kernel must be aware that the CPU | | 147 | * any other cases. Subsystems in the kernel must be aware that the CPU |
148 | * of an LWP may change while the LWP is not locked. | | 148 | * of an LWP may change while the LWP is not locked. |
149 | * | | 149 | * |
150 | * Locking | | 150 | * Locking |
151 | * | | 151 | * |
152 | * The majority of fields in 'struct lwp' are covered by a single, | | 152 | * The majority of fields in 'struct lwp' are covered by a single, |
153 | * general spin lock pointed to by lwp::l_mutex. The locks covering | | 153 | * general spin lock pointed to by lwp::l_mutex. The locks covering |
154 | * each field are documented in sys/lwp.h. | | 154 | * each field are documented in sys/lwp.h. |
155 | * | | 155 | * |
156 | * State transitions must be made with the LWP's general lock held, | | 156 | * State transitions must be made with the LWP's general lock held, |
157 | * and may cause the LWP's lock pointer to change. Manipulation of | | 157 | * and may cause the LWP's lock pointer to change. Manipulation of |
158 | * the general lock is not performed directly, but through calls to | | 158 | * the general lock is not performed directly, but through calls to |
159 | * lwp_lock(), lwp_unlock() and others. It should be noted that the | | 159 | * lwp_lock(), lwp_unlock() and others. It should be noted that the |
160 | * adaptive locks are not allowed to be released while the LWP's lock | | 160 | * adaptive locks are not allowed to be released while the LWP's lock |
161 | * is being held (unlike for other spin-locks). | | 161 | * is being held (unlike for other spin-locks). |
162 | * | | 162 | * |
163 | * States and their associated locks: | | 163 | * States and their associated locks: |
164 | * | | 164 | * |
165 | * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: | | 165 | * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: |
166 | * | | 166 | * |
167 | * Always covered by spc_lwplock, which protects LWPs not | | 167 | * Always covered by spc_lwplock, which protects LWPs not |
168 | * associated with any other sync object. This is a per-CPU | | 168 | * associated with any other sync object. This is a per-CPU |
169 | * lock and matches lwp::l_cpu. | | 169 | * lock and matches lwp::l_cpu. |
170 | * | | 170 | * |
171 | * LSRUN: | | 171 | * LSRUN: |
172 | * | | 172 | * |
173 | * Always covered by spc_mutex, which protects the run queues. | | 173 | * Always covered by spc_mutex, which protects the run queues. |
174 | * This is a per-CPU lock and matches lwp::l_cpu. | | 174 | * This is a per-CPU lock and matches lwp::l_cpu. |
175 | * | | 175 | * |
176 | * LSSLEEP: | | 176 | * LSSLEEP: |
177 | * | | 177 | * |
178 | * Covered by a lock associated with the sleep queue (sometimes | | 178 | * Covered by a lock associated with the sleep queue (sometimes |
179 | * a turnstile sleep queue) that the LWP resides on. This can | | 179 | * a turnstile sleep queue) that the LWP resides on. This can |
180 | * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). | | 180 | * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). |
181 | * | | 181 | * |
182 | * LSSTOP: | | 182 | * LSSTOP: |
183 | * | | 183 | * |
184 | * If the LWP was previously sleeping (l_wchan != NULL), then | | 184 | * If the LWP was previously sleeping (l_wchan != NULL), then |
185 | * l_mutex references the sleep queue lock. If the LWP was | | 185 | * l_mutex references the sleep queue lock. If the LWP was |
186 | * runnable or on the CPU when halted, or has been removed from | | 186 | * runnable or on the CPU when halted, or has been removed from |
187 | * the sleep queue since halted, then the lock is spc_lwplock. | | 187 | * the sleep queue since halted, then the lock is spc_lwplock. |
188 | * | | 188 | * |
189 | * The lock order is as follows: | | 189 | * The lock order is as follows: |
190 | * | | 190 | * |
191 | * sleepq -> turnstile -> spc_lwplock -> spc_mutex | | 191 | * sleepq -> turnstile -> spc_lwplock -> spc_mutex |
192 | * | | 192 | * |
193 | * Each process has a scheduler state lock (proc::p_lock), and a | | 193 | * Each process has a scheduler state lock (proc::p_lock), and a |
194 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and | | 194 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and |
195 | * so on. When an LWP is to be entered into or removed from one of the | | 195 | * so on. When an LWP is to be entered into or removed from one of the |
196 | * following states, p_lock must be held and the process-wide counters | | 196 | * following states, p_lock must be held and the process-wide counters |
197 | * adjusted: | | 197 | * adjusted: |
198 | * | | 198 | * |
199 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED | | 199 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED |
200 | * | | 200 | * |
201 | * (But not always for kernel threads. There are some special cases | | 201 | * (But not always for kernel threads. There are some special cases |
202 | * as mentioned above: soft interrupts, and the idle loops.) | | 202 | * as mentioned above: soft interrupts, and the idle loops.) |
203 | * | | 203 | * |
204 | * Note that an LWP is considered running or likely to run soon if in | | 204 | * Note that an LWP is considered running or likely to run soon if in |
205 | * one of the following states. This affects the value of p_nrlwps: | | 205 | * one of the following states. This affects the value of p_nrlwps: |
206 | * | | 206 | * |
207 | * LSRUN, LSONPROC, LSSLEEP | | 207 | * LSRUN, LSONPROC, LSSLEEP |
208 | * | | 208 | * |
209 | * p_lock does not need to be held when transitioning among these | | 209 | * p_lock does not need to be held when transitioning among these |
210 | * three states, hence p_lock is rarely taken for state transitions. | | 210 | * three states, hence p_lock is rarely taken for state transitions. |
211 | */ | | 211 | */ |
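To make the locking rules above concrete, here is a minimal sketch (illustrative only, not code from this file) of observing an LWP's state through the lwp_lock()/lwp_unlock() wrappers. The helper name is hypothetical; the point is that l_mutex may change identity across state transitions, and the wrappers hide that from the caller.

    /* Hypothetical helper, for illustration only. */
    static int
    example_get_state(struct lwp *l)
    {
            int stat;

            lwp_lock(l);            /* acquires whatever l->l_mutex points at */
            stat = l->l_stat;       /* stable while the general lock is held */
            lwp_unlock(l);
            return stat;
    }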
212 | | | 212 | |
213 | #include <sys/cdefs.h> | | 213 | #include <sys/cdefs.h> |
214 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.221 2020/01/26 19:06:24 ad Exp $"); | | 214 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.222 2020/01/27 21:58:16 ad Exp $"); |
215 | | | 215 | |
216 | #include "opt_ddb.h" | | 216 | #include "opt_ddb.h" |
217 | #include "opt_lockdebug.h" | | 217 | #include "opt_lockdebug.h" |
218 | #include "opt_dtrace.h" | | 218 | #include "opt_dtrace.h" |
219 | | | 219 | |
220 | #define _LWP_API_PRIVATE | | 220 | #define _LWP_API_PRIVATE |
221 | | | 221 | |
222 | #include <sys/param.h> | | 222 | #include <sys/param.h> |
223 | #include <sys/systm.h> | | 223 | #include <sys/systm.h> |
224 | #include <sys/cpu.h> | | 224 | #include <sys/cpu.h> |
225 | #include <sys/pool.h> | | 225 | #include <sys/pool.h> |
226 | #include <sys/proc.h> | | 226 | #include <sys/proc.h> |
227 | #include <sys/syscallargs.h> | | 227 | #include <sys/syscallargs.h> |
228 | #include <sys/syscall_stats.h> | | 228 | #include <sys/syscall_stats.h> |
229 | #include <sys/kauth.h> | | 229 | #include <sys/kauth.h> |
230 | #include <sys/sleepq.h> | | 230 | #include <sys/sleepq.h> |
231 | #include <sys/lockdebug.h> | | 231 | #include <sys/lockdebug.h> |
232 | #include <sys/kmem.h> | | 232 | #include <sys/kmem.h> |
233 | #include <sys/pset.h> | | 233 | #include <sys/pset.h> |
234 | #include <sys/intr.h> | | 234 | #include <sys/intr.h> |
235 | #include <sys/lwpctl.h> | | 235 | #include <sys/lwpctl.h> |
236 | #include <sys/atomic.h> | | 236 | #include <sys/atomic.h> |
237 | #include <sys/filedesc.h> | | 237 | #include <sys/filedesc.h> |
238 | #include <sys/fstrans.h> | | 238 | #include <sys/fstrans.h> |
239 | #include <sys/dtrace_bsd.h> | | 239 | #include <sys/dtrace_bsd.h> |
240 | #include <sys/sdt.h> | | 240 | #include <sys/sdt.h> |
241 | #include <sys/ptrace.h> | | 241 | #include <sys/ptrace.h> |
242 | #include <sys/xcall.h> | | 242 | #include <sys/xcall.h> |
243 | #include <sys/uidinfo.h> | | 243 | #include <sys/uidinfo.h> |
244 | #include <sys/sysctl.h> | | 244 | #include <sys/sysctl.h> |
245 | #include <sys/psref.h> | | 245 | #include <sys/psref.h> |
246 | #include <sys/msan.h> | | 246 | #include <sys/msan.h> |
247 | | | 247 | |
248 | #include <uvm/uvm_extern.h> | | 248 | #include <uvm/uvm_extern.h> |
249 | #include <uvm/uvm_object.h> | | 249 | #include <uvm/uvm_object.h> |
250 | | | 250 | |
251 | static pool_cache_t lwp_cache __read_mostly; | | 251 | static pool_cache_t lwp_cache __read_mostly; |
252 | struct lwplist alllwp __cacheline_aligned; | | 252 | struct lwplist alllwp __cacheline_aligned; |
253 | | | 253 | |
254 | static void lwp_dtor(void *, void *); | | 254 | static void lwp_dtor(void *, void *); |
255 | | | 255 | |
256 | /* DTrace proc provider probes */ | | 256 | /* DTrace proc provider probes */ |
257 | SDT_PROVIDER_DEFINE(proc); | | 257 | SDT_PROVIDER_DEFINE(proc); |
258 | | | 258 | |
259 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); | | 259 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); |
260 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); | | 260 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); |
261 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); | | 261 | SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); |
262 | | | 262 | |
263 | struct turnstile turnstile0 __cacheline_aligned; | | 263 | struct turnstile turnstile0 __cacheline_aligned; |
264 | struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { | | 264 | struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { |
265 | #ifdef LWP0_CPU_INFO | | 265 | #ifdef LWP0_CPU_INFO |
266 | .l_cpu = LWP0_CPU_INFO, | | 266 | .l_cpu = LWP0_CPU_INFO, |
267 | #endif | | 267 | #endif |
268 | #ifdef LWP0_MD_INITIALIZER | | 268 | #ifdef LWP0_MD_INITIALIZER |
269 | .l_md = LWP0_MD_INITIALIZER, | | 269 | .l_md = LWP0_MD_INITIALIZER, |
270 | #endif | | 270 | #endif |
271 | .l_proc = &proc0, | | 271 | .l_proc = &proc0, |
272 | .l_lid = 1, | | 272 | .l_lid = 1, |
273 | .l_flag = LW_SYSTEM, | | 273 | .l_flag = LW_SYSTEM, |
274 | .l_stat = LSONPROC, | | 274 | .l_stat = LSONPROC, |
275 | .l_ts = &turnstile0, | | 275 | .l_ts = &turnstile0, |
276 | .l_syncobj = &sched_syncobj, | | 276 | .l_syncobj = &sched_syncobj, |
277 | .l_refcnt = 1, | | 277 | .l_refcnt = 1, |
278 | .l_priority = PRI_USER + NPRI_USER - 1, | | 278 | .l_priority = PRI_USER + NPRI_USER - 1, |
279 | .l_inheritedprio = -1, | | 279 | .l_inheritedprio = -1, |
280 | .l_class = SCHED_OTHER, | | 280 | .l_class = SCHED_OTHER, |
281 | .l_psid = PS_NONE, | | 281 | .l_psid = PS_NONE, |
282 | .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), | | 282 | .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), |
283 | .l_name = __UNCONST("swapper"), | | 283 | .l_name = __UNCONST("swapper"), |
284 | .l_fd = &filedesc0, | | 284 | .l_fd = &filedesc0, |
285 | }; | | 285 | }; |
286 | | | 286 | |
287 | static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); | | 287 | static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); |
288 | | | 288 | |
289 | /* | | 289 | /* |
290 | * sysctl helper routine for kern.maxlwp. Ensures that the new | | 290 | * sysctl helper routine for kern.maxlwp. Ensures that the new |
291 | * values are not too low or too high. | | 291 | * values are not too low or too high. |
292 | */ | | 292 | */ |
293 | static int | | 293 | static int |
294 | sysctl_kern_maxlwp(SYSCTLFN_ARGS) | | 294 | sysctl_kern_maxlwp(SYSCTLFN_ARGS) |
295 | { | | 295 | { |
296 | int error, nmaxlwp; | | 296 | int error, nmaxlwp; |
297 | struct sysctlnode node; | | 297 | struct sysctlnode node; |
298 | | | 298 | |
299 | nmaxlwp = maxlwp; | | 299 | nmaxlwp = maxlwp; |
300 | node = *rnode; | | 300 | node = *rnode; |
301 | node.sysctl_data = &nmaxlwp; | | 301 | node.sysctl_data = &nmaxlwp; |
302 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); | | 302 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
303 | if (error || newp == NULL) | | 303 | if (error || newp == NULL) |
304 | return error; | | 304 | return error; |
305 | | | 305 | |
306 | if (nmaxlwp < 0 || nmaxlwp >= 65536) | | 306 | if (nmaxlwp < 0 || nmaxlwp >= 65536) |
307 | return EINVAL; | | 307 | return EINVAL; |
308 | if (nmaxlwp > cpu_maxlwp()) | | 308 | if (nmaxlwp > cpu_maxlwp()) |
309 | return EINVAL; | | 309 | return EINVAL; |
310 | maxlwp = nmaxlwp; | | 310 | maxlwp = nmaxlwp; |
311 | | | 311 | |
312 | return 0; | | 312 | return 0; |
313 | } | | 313 | } |
314 | | | 314 | |
315 | static void | | 315 | static void |
316 | sysctl_kern_lwp_setup(void) | | 316 | sysctl_kern_lwp_setup(void) |
317 | { | | 317 | { |
318 | struct sysctllog *clog = NULL; | | 318 | struct sysctllog *clog = NULL; |
319 | | | 319 | |
320 | sysctl_createv(&clog, 0, NULL, NULL, | | 320 | sysctl_createv(&clog, 0, NULL, NULL, |
321 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 321 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
322 | CTLTYPE_INT, "maxlwp", | | 322 | CTLTYPE_INT, "maxlwp", |
323 | SYSCTL_DESCR("Maximum number of simultaneous threads"), | | 323 | SYSCTL_DESCR("Maximum number of simultaneous threads"), |
324 | sysctl_kern_maxlwp, 0, NULL, 0, | | 324 | sysctl_kern_maxlwp, 0, NULL, 0, |
325 | CTL_KERN, CTL_CREATE, CTL_EOL); | | 325 | CTL_KERN, CTL_CREATE, CTL_EOL); |
326 | } | | 326 | } |
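As a usage sketch from userland (an assumption about typical use, not part of this file): since the node is created with CTL_CREATE, sysctlbyname(3) is the convenient way to reach it, and writes rejected by the range checks in sysctl_kern_maxlwp() fail with EINVAL.

    #include <sys/sysctl.h>
    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            int cur, new = 4096;    /* example value; must not exceed cpu_maxlwp() */
            size_t len = sizeof(cur);

            if (sysctlbyname("kern.maxlwp", &cur, &len, NULL, 0) == -1)
                    err(1, "read kern.maxlwp");
            printf("kern.maxlwp = %d\n", cur);

            /* Writing requires privilege; out-of-range values yield EINVAL. */
            if (sysctlbyname("kern.maxlwp", NULL, NULL, &new, sizeof(new)) == -1)
                    warn("write kern.maxlwp");
            return 0;
    }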
327 | | | 327 | |
328 | void | | 328 | void |
329 | lwpinit(void) | | 329 | lwpinit(void) |
330 | { | | 330 | { |
331 | | | 331 | |
332 | LIST_INIT(&alllwp); | | 332 | LIST_INIT(&alllwp); |
333 | lwpinit_specificdata(); | | 333 | lwpinit_specificdata(); |
334 | lwp_sys_init(); | | 334 | lwp_sys_init(); |
335 | lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, | | 335 | lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, |
336 | "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); | | 336 | "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); |
337 | | | 337 | |
338 | maxlwp = cpu_maxlwp(); | | 338 | maxlwp = cpu_maxlwp(); |
339 | sysctl_kern_lwp_setup(); | | 339 | sysctl_kern_lwp_setup(); |
340 | } | | 340 | } |
341 | | | 341 | |
342 | void | | 342 | void |
343 | lwp0_init(void) | | 343 | lwp0_init(void) |
344 | { | | 344 | { |
345 | struct lwp *l = &lwp0; | | 345 | struct lwp *l = &lwp0; |
346 | | | 346 | |
347 | KASSERT((void *)uvm_lwp_getuarea(l) != NULL); | | 347 | KASSERT((void *)uvm_lwp_getuarea(l) != NULL); |
348 | KASSERT(l->l_lid == proc0.p_nlwpid); | | 348 | KASSERT(l->l_lid == proc0.p_nlwpid); |
349 | | | 349 | |
350 | LIST_INSERT_HEAD(&alllwp, l, l_list); | | 350 | LIST_INSERT_HEAD(&alllwp, l, l_list); |
351 | | | 351 | |
352 | callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); | | 352 | callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); |
353 | callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); | | 353 | callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); |
354 | cv_init(&l->l_sigcv, "sigwait"); | | 354 | cv_init(&l->l_sigcv, "sigwait"); |
355 | cv_init(&l->l_waitcv, "vfork"); | | 355 | cv_init(&l->l_waitcv, "vfork"); |
356 | | | 356 | |
357 | kauth_cred_hold(proc0.p_cred); | | 357 | kauth_cred_hold(proc0.p_cred); |
358 | l->l_cred = proc0.p_cred; | | 358 | l->l_cred = proc0.p_cred; |
359 | | | 359 | |
360 | kdtrace_thread_ctor(NULL, l); | | 360 | kdtrace_thread_ctor(NULL, l); |
361 | lwp_initspecific(l); | | 361 | lwp_initspecific(l); |
362 | | | 362 | |
363 | SYSCALL_TIME_LWP_INIT(l); | | 363 | SYSCALL_TIME_LWP_INIT(l); |
364 | } | | 364 | } |
365 | | | 365 | |
366 | static void | | 366 | static void |
367 | lwp_dtor(void *arg, void *obj) | | 367 | lwp_dtor(void *arg, void *obj) |
368 | { | | 368 | { |
369 | lwp_t *l = obj; | | 369 | lwp_t *l = obj; |
370 | (void)l; | | 370 | (void)l; |
371 | | | 371 | |
372 | /* | | 372 | /* |
373 | * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() | | 373 | * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() |
374 | * calls will exit before memory of LWP is returned to the pool, where | | 374 | * calls will exit before memory of LWP is returned to the pool, where |
375 | * KVA of LWP structure might be freed and re-used for other purposes. | | 375 | * KVA of LWP structure might be freed and re-used for other purposes. |
376 | * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu() | | 376 | * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu() |
377 | * callers, therefore cross-call to all CPUs will do the job. Also, | | 377 | * callers, therefore cross-call to all CPUs will do the job. Also, |
378 | * the value of l->l_cpu must be still valid at this point. | | 378 | * the value of l->l_cpu must be still valid at this point. |
379 | */ | | 379 | */ |
380 | KASSERT(l->l_cpu != NULL); | | 380 | KASSERT(l->l_cpu != NULL); |
381 | xc_barrier(0); | | 381 | xc_barrier(0); |
382 | } | | 382 | } |
383 | | | 383 | |
384 | /* | | 384 | /* |
385 | * Set an LWP suspended. | | 385 | * Set an LWP suspended. |
386 | * | | 386 | * |
387 | * Must be called with p_lock held, and the LWP locked. Will unlock the | | 387 | * Must be called with p_lock held, and the LWP locked. Will unlock the |
388 | * LWP before return. | | 388 | * LWP before return. |
389 | */ | | 389 | */ |
390 | int | | 390 | int |
391 | lwp_suspend(struct lwp *curl, struct lwp *t) | | 391 | lwp_suspend(struct lwp *curl, struct lwp *t) |
392 | { | | 392 | { |
393 | int error; | | 393 | int error; |
394 | | | 394 | |
395 | KASSERT(mutex_owned(t->l_proc->p_lock)); | | 395 | KASSERT(mutex_owned(t->l_proc->p_lock)); |
396 | KASSERT(lwp_locked(t, NULL)); | | 396 | KASSERT(lwp_locked(t, NULL)); |
397 | | | 397 | |
398 | KASSERT(curl != t || curl->l_stat == LSONPROC); | | 398 | KASSERT(curl != t || curl->l_stat == LSONPROC); |
399 | | | 399 | |
400 | /* | | 400 | /* |
401 | * If the current LWP has been told to exit, we must not suspend anyone | | 401 | * If the current LWP has been told to exit, we must not suspend anyone |
402 | * else or deadlock could occur. We won't return to userspace. | | 402 | * else or deadlock could occur. We won't return to userspace. |
403 | */ | | 403 | */ |
404 | if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { | | 404 | if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { |
405 | lwp_unlock(t); | | 405 | lwp_unlock(t); |
406 | return (EDEADLK); | | 406 | return (EDEADLK); |
407 | } | | 407 | } |
408 | | | 408 | |
409 | if ((t->l_flag & LW_DBGSUSPEND) != 0) { | | 409 | if ((t->l_flag & LW_DBGSUSPEND) != 0) { |
410 | lwp_unlock(t); | | 410 | lwp_unlock(t); |
411 | return 0; | | 411 | return 0; |
412 | } | | 412 | } |
413 | | | 413 | |
414 | error = 0; | | 414 | error = 0; |
415 | | | 415 | |
416 | switch (t->l_stat) { | | 416 | switch (t->l_stat) { |
417 | case LSRUN: | | 417 | case LSRUN: |
418 | case LSONPROC: | | 418 | case LSONPROC: |
419 | t->l_flag |= LW_WSUSPEND; | | 419 | t->l_flag |= LW_WSUSPEND; |
420 | lwp_need_userret(t); | | 420 | lwp_need_userret(t); |
421 | lwp_unlock(t); | | 421 | lwp_unlock(t); |
422 | break; | | 422 | break; |
423 | | | 423 | |
424 | case LSSLEEP: | | 424 | case LSSLEEP: |
425 | t->l_flag |= LW_WSUSPEND; | | 425 | t->l_flag |= LW_WSUSPEND; |
426 | | | 426 | |
427 | /* | | 427 | /* |
428 | * Kick the LWP and try to get it to the kernel boundary | | 428 | * Kick the LWP and try to get it to the kernel boundary |
429 | * so that it will release any locks that it holds. | | 429 | * so that it will release any locks that it holds. |
430 | * setrunnable() will release the lock. | | 430 | * setrunnable() will release the lock. |
431 | */ | | 431 | */ |
432 | if ((t->l_flag & LW_SINTR) != 0) | | 432 | if ((t->l_flag & LW_SINTR) != 0) |
433 | setrunnable(t); | | 433 | setrunnable(t); |
434 | else | | 434 | else |
435 | lwp_unlock(t); | | 435 | lwp_unlock(t); |
436 | break; | | 436 | break; |
437 | | | 437 | |
438 | case LSSUSPENDED: | | 438 | case LSSUSPENDED: |
439 | lwp_unlock(t); | | 439 | lwp_unlock(t); |
440 | break; | | 440 | break; |
441 | | | 441 | |
442 | case LSSTOP: | | 442 | case LSSTOP: |
443 | t->l_flag |= LW_WSUSPEND; | | 443 | t->l_flag |= LW_WSUSPEND; |
444 | setrunnable(t); | | 444 | setrunnable(t); |
445 | break; | | 445 | break; |
446 | | | 446 | |
447 | case LSIDL: | | 447 | case LSIDL: |
448 | case LSZOMB: | | 448 | case LSZOMB: |
449 | error = EINTR; /* It's what Solaris does..... */ | | 449 | error = EINTR; /* It's what Solaris does..... */ |
450 | lwp_unlock(t); | | 450 | lwp_unlock(t); |
451 | break; | | 451 | break; |
452 | } | | 452 | } |
453 | | | 453 | |
454 | return (error); | | 454 | return (error); |
455 | } | | 455 | } |
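A hedged caller sketch, modeled on the stated preconditions rather than taken from this file: p_lock is held and the target is locked on entry, and lwp_suspend() drops the LWP lock itself, leaving p_lock for the caller to release.

    /* Illustration only: suspend target "t" on behalf of "curl". */
    static int
    example_suspend(struct lwp *curl, struct lwp *t)
    {
            struct proc *p = t->l_proc;
            int error;

            mutex_enter(p->p_lock);
            lwp_lock(t);
            error = lwp_suspend(curl, t);   /* unlocks t before returning */
            mutex_exit(p->p_lock);
            return error;
    }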
456 | | | 456 | |
457 | /* | | 457 | /* |
458 | * Restart a suspended LWP. | | 458 | * Restart a suspended LWP. |
459 | * | | 459 | * |
460 | * Must be called with p_lock held, and the LWP locked. Will unlock the | | 460 | * Must be called with p_lock held, and the LWP locked. Will unlock the |
461 | * LWP before return. | | 461 | * LWP before return. |
462 | */ | | 462 | */ |
463 | void | | 463 | void |
464 | lwp_continue(struct lwp *l) | | 464 | lwp_continue(struct lwp *l) |
465 | { | | 465 | { |
466 | | | 466 | |
467 | KASSERT(mutex_owned(l->l_proc->p_lock)); | | 467 | KASSERT(mutex_owned(l->l_proc->p_lock)); |
468 | KASSERT(lwp_locked(l, NULL)); | | 468 | KASSERT(lwp_locked(l, NULL)); |
469 | | | 469 | |
470 | /* If rebooting or not suspended, then just bail out. */ | | 470 | /* If rebooting or not suspended, then just bail out. */ |
471 | if ((l->l_flag & LW_WREBOOT) != 0) { | | 471 | if ((l->l_flag & LW_WREBOOT) != 0) { |
472 | lwp_unlock(l); | | 472 | lwp_unlock(l); |
473 | return; | | 473 | return; |
474 | } | | 474 | } |
475 | | | 475 | |
476 | l->l_flag &= ~LW_WSUSPEND; | | 476 | l->l_flag &= ~LW_WSUSPEND; |
477 | | | 477 | |
478 | if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { | | 478 | if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { |
479 | lwp_unlock(l); | | 479 | lwp_unlock(l); |
480 | return; | | 480 | return; |
481 | } | | 481 | } |
482 | | | 482 | |
483 | /* setrunnable() will release the lock. */ | | 483 | /* setrunnable() will release the lock. */ |
484 | setrunnable(l); | | 484 | setrunnable(l); |
485 | } | | 485 | } |
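The same calling convention applies in the other direction; again a sketch following the comment above, not code from this file:

    /* Illustration only: resume a previously suspended LWP. */
    static void
    example_continue(struct lwp *l)
    {
            struct proc *p = l->l_proc;

            mutex_enter(p->p_lock);
            lwp_lock(l);
            lwp_continue(l);        /* unlocks l before returning */
            mutex_exit(p->p_lock);
    }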
486 | | | 486 | |
487 | /* | | 487 | /* |
488 | * Restart a stopped LWP. | | 488 | * Restart a stopped LWP. |
489 | * | | 489 | * |
490 | * Must be called with p_lock held, and the LWP NOT locked. Will unlock the | | 490 | * Must be called with p_lock held, and the LWP NOT locked. Will unlock the |
491 | * LWP before return. | | 491 | * LWP before return. |
492 | */ | | 492 | */ |
493 | void | | 493 | void |
494 | lwp_unstop(struct lwp *l) | | 494 | lwp_unstop(struct lwp *l) |
495 | { | | 495 | { |
496 | struct proc *p = l->l_proc; | | 496 | struct proc *p = l->l_proc; |
497 | | | 497 | |
498 | KASSERT(mutex_owned(proc_lock)); | | 498 | KASSERT(mutex_owned(proc_lock)); |
499 | KASSERT(mutex_owned(p->p_lock)); | | 499 | KASSERT(mutex_owned(p->p_lock)); |
500 | | | 500 | |
501 | lwp_lock(l); | | 501 | lwp_lock(l); |
502 | | | 502 | |
503 | KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); | | 503 | KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); |
504 | | | 504 | |
505 | /* If not stopped, then just bail out. */ | | 505 | /* If not stopped, then just bail out. */ |
506 | if (l->l_stat != LSSTOP) { | | 506 | if (l->l_stat != LSSTOP) { |
507 | lwp_unlock(l); | | 507 | lwp_unlock(l); |
508 | return; | | 508 | return; |
509 | } | | 509 | } |
510 | | | 510 | |
511 | p->p_stat = SACTIVE; | | 511 | p->p_stat = SACTIVE; |
512 | p->p_sflag &= ~PS_STOPPING; | | 512 | p->p_sflag &= ~PS_STOPPING; |
513 | | | 513 | |
514 | if (!p->p_waited) | | 514 | if (!p->p_waited) |
515 | p->p_pptr->p_nstopchild--; | | 515 | p->p_pptr->p_nstopchild--; |
516 | | | 516 | |
517 | if (l->l_wchan == NULL) { | | 517 | if (l->l_wchan == NULL) { |
518 | /* setrunnable() will release the lock. */ | | 518 | /* setrunnable() will release the lock. */ |
519 | setrunnable(l); | | 519 | setrunnable(l); |
520 | } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { | | 520 | } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { |
521 | /* setrunnable() so we can receive the signal */ | | 521 | /* setrunnable() so we can receive the signal */ |
522 | setrunnable(l); | | 522 | setrunnable(l); |
523 | } else { | | 523 | } else { |
524 | l->l_stat = LSSLEEP; | | 524 | l->l_stat = LSSLEEP; |
525 | p->p_nrlwps++; | | 525 | p->p_nrlwps++; |
526 | lwp_unlock(l); | | 526 | lwp_unlock(l); |
527 | } | | 527 | } |
528 | } | | 528 | } |
529 | | | 529 | |
530 | /* | | 530 | /* |
531 | * Wait for an LWP within the current process to exit. If 'lid' is | | 531 | * Wait for an LWP within the current process to exit. If 'lid' is |
532 | * non-zero, we are waiting for a specific LWP. | | 532 | * non-zero, we are waiting for a specific LWP. |
533 | * | | 533 | * |
534 | * Must be called with p->p_lock held. | | 534 | * Must be called with p->p_lock held. |
535 | */ | | 535 | */ |
536 | int | | 536 | int |
537 | lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) | | 537 | lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) |
538 | { | | 538 | { |
539 | const lwpid_t curlid = l->l_lid; | | 539 | const lwpid_t curlid = l->l_lid; |
540 | proc_t *p = l->l_proc; | | 540 | proc_t *p = l->l_proc; |
541 | lwp_t *l2; | | 541 | lwp_t *l2; |
542 | int error; | | 542 | int error; |
543 | | | 543 | |
544 | KASSERT(mutex_owned(p->p_lock)); | | 544 | KASSERT(mutex_owned(p->p_lock)); |
545 | | | 545 | |
546 | p->p_nlwpwait++; | | 546 | p->p_nlwpwait++; |
547 | l->l_waitingfor = lid; | | 547 | l->l_waitingfor = lid; |
548 | | | 548 | |
549 | for (;;) { | | 549 | for (;;) { |
550 | int nfound; | | 550 | int nfound; |
551 | | | 551 | |
552 | /* | | 552 | /* |
553 | * Avoid a race between exit1() and sigexit(): if the | | 553 | * Avoid a race between exit1() and sigexit(): if the |
554 | * process is dumping core, then we need to bail out: call | | 554 | * process is dumping core, then we need to bail out: call |
555 | * into lwp_userret() where we will be suspended until the | | 555 | * into lwp_userret() where we will be suspended until the |
556 | * deed is done. | | 556 | * deed is done. |
557 | */ | | 557 | */ |
558 | if ((p->p_sflag & PS_WCORE) != 0) { | | 558 | if ((p->p_sflag & PS_WCORE) != 0) { |
559 | mutex_exit(p->p_lock); | | 559 | mutex_exit(p->p_lock); |
560 | lwp_userret(l); | | 560 | lwp_userret(l); |
561 | KASSERT(false); | | 561 | KASSERT(false); |
562 | } | | 562 | } |
563 | | | 563 | |
564 | /* | | 564 | /* |
565 | * First off, drain any detached LWP that is waiting to be | | 565 | * First off, drain any detached LWP that is waiting to be |
566 | * reaped. | | 566 | * reaped. |
567 | */ | | 567 | */ |
568 | while ((l2 = p->p_zomblwp) != NULL) { | | 568 | while ((l2 = p->p_zomblwp) != NULL) { |
569 | p->p_zomblwp = NULL; | | 569 | p->p_zomblwp = NULL; |
570 | lwp_free(l2, false, false);/* releases proc mutex */ | | 570 | lwp_free(l2, false, false);/* releases proc mutex */ |
571 | mutex_enter(p->p_lock); | | 571 | mutex_enter(p->p_lock); |
572 | } | | 572 | } |
573 | | | 573 | |
574 | /* | | 574 | /* |
575 | * Now look for an LWP to collect. If the whole process is | | 575 | * Now look for an LWP to collect. If the whole process is |
576 | * exiting, count detached LWPs as eligible to be collected, | | 576 | * exiting, count detached LWPs as eligible to be collected, |
577 | * but don't drain them here. | | 577 | * but don't drain them here. |
578 | */ | | 578 | */ |
579 | nfound = 0; | | 579 | nfound = 0; |
580 | error = 0; | | 580 | error = 0; |
581 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { | | 581 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { |
582 | /* | | 582 | /* |
583 | * If a specific wait and the target is waiting on | | 583 | * If a specific wait and the target is waiting on |
584 | * us, then avoid deadlock. This also traps LWPs | | 584 | * us, then avoid deadlock. This also traps LWPs |
585 | * that try to wait on themselves. | | 585 | * that try to wait on themselves. |
586 | * | | 586 | * |
587 | * Note that this does not handle more complicated | | 587 | * Note that this does not handle more complicated |
588 | * cycles, like: t1 -> t2 -> t3 -> t1. The process | | 588 | * cycles, like: t1 -> t2 -> t3 -> t1. The process |
589 | * can still be killed so it is not a major problem. | | 589 | * can still be killed so it is not a major problem. |
590 | */ | | 590 | */ |
591 | if (l2->l_lid == lid && l2->l_waitingfor == curlid) { | | 591 | if (l2->l_lid == lid && l2->l_waitingfor == curlid) { |
592 | error = EDEADLK; | | 592 | error = EDEADLK; |
593 | break; | | 593 | break; |
594 | } | | 594 | } |
595 | if (l2 == l) | | 595 | if (l2 == l) |
596 | continue; | | 596 | continue; |
597 | if ((l2->l_prflag & LPR_DETACHED) != 0) { | | 597 | if ((l2->l_prflag & LPR_DETACHED) != 0) { |
598 | nfound += exiting; | | 598 | nfound += exiting; |
599 | continue; | | 599 | continue; |
600 | } | | 600 | } |
601 | if (lid != 0) { | | 601 | if (lid != 0) { |
602 | if (l2->l_lid != lid) | | 602 | if (l2->l_lid != lid) |
603 | continue; | | 603 | continue; |
604 | /* | | 604 | /* |
605 | * Mark this LWP as the first waiter, if there | | 605 | * Mark this LWP as the first waiter, if there |
606 | * is no other. | | 606 | * is no other. |
607 | */ | | 607 | */ |
608 | if (l2->l_waiter == 0) | | 608 | if (l2->l_waiter == 0) |
609 | l2->l_waiter = curlid; | | 609 | l2->l_waiter = curlid; |
610 | } else if (l2->l_waiter != 0) { | | 610 | } else if (l2->l_waiter != 0) { |
611 | /* | | 611 | /* |
612 | * It already has a waiter - so don't | | 612 | * It already has a waiter - so don't |
613 | * collect it. If the waiter doesn't | | 613 | * collect it. If the waiter doesn't |
614 | * grab it we'll get another chance | | 614 | * grab it we'll get another chance |
615 | * later. | | 615 | * later. |
616 | */ | | 616 | */ |
617 | nfound++; | | 617 | nfound++; |
618 | continue; | | 618 | continue; |
619 | } | | 619 | } |
620 | nfound++; | | 620 | nfound++; |
621 | | | 621 | |
622 | /* No need to lock the LWP in order to see LSZOMB. */ | | 622 | /* No need to lock the LWP in order to see LSZOMB. */ |
623 | if (l2->l_stat != LSZOMB) | | 623 | if (l2->l_stat != LSZOMB) |
624 | continue; | | 624 | continue; |
625 | | | 625 | |
626 | /* | | 626 | /* |
627 | * We're no longer waiting. Reset the "first waiter" | | 627 | * We're no longer waiting. Reset the "first waiter" |
628 | * pointer on the target, in case it was us. | | 628 | * pointer on the target, in case it was us. |
629 | */ | | 629 | */ |
630 | l->l_waitingfor = 0; | | 630 | l->l_waitingfor = 0; |
631 | l2->l_waiter = 0; | | 631 | l2->l_waiter = 0; |
632 | p->p_nlwpwait--; | | 632 | p->p_nlwpwait--; |
633 | if (departed) | | 633 | if (departed) |
634 | *departed = l2->l_lid; | | 634 | *departed = l2->l_lid; |
635 | sched_lwp_collect(l2); | | 635 | sched_lwp_collect(l2); |
636 | | | 636 | |
637 | /* lwp_free() releases the proc lock. */ | | 637 | /* lwp_free() releases the proc lock. */ |
638 | lwp_free(l2, false, false); | | 638 | lwp_free(l2, false, false); |
639 | mutex_enter(p->p_lock); | | 639 | mutex_enter(p->p_lock); |
640 | return 0; | | 640 | return 0; |
641 | } | | 641 | } |
642 | | | 642 | |
643 | if (error != 0) | | 643 | if (error != 0) |
644 | break; | | 644 | break; |
645 | if (nfound == 0) { | | 645 | if (nfound == 0) { |
646 | error = ESRCH; | | 646 | error = ESRCH; |
647 | break; | | 647 | break; |
648 | } | | 648 | } |
649 | | | 649 | |
650 | /* | | 650 | /* |
651 | * Note: since the lock will be dropped, need to restart on | | 651 | * Note: since the lock will be dropped, need to restart on |
652 | * wakeup to run all LWPs again, e.g. there may be new LWPs. | | 652 | * wakeup to run all LWPs again, e.g. there may be new LWPs. |
653 | */ | | 653 | */ |
654 | if (exiting) { | | 654 | if (exiting) { |
655 | KASSERT(p->p_nlwps > 1); | | 655 | KASSERT(p->p_nlwps > 1); |
656 | cv_wait(&p->p_lwpcv, p->p_lock); | | 656 | error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); |
657 | error = EAGAIN; | | | |
658 | break; | | 657 | break; |
659 | } | | 658 | } |
660 | | | 659 | |
661 | /* | | 660 | /* |
662 | * If all other LWPs are waiting for exits or suspends | | 661 | * If all other LWPs are waiting for exits or suspends |
663 | * and the supply of zombies and potential zombies is | | 662 | * and the supply of zombies and potential zombies is |
664 | * exhausted, then we are about to deadlock. | | 663 | * exhausted, then we are about to deadlock. |
665 | * | | | |
666 | * If the process is exiting (and this LWP is not the one | | | |
667 | * that is coordinating the exit) then bail out now. | | | |
668 | */ | | 664 | */ |
669 | if ((p->p_sflag & PS_WEXIT) != 0 || | | 665 | if ((p->p_sflag & PS_WEXIT) != 0 || |
670 | p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) { | | 666 | p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) { |
671 | error = EDEADLK; | | 667 | error = EDEADLK; |
672 | break; | | 668 | break; |
673 | } | | 669 | } |
674 | | | 670 | |
675 | /* | | 671 | /* |
676 | * Sit around and wait for something to happen. We'll be | | 672 | * Sit around and wait for something to happen. We'll be |
677 | * awoken if any of the conditions examined change: if an | | 673 | * awoken if any of the conditions examined change: if an |
678 | * LWP exits, is collected, or is detached. | | 674 | * LWP exits, is collected, or is detached. |
679 | */ | | 675 | */ |
680 | if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) | | 676 | if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) |
681 | break; | | 677 | break; |
682 | } | | 678 | } |
683 | | | 679 | |
684 | /* | | 680 | /* |
685 | * We didn't find any LWPs to collect, we may have received a | | 681 | * We didn't find any LWPs to collect, we may have received a |
686 | * signal, or some other condition has caused us to bail out. | | 682 | * signal, or some other condition has caused us to bail out. |
687 | * | | 683 | * |
688 | * If waiting on a specific LWP, clear the waiters marker: some | | 684 | * If waiting on a specific LWP, clear the waiters marker: some |
689 | * other LWP may want it. Then, kick all the remaining waiters | | 685 | * other LWP may want it. Then, kick all the remaining waiters |
690 | * so that they can re-check for zombies and for deadlock. | | 686 | * so that they can re-check for zombies and for deadlock. |
691 | */ | | 687 | */ |
692 | if (lid != 0) { | | 688 | if (lid != 0) { |
693 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { | | 689 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { |
694 | if (l2->l_lid == lid) { | | 690 | if (l2->l_lid == lid) { |
695 | if (l2->l_waiter == curlid) | | 691 | if (l2->l_waiter == curlid) |
696 | l2->l_waiter = 0; | | 692 | l2->l_waiter = 0; |
697 | break; | | 693 | break; |
698 | } | | 694 | } |
699 | } | | 695 | } |
700 | } | | 696 | } |
701 | p->p_nlwpwait--; | | 697 | p->p_nlwpwait--; |
702 | l->l_waitingfor = 0; | | 698 | l->l_waitingfor = 0; |
703 | cv_broadcast(&p->p_lwpcv); | | 699 | cv_broadcast(&p->p_lwpcv); |
704 | | | 700 | |
705 | return error; | | 701 | return error; |
706 | } | | 702 | } |
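From userland this path is reached through _lwp_wait(2); a minimal sketch of that view (hypothetical program, assuming another LWP was started earlier with _lwp_create(2)). A lid of 0 means "collect any non-detached zombie LWP in the process".

    #include <lwp.h>
    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            lwpid_t departed;

            /* Assumes another LWP exists; otherwise this fails with ESRCH. */
            if (_lwp_wait(0, &departed) == -1)
                    err(1, "_lwp_wait");
            printf("collected lwp %d\n", departed);
            return 0;
    }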
707 | | | 703 | |
708 | static lwpid_t | | 704 | static lwpid_t |
709 | lwp_find_free_lid(lwpid_t try_lid, lwp_t * new_lwp, proc_t *p) | | 705 | lwp_find_free_lid(lwpid_t try_lid, lwp_t * new_lwp, proc_t *p) |
710 | { | | 706 | { |
711 | #define LID_SCAN (1u << 31) | | 707 | #define LID_SCAN (1u << 31) |
712 | lwp_t *scan, *free_before; | | 708 | lwp_t *scan, *free_before; |
713 | lwpid_t nxt_lid; | | 709 | lwpid_t nxt_lid; |
714 | | | 710 | |
715 | /* | | 711 | /* |
716 | * We want the first unused lid greater than or equal to | | 712 | * We want the first unused lid greater than or equal to |
717 | * try_lid (modulo 2^31). | | 713 | * try_lid (modulo 2^31). |
718 | * (If nothing else, ld.elf_so doesn't want an lwpid with the top bit set.) | | 714 | * (If nothing else, ld.elf_so doesn't want an lwpid with the top bit set.) |
719 | * We must not return 0, and avoiding 'LID_SCAN - 1' makes | | 715 | * We must not return 0, and avoiding 'LID_SCAN - 1' makes |
720 | * the outer test easier. | | 716 | * the outer test easier. |
721 | * This would be much easier if the list were sorted in | | 717 | * This would be much easier if the list were sorted in |
722 | * increasing order. | | 718 | * increasing order. |
723 | * The list is kept sorted in decreasing order. | | 719 | * The list is kept sorted in decreasing order. |
724 | * This code is only used after a process has generated 2^31 lwps. | | 720 | * This code is only used after a process has generated 2^31 lwps. |
725 | * | | 721 | * |
726 | * Code assumes it can always find an id. | | 722 | * Code assumes it can always find an id. |
727 | */ | | 723 | */ |
728 | | | 724 | |
729 | try_lid &= LID_SCAN - 1; | | 725 | try_lid &= LID_SCAN - 1; |
730 | if (try_lid <= 1) | | 726 | if (try_lid <= 1) |
731 | try_lid = 2; | | 727 | try_lid = 2; |
732 | | | 728 | |
733 | free_before = NULL; | | 729 | free_before = NULL; |
734 | nxt_lid = LID_SCAN - 1; | | 730 | nxt_lid = LID_SCAN - 1; |
735 | LIST_FOREACH(scan, &p->p_lwps, l_sibling) { | | 731 | LIST_FOREACH(scan, &p->p_lwps, l_sibling) { |
736 | if (scan->l_lid != nxt_lid) { | | 732 | if (scan->l_lid != nxt_lid) { |
737 | /* There are available lids before this entry */ | | 733 | /* There are available lids before this entry */ |
738 | free_before = scan; | | 734 | free_before = scan; |
739 | if (try_lid > scan->l_lid) | | 735 | if (try_lid > scan->l_lid) |
740 | break; | | 736 | break; |
741 | } | | 737 | } |
742 | if (try_lid == scan->l_lid) { | | 738 | if (try_lid == scan->l_lid) { |
743 | /* The ideal lid is busy, take a higher one */ | | 739 | /* The ideal lid is busy, take a higher one */ |
744 | if (free_before != NULL) { | | 740 | if (free_before != NULL) { |
745 | try_lid = free_before->l_lid + 1; | | 741 | try_lid = free_before->l_lid + 1; |
746 | break; | | 742 | break; |
747 | } | | 743 | } |
748 | /* No higher ones, reuse low numbers */ | | 744 | /* No higher ones, reuse low numbers */ |
749 | try_lid = 2; | | 745 | try_lid = 2; |
750 | } | | 746 | } |
751 | | | 747 | |
752 | nxt_lid = scan->l_lid - 1; | | 748 | nxt_lid = scan->l_lid - 1; |
753 | if (LIST_NEXT(scan, l_sibling) == NULL) { | | 749 | if (LIST_NEXT(scan, l_sibling) == NULL) { |
754 | /* The value we have is lower than any existing lwp */ | | 750 | /* The value we have is lower than any existing lwp */ |
755 | LIST_INSERT_AFTER(scan, new_lwp, l_sibling); | | 751 | LIST_INSERT_AFTER(scan, new_lwp, l_sibling); |
756 | return try_lid; | | 752 | return try_lid; |
757 | } | | 753 | } |
758 | } | | 754 | } |
759 | | | 755 | |
760 | LIST_INSERT_BEFORE(free_before, new_lwp, l_sibling); | | 756 | LIST_INSERT_BEFORE(free_before, new_lwp, l_sibling); |
761 | return try_lid; | | 757 | return try_lid; |
762 | } | | 758 | } |
763 | | | 759 | |
764 | /* | | 760 | /* |
765 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. | | 761 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. |
766 | * The new LWP is created in state LSIDL and must be set running, | | 762 | * The new LWP is created in state LSIDL and must be set running, |
767 | * suspended, or stopped by the caller. | | 763 | * suspended, or stopped by the caller. |
768 | */ | | 764 | */ |
769 | int | | 765 | int |
770 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, | | 766 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, |
771 | void *stack, size_t stacksize, void (*func)(void *), void *arg, | | 767 | void *stack, size_t stacksize, void (*func)(void *), void *arg, |
772 | lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, | | 768 | lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, |
773 | const stack_t *sigstk) | | 769 | const stack_t *sigstk) |
774 | { | | 770 | { |
775 | struct lwp *l2; | | 771 | struct lwp *l2; |
776 | turnstile_t *ts; | | 772 | turnstile_t *ts; |
777 | lwpid_t lid; | | 773 | lwpid_t lid; |
778 | | | 774 | |
779 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); | | 775 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); |
780 | | | 776 | |
781 | /* | | 777 | /* |
782 | * Enforce limits, excluding the first lwp and kthreads. We must | | 778 | * Enforce limits, excluding the first lwp and kthreads. We must |
783 | * use the process credentials here when adjusting the limit, as | | 779 | * use the process credentials here when adjusting the limit, as |
784 | * they are what's tied to the accounting entity. However for | | 780 | * they are what's tied to the accounting entity. However for |
785 | * authorizing the action, we'll use the LWP's credentials. | | 781 | * authorizing the action, we'll use the LWP's credentials. |
786 | */ | | 782 | */ |
787 | mutex_enter(p2->p_lock); | | 783 | mutex_enter(p2->p_lock); |
788 | if (p2->p_nlwps != 0 && p2 != &proc0) { | | 784 | if (p2->p_nlwps != 0 && p2 != &proc0) { |
789 | uid_t uid = kauth_cred_getuid(p2->p_cred); | | 785 | uid_t uid = kauth_cred_getuid(p2->p_cred); |
790 | int count = chglwpcnt(uid, 1); | | 786 | int count = chglwpcnt(uid, 1); |
791 | if (__predict_false(count > | | 787 | if (__predict_false(count > |
792 | p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { | | 788 | p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { |
793 | if (kauth_authorize_process(l1->l_cred, | | 789 | if (kauth_authorize_process(l1->l_cred, |
794 | KAUTH_PROCESS_RLIMIT, p2, | | 790 | KAUTH_PROCESS_RLIMIT, p2, |
795 | KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), | | 791 | KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), |
796 | &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) | | 792 | &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) |
797 | != 0) { | | 793 | != 0) { |
798 | (void)chglwpcnt(uid, -1); | | 794 | (void)chglwpcnt(uid, -1); |
799 | mutex_exit(p2->p_lock); | | 795 | mutex_exit(p2->p_lock); |
800 | return EAGAIN; | | 796 | return EAGAIN; |
801 | } | | 797 | } |
802 | } | | 798 | } |
803 | } | | 799 | } |
804 | | | 800 | |
805 | /* | | 801 | /* |
806 | * First off, reap any detached LWP waiting to be collected. | | 802 | * First off, reap any detached LWP waiting to be collected. |
807 | * We can re-use its LWP structure and turnstile. | | 803 | * We can re-use its LWP structure and turnstile. |
808 | */ | | 804 | */ |
809 | if ((l2 = p2->p_zomblwp) != NULL) { | | 805 | if ((l2 = p2->p_zomblwp) != NULL) { |
810 | p2->p_zomblwp = NULL; | | 806 | p2->p_zomblwp = NULL; |
811 | lwp_free(l2, true, false); | | 807 | lwp_free(l2, true, false); |
812 | /* p2 now unlocked by lwp_free() */ | | 808 | /* p2 now unlocked by lwp_free() */ |
813 | ts = l2->l_ts; | | 809 | ts = l2->l_ts; |
814 | KASSERT(l2->l_inheritedprio == -1); | | 810 | KASSERT(l2->l_inheritedprio == -1); |
815 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); | | 811 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); |
816 | memset(l2, 0, sizeof(*l2)); | | 812 | memset(l2, 0, sizeof(*l2)); |
817 | l2->l_ts = ts; | | 813 | l2->l_ts = ts; |
818 | } else { | | 814 | } else { |
819 | mutex_exit(p2->p_lock); | | 815 | mutex_exit(p2->p_lock); |
820 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); | | 816 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); |
821 | memset(l2, 0, sizeof(*l2)); | | 817 | memset(l2, 0, sizeof(*l2)); |
822 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); | | 818 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); |
823 | SLIST_INIT(&l2->l_pi_lenders); | | 819 | SLIST_INIT(&l2->l_pi_lenders); |
824 | } | | 820 | } |
825 | | | 821 | |
826 | l2->l_stat = LSIDL; | | 822 | l2->l_stat = LSIDL; |
827 | l2->l_proc = p2; | | 823 | l2->l_proc = p2; |
828 | l2->l_refcnt = 1; | | 824 | l2->l_refcnt = 1; |
829 | l2->l_class = sclass; | | 825 | l2->l_class = sclass; |
830 | | | 826 | |
831 | /* | | 827 | /* |
832 | * If vfork(), we want the LWP to run fast and on the same CPU | | 828 | * If vfork(), we want the LWP to run fast and on the same CPU |
833 | * as its parent, so that it can reuse the VM context and cache | | 829 | * as its parent, so that it can reuse the VM context and cache |
834 | * footprint on the local CPU. | | 830 | * footprint on the local CPU. |
835 | */ | | 831 | */ |
836 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); | | 832 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); |
837 | l2->l_kpribase = PRI_KERNEL; | | 833 | l2->l_kpribase = PRI_KERNEL; |
838 | l2->l_priority = l1->l_priority; | | 834 | l2->l_priority = l1->l_priority; |
839 | l2->l_inheritedprio = -1; | | 835 | l2->l_inheritedprio = -1; |
840 | l2->l_protectprio = -1; | | 836 | l2->l_protectprio = -1; |
841 | l2->l_auxprio = -1; | | 837 | l2->l_auxprio = -1; |
842 | l2->l_flag = (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); | | 838 | l2->l_flag = 0; |
843 | l2->l_pflag = LP_MPSAFE; | | 839 | l2->l_pflag = LP_MPSAFE; |
844 | TAILQ_INIT(&l2->l_ld_locks); | | 840 | TAILQ_INIT(&l2->l_ld_locks); |
845 | l2->l_psrefs = 0; | | 841 | l2->l_psrefs = 0; |
846 | kmsan_lwp_alloc(l2); | | 842 | kmsan_lwp_alloc(l2); |
847 | | | 843 | |
848 | /* | | 844 | /* |
849 | * For vfork, borrow parent's lwpctl context if it exists. | | 845 | * For vfork, borrow parent's lwpctl context if it exists. |
850 | * This also causes us to return via lwp_userret. | | 846 | * This also causes us to return via lwp_userret. |
851 | */ | | 847 | */ |
852 | if (flags & LWP_VFORK && l1->l_lwpctl) { | | 848 | if (flags & LWP_VFORK && l1->l_lwpctl) { |
853 | l2->l_lwpctl = l1->l_lwpctl; | | 849 | l2->l_lwpctl = l1->l_lwpctl; |
854 | l2->l_flag |= LW_LWPCTL; | | 850 | l2->l_flag |= LW_LWPCTL; |
855 | } | | 851 | } |
856 | | | 852 | |
857 | /* | | 853 | /* |
858 | * If not the first LWP in the process, grab a reference to the | | 854 | * If not the first LWP in the process, grab a reference to the |
859 | * descriptor table. | | 855 | * descriptor table. |
860 | */ | | 856 | */ |
861 | l2->l_fd = p2->p_fd; | | 857 | l2->l_fd = p2->p_fd; |
862 | if (p2->p_nlwps != 0) { | | 858 | if (p2->p_nlwps != 0) { |
863 | KASSERT(l1->l_proc == p2); | | 859 | KASSERT(l1->l_proc == p2); |
864 | fd_hold(l2); | | 860 | fd_hold(l2); |
865 | } else { | | 861 | } else { |
866 | KASSERT(l1->l_proc != p2); | | 862 | KASSERT(l1->l_proc != p2); |
867 | } | | 863 | } |
868 | | | 864 | |
869 | if (p2->p_flag & PK_SYSTEM) { | | 865 | if (p2->p_flag & PK_SYSTEM) { |
870 | /* Mark it as a system LWP. */ | | 866 | /* Mark it as a system LWP. */ |
871 | l2->l_flag |= LW_SYSTEM; | | 867 | l2->l_flag |= LW_SYSTEM; |
872 | } | | 868 | } |
873 | | | 869 | |
874 | kpreempt_disable(); | | 870 | kpreempt_disable(); |
875 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; | | 871 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; |
876 | l2->l_cpu = l1->l_cpu; | | 872 | l2->l_cpu = l1->l_cpu; |
877 | kpreempt_enable(); | | 873 | kpreempt_enable(); |
878 | | | 874 | |
879 | kdtrace_thread_ctor(NULL, l2); | | 875 | kdtrace_thread_ctor(NULL, l2); |
880 | lwp_initspecific(l2); | | 876 | lwp_initspecific(l2); |
881 | sched_lwp_fork(l1, l2); | | 877 | sched_lwp_fork(l1, l2); |
882 | lwp_update_creds(l2); | | 878 | lwp_update_creds(l2); |
883 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); | | 879 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); |
884 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); | | 880 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); |
885 | cv_init(&l2->l_sigcv, "sigwait"); | | 881 | cv_init(&l2->l_sigcv, "sigwait"); |
886 | cv_init(&l2->l_waitcv, "vfork"); | | 882 | cv_init(&l2->l_waitcv, "vfork"); |
887 | l2->l_syncobj = &sched_syncobj; | | 883 | l2->l_syncobj = &sched_syncobj; |
888 | PSREF_DEBUG_INIT_LWP(l2); | | 884 | PSREF_DEBUG_INIT_LWP(l2); |
889 | | | 885 | |
890 | if (rnewlwpp != NULL) | | 886 | if (rnewlwpp != NULL) |
891 | *rnewlwpp = l2; | | 887 | *rnewlwpp = l2; |
892 | | | 888 | |
893 | /* | | 889 | /* |
894 | * PCU state needs to be saved before calling uvm_lwp_fork() so that | | 890 | * PCU state needs to be saved before calling uvm_lwp_fork() so that |
895 | * the MD cpu_lwp_fork() can copy the saved state to the new LWP. | | 891 | * the MD cpu_lwp_fork() can copy the saved state to the new LWP. |
896 | */ | | 892 | */ |
897 | pcu_save_all(l1); | | 893 | pcu_save_all(l1); |
898 | | | 894 | |
899 | uvm_lwp_setuarea(l2, uaddr); | | 895 | uvm_lwp_setuarea(l2, uaddr); |
900 | uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); | | 896 | uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); |
901 | | | 897 | |
902 | if ((flags & LWP_PIDLID) != 0) { | | 898 | if ((flags & LWP_PIDLID) != 0) { |
903 | lid = proc_alloc_pid(p2); | | 899 | lid = proc_alloc_pid(p2); |
904 | l2->l_pflag |= LP_PIDLID; | | 900 | l2->l_pflag |= LP_PIDLID; |
905 | } else if (p2->p_nlwps == 0) { | | 901 | } else if (p2->p_nlwps == 0) { |
906 | lid = l1->l_lid; | | 902 | lid = l1->l_lid; |
907 | /* | | 903 | /* |
908 | * Update next LWP ID, too. If this overflows to LID_SCAN, | | 904 | * Update next LWP ID, too. If this overflows to LID_SCAN, |
909 | * the slow path of scanning will be used for the next LWP. | | 905 | * the slow path of scanning will be used for the next LWP. |
910 | */ | | 906 | */ |
911 | p2->p_nlwpid = lid + 1; | | 907 | p2->p_nlwpid = lid + 1; |
912 | } else { | | 908 | } else { |
913 | lid = 0; | | 909 | lid = 0; |
914 | } | | 910 | } |
915 | | | 911 | |
916 | mutex_enter(p2->p_lock); | | 912 | mutex_enter(p2->p_lock); |
917 | | | 913 | |
918 | if ((flags & LWP_DETACHED) != 0) { | | 914 | if ((flags & LWP_DETACHED) != 0) { |
919 | l2->l_prflag = LPR_DETACHED; | | 915 | l2->l_prflag = LPR_DETACHED; |
920 | p2->p_ndlwps++; | | 916 | p2->p_ndlwps++; |
921 | } else | | 917 | } else |
922 | l2->l_prflag = 0; | | 918 | l2->l_prflag = 0; |
923 | | | 919 | |
| | | 920 | if (l1->l_proc == p2) |
| | | 921 | l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); |
| | | 922 | else |
| | | 923 | l2->l_flag |= (l1->l_flag & LW_WREBOOT); |
| | | 924 | |
924 | l2->l_sigstk = *sigstk; | | 925 | l2->l_sigstk = *sigstk; |
925 | l2->l_sigmask = *sigmask; | | 926 | l2->l_sigmask = *sigmask; |
926 | TAILQ_INIT(&l2->l_sigpend.sp_info); | | 927 | TAILQ_INIT(&l2->l_sigpend.sp_info); |
927 | sigemptyset(&l2->l_sigpend.sp_set); | | 928 | sigemptyset(&l2->l_sigpend.sp_set); |
928 | | | 929 | |
929 | if (__predict_true(lid == 0)) { | | 930 | if (__predict_true(lid == 0)) { |
930 | /* | | 931 | /* |
931 | * XXX: l_lid values are expected to be unique (within a process); | | 932 | * XXX: l_lid values are expected to be unique (within a process); |
932 | * if LWP_PIDLID is sometimes set, this won't be true. | | 933 | * if LWP_PIDLID is sometimes set, this won't be true. |
933 | * Once 2^31 threads have been allocated we have to | | 934 | * Once 2^31 threads have been allocated we have to |
934 | * scan to ensure we allocate a unique value. | | 935 | * scan to ensure we allocate a unique value. |
935 | */ | | 936 | */ |
936 | lid = ++p2->p_nlwpid; | | 937 | lid = ++p2->p_nlwpid; |
937 | if (__predict_false(lid & LID_SCAN)) { | | 938 | if (__predict_false(lid & LID_SCAN)) { |
938 | lid = lwp_find_free_lid(lid, l2, p2); | | 939 | lid = lwp_find_free_lid(lid, l2, p2); |
939 | p2->p_nlwpid = lid | LID_SCAN; | | 940 | p2->p_nlwpid = lid | LID_SCAN; |
940 | /* l2 has been inserted into p_lwps in order */ | | 941 | /* l2 has been inserted into p_lwps in order */ |
941 | goto skip_insert; | | 942 | goto skip_insert; |
942 | } | | 943 | } |
943 | p2->p_nlwpid = lid; | | 944 | p2->p_nlwpid = lid; |
944 | } | | 945 | } |
945 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); | | 946 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); |
946 | skip_insert: | | 947 | skip_insert: |
947 | l2->l_lid = lid; | | 948 | l2->l_lid = lid; |
948 | p2->p_nlwps++; | | 949 | p2->p_nlwps++; |
949 | p2->p_nrlwps++; | | 950 | p2->p_nrlwps++; |
950 | | | 951 | |
951 | KASSERT(l2->l_affinity == NULL); | | 952 | KASSERT(l2->l_affinity == NULL); |
952 | | | 953 | |
953 | /* Inherit the affinity mask. */ | | 954 | /* Inherit the affinity mask. */ |
954 | if (l1->l_affinity) { | | 955 | if (l1->l_affinity) { |
955 | /* | | 956 | /* |
956 | * Note that we hold the state lock while inheriting | | 957 | * Note that we hold the state lock while inheriting |
957 | * the affinity to avoid race with sched_setaffinity(). | | 958 | * the affinity to avoid race with sched_setaffinity(). |
958 | */ | | 959 | */ |
959 | lwp_lock(l1); | | 960 | lwp_lock(l1); |
960 | if (l1->l_affinity) { | | 961 | if (l1->l_affinity) { |
961 | kcpuset_use(l1->l_affinity); | | 962 | kcpuset_use(l1->l_affinity); |
962 | l2->l_affinity = l1->l_affinity; | | 963 | l2->l_affinity = l1->l_affinity; |
963 | } | | 964 | } |
964 | lwp_unlock(l1); | | 965 | lwp_unlock(l1); |
965 | } | | 966 | } |
966 | mutex_exit(p2->p_lock); | | 967 | mutex_exit(p2->p_lock); |
967 | | | 968 | |
968 | SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); | | 969 | SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); |
969 | | | 970 | |
970 | mutex_enter(proc_lock); | | 971 | mutex_enter(proc_lock); |
971 | LIST_INSERT_HEAD(&alllwp, l2, l_list); | | 972 | LIST_INSERT_HEAD(&alllwp, l2, l_list); |
972 | /* Inherit a processor-set */ | | 973 | /* Inherit a processor-set */ |
973 | l2->l_psid = l1->l_psid; | | 974 | l2->l_psid = l1->l_psid; |
974 | mutex_exit(proc_lock); | | 975 | mutex_exit(proc_lock); |
975 | | | 976 | |
976 | SYSCALL_TIME_LWP_INIT(l2); | | 977 | SYSCALL_TIME_LWP_INIT(l2); |
977 | | | 978 | |
978 | if (p2->p_emul->e_lwp_fork) | | 979 | if (p2->p_emul->e_lwp_fork) |
979 | (*p2->p_emul->e_lwp_fork)(l1, l2); | | 980 | (*p2->p_emul->e_lwp_fork)(l1, l2); |
980 | | | 981 | |
981 | return (0); | | 982 | return (0); |
982 | } | | 983 | } |
983 | | | 984 | |
984 | /* | | 985 | /* |
985 | * Set a new LWP running. If the process is stopping, then the LWP is | | 986 | * Set a new LWP running. If the process is stopping, then the LWP is |
986 | * created stopped. | | 987 | * created stopped. |
987 | */ | | 988 | */ |
988 | void | | 989 | void |
989 | lwp_start(lwp_t *l, int flags) | | 990 | lwp_start(lwp_t *l, int flags) |
990 | { | | 991 | { |
991 | proc_t *p = l->l_proc; | | 992 | proc_t *p = l->l_proc; |
992 | | | 993 | |
993 | mutex_enter(p->p_lock); | | 994 | mutex_enter(p->p_lock); |
994 | lwp_lock(l); | | 995 | lwp_lock(l); |
995 | KASSERT(l->l_stat == LSIDL); | | 996 | KASSERT(l->l_stat == LSIDL); |
996 | if ((flags & LWP_SUSPENDED) != 0) { | | 997 | if ((flags & LWP_SUSPENDED) != 0) { |
997 | /* It'll suspend itself in lwp_userret(). */ | | 998 | /* It'll suspend itself in lwp_userret(). */ |
998 | l->l_flag |= LW_WSUSPEND; | | 999 | l->l_flag |= LW_WSUSPEND; |
999 | } | | 1000 | } |
1000 | if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { | | 1001 | if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { |
1001 | KASSERT(l->l_wchan == NULL); | | 1002 | KASSERT(l->l_wchan == NULL); |
1002 | l->l_stat = LSSTOP; | | 1003 | l->l_stat = LSSTOP; |
1003 | p->p_nrlwps--; | | 1004 | p->p_nrlwps--; |
1004 | lwp_unlock(l); | | 1005 | lwp_unlock(l); |
1005 | } else { | | 1006 | } else { |
1006 | setrunnable(l); | | 1007 | setrunnable(l); |
1007 | /* LWP now unlocked */ | | 1008 | /* LWP now unlocked */ |
1008 | } | | 1009 | } |
1009 | mutex_exit(p->p_lock); | | 1010 | mutex_exit(p->p_lock); |
1010 | } | | 1011 | } |
1011 | | | 1012 | |
1012 | /* | | 1013 | /* |
1013 | * Called by MD code when a new LWP begins execution. Must be called | | 1014 | * Called by MD code when a new LWP begins execution. Must be called |
1014 | * with the previous LWP locked (so at splsched), or, if there is no | | 1015 | * with the previous LWP locked (so at splsched), or, if there is no |
1015 | * previous LWP, at splsched. | | 1016 | * previous LWP, at splsched. |
1016 | */ | | 1017 | */ |
1017 | void | | 1018 | void |
1018 | lwp_startup(struct lwp *prev, struct lwp *new_lwp) | | 1019 | lwp_startup(struct lwp *prev, struct lwp *new_lwp) |
1019 | { | | 1020 | { |
1020 | | | 1021 | |
1021 | KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); | | 1022 | KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); |
1022 | KASSERT(kpreempt_disabled()); | | 1023 | KASSERT(kpreempt_disabled()); |
1023 | KASSERT(prev != NULL); | | 1024 | KASSERT(prev != NULL); |
1024 | KASSERT((prev->l_flag & LW_RUNNING) != 0); | | 1025 | KASSERT((prev->l_flag & LW_RUNNING) != 0); |
1025 | KASSERT(curcpu()->ci_mtx_count == -2); | | 1026 | KASSERT(curcpu()->ci_mtx_count == -2); |
1026 | | | 1027 | |
1027 | /* Immediately mark previous LWP as no longer running, and unlock. */ | | 1028 | /* Immediately mark previous LWP as no longer running, and unlock. */ |
1028 | prev->l_flag &= ~LW_RUNNING; | | 1029 | prev->l_flag &= ~LW_RUNNING; |
1029 | lwp_unlock(prev); | | 1030 | lwp_unlock(prev); |
1030 | | | 1031 | |
1031 | /* Correct spin mutex count after mi_switch(). */ | | 1032 | /* Correct spin mutex count after mi_switch(). */ |
1032 | curcpu()->ci_mtx_count = 0; | | 1033 | curcpu()->ci_mtx_count = 0; |
1033 | | | 1034 | |
1034 | /* Install new VM context. */ | | 1035 | /* Install new VM context. */ |
1035 | if (__predict_true(new_lwp->l_proc->p_vmspace)) { | | 1036 | if (__predict_true(new_lwp->l_proc->p_vmspace)) { |
1036 | pmap_activate(new_lwp); | | 1037 | pmap_activate(new_lwp); |
1037 | } | | 1038 | } |
1038 | | | 1039 | |
1039 | /* We remain at IPL_SCHED from mi_switch() - reset it. */ | | 1040 | /* We remain at IPL_SCHED from mi_switch() - reset it. */ |
1040 | spl0(); | | 1041 | spl0(); |
1041 | | | 1042 | |
1042 | LOCKDEBUG_BARRIER(NULL, 0); | | 1043 | LOCKDEBUG_BARRIER(NULL, 0); |
1043 | SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); | | 1044 | SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); |
1044 | | | 1045 | |
1045 | /* For kthreads, acquire kernel lock if not MPSAFE. */ | | 1046 | /* For kthreads, acquire kernel lock if not MPSAFE. */ |
1046 | if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { | | 1047 | if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { |
1047 | KERNEL_LOCK(1, new_lwp); | | 1048 | KERNEL_LOCK(1, new_lwp); |
1048 | } | | 1049 | } |
1049 | } | | 1050 | } |
1050 | | | 1051 | |
1051 | /* | | 1052 | /* |
1052 | * Exit an LWP. | | 1053 | * Exit an LWP. |
1053 | */ | | 1054 | */ |
1054 | void | | 1055 | void |
1055 | lwp_exit(struct lwp *l) | | 1056 | lwp_exit(struct lwp *l) |
1056 | { | | 1057 | { |
1057 | struct proc *p = l->l_proc; | | 1058 | struct proc *p = l->l_proc; |
1058 | struct lwp *l2; | | 1059 | struct lwp *l2; |
1059 | bool current; | | 1060 | bool current; |
1060 | | | 1061 | |
1061 | current = (l == curlwp); | | 1062 | current = (l == curlwp); |
1062 | | | 1063 | |
1063 | KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); | | 1064 | KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); |
1064 | KASSERT(p == curproc); | | 1065 | KASSERT(p == curproc); |
1065 | | | 1066 | |
1066 | SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); | | 1067 | SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); |
1067 | | | 1068 | |
1068 | /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ | | 1069 | /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ |
1069 | LOCKDEBUG_BARRIER(NULL, 0); | | 1070 | LOCKDEBUG_BARRIER(NULL, 0); |
1070 | KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); | | 1071 | KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); |
1071 | | | 1072 | |
1072 | /* | | 1073 | /* |
1073 | * If we are the last live LWP in a process, we need to exit the | | 1074 | * If we are the last live LWP in a process, we need to exit the |
1074 | * entire process. We do so with an exit status of zero, because | | 1075 | * entire process. We do so with an exit status of zero, because |
1075 | * it's a "controlled" exit, and because that's what Solaris does. | | 1076 | * it's a "controlled" exit, and because that's what Solaris does. |
1076 | * | | 1077 | * |
1077 | * We are not quite a zombie yet, but for accounting purposes we | | 1078 | * We are not quite a zombie yet, but for accounting purposes we |
1078 | * must increment the count of zombies here. | | 1079 | * must increment the count of zombies here. |
1079 | * | | 1080 | * |
1080 | * Note: the last LWP's specificdata will be deleted here. | | 1081 | * Note: the last LWP's specificdata will be deleted here. |
1081 | */ | | 1082 | */ |
1082 | mutex_enter(p->p_lock); | | 1083 | mutex_enter(p->p_lock); |
1083 | if (p->p_nlwps - p->p_nzlwps == 1) { | | 1084 | if (p->p_nlwps - p->p_nzlwps == 1) { |
1084 | KASSERT(current == true); | | 1085 | KASSERT(current == true); |
1085 | KASSERT(p != &proc0); | | 1086 | KASSERT(p != &proc0); |
1086 | exit1(l, 0, 0); | | 1087 | exit1(l, 0, 0); |
1087 | /* NOTREACHED */ | | 1088 | /* NOTREACHED */ |
1088 | } | | 1089 | } |
1089 | p->p_nzlwps++; | | 1090 | p->p_nzlwps++; |
1090 | mutex_exit(p->p_lock); | | 1091 | mutex_exit(p->p_lock); |
1091 | | | 1092 | |
1092 | if (p->p_emul->e_lwp_exit) | | 1093 | if (p->p_emul->e_lwp_exit) |
1093 | (*p->p_emul->e_lwp_exit)(l); | | 1094 | (*p->p_emul->e_lwp_exit)(l); |
1094 | | | 1095 | |
1095 | /* Drop filedesc reference. */ | | 1096 | /* Drop filedesc reference. */ |
1096 | fd_free(); | | 1097 | fd_free(); |
1097 | | | 1098 | |
1098 | /* Release fstrans private data. */ | | 1099 | /* Release fstrans private data. */ |
1099 | fstrans_lwp_dtor(l); | | 1100 | fstrans_lwp_dtor(l); |
1100 | | | 1101 | |
1101 | /* Delete the specificdata while it's still safe to sleep. */ | | 1102 | /* Delete the specificdata while it's still safe to sleep. */ |
1102 | lwp_finispecific(l); | | 1103 | lwp_finispecific(l); |
1103 | | | 1104 | |
1104 | /* | | 1105 | /* |
1105 | * Release our cached credentials. | | 1106 | * Release our cached credentials. |
1106 | */ | | 1107 | */ |
1107 | kauth_cred_free(l->l_cred); | | 1108 | kauth_cred_free(l->l_cred); |
1108 | callout_destroy(&l->l_timeout_ch); | | 1109 | callout_destroy(&l->l_timeout_ch); |
1109 | | | 1110 | |
1110 | /* | | 1111 | /* |
1111 | * If traced, report LWP exit event to the debugger. | | 1112 | * If traced, report LWP exit event to the debugger. |
1112 | * | | 1113 | * |
1113 | * Remove the LWP from the global list. | | 1114 | * Remove the LWP from the global list. |
1114 | * Free its LID from the PID namespace if needed. | | 1115 | * Free its LID from the PID namespace if needed. |
1115 | */ | | 1116 | */ |
1116 | mutex_enter(proc_lock); | | 1117 | mutex_enter(proc_lock); |
1117 | | | 1118 | |
1118 | if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == | | 1119 | if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == |
1119 | (PSL_TRACED|PSL_TRACELWP_EXIT)) { | | 1120 | (PSL_TRACED|PSL_TRACELWP_EXIT)) { |
1120 | mutex_enter(p->p_lock); | | 1121 | mutex_enter(p->p_lock); |
1121 | if (ISSET(p->p_sflag, PS_WEXIT)) { | | 1122 | if (ISSET(p->p_sflag, PS_WEXIT)) { |
1122 | mutex_exit(p->p_lock); | | 1123 | mutex_exit(p->p_lock); |
1123 | /* | | 1124 | /* |
1124 | * We are exiting; bail out without informing the parent | | 1125 | * We are exiting; bail out without informing the parent |
1125 | * about the terminating LWP, as doing so would deadlock. | | 1126 | * about the terminating LWP, as doing so would deadlock. |
1126 | */ | | 1127 | */ |
1127 | } else { | | 1128 | } else { |
1128 | eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); | | 1129 | eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); |
1129 | mutex_enter(proc_lock); | | 1130 | mutex_enter(proc_lock); |
1130 | } | | 1131 | } |
1131 | } | | 1132 | } |
1132 | | | 1133 | |
1133 | LIST_REMOVE(l, l_list); | | 1134 | LIST_REMOVE(l, l_list); |
1134 | if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { | | 1135 | if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { |
1135 | proc_free_pid(l->l_lid); | | 1136 | proc_free_pid(l->l_lid); |
1136 | } | | 1137 | } |
1137 | mutex_exit(proc_lock); | | 1138 | mutex_exit(proc_lock); |
1138 | | | 1139 | |
1139 | /* | | 1140 | /* |
1140 | * Get rid of all references to the LWP that others (e.g. procfs) | | 1141 | * Get rid of all references to the LWP that others (e.g. procfs) |
1141 | * may have, and mark the LWP as a zombie. If the LWP is detached, | | 1142 | * may have, and mark the LWP as a zombie. If the LWP is detached, |
1142 | * mark it waiting for collection in the proc structure. Note that | | 1143 | * mark it waiting for collection in the proc structure. Note that |
1143 | * before we can do that, we need to free any other dead, detached | | 1144 | * before we can do that, we need to free any other dead, detached |
1144 | * LWP waiting to meet its maker. | | 1145 | * LWP waiting to meet its maker. |
1145 | */ | | 1146 | */ |
1146 | mutex_enter(p->p_lock); | | 1147 | mutex_enter(p->p_lock); |
1147 | lwp_drainrefs(l); | | 1148 | lwp_drainrefs(l); |
1148 | | | 1149 | |
1149 | if ((l->l_prflag & LPR_DETACHED) != 0) { | | 1150 | if ((l->l_prflag & LPR_DETACHED) != 0) { |
1150 | while ((l2 = p->p_zomblwp) != NULL) { | | 1151 | while ((l2 = p->p_zomblwp) != NULL) { |
1151 | p->p_zomblwp = NULL; | | 1152 | p->p_zomblwp = NULL; |
1152 | lwp_free(l2, false, false); /* releases proc mutex */ | | 1153 | lwp_free(l2, false, false); /* releases proc mutex */ |
1153 | mutex_enter(p->p_lock); | | 1154 | mutex_enter(p->p_lock); |
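 | | | | /* Editorial note: p_lock was dropped by lwp_free(), so new references |
 | | | | * to l may have appeared; restore the self-reference consumed by |
 | | | | * lwp_drainrefs() and drain again. */ |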
1154 | l->l_refcnt++; | | 1155 | l->l_refcnt++; |
1155 | lwp_drainrefs(l); | | 1156 | lwp_drainrefs(l); |
1156 | } | | 1157 | } |
1157 | p->p_zomblwp = l; | | 1158 | p->p_zomblwp = l; |
1158 | } | | 1159 | } |
1159 | | | 1160 | |
1160 | /* | | 1161 | /* |
1161 | * If we find a pending signal for the process and we have been | | 1162 | * If we find a pending signal for the process and we have been |
1162 | * asked to check for signals, then we lose: arrange to have | | 1163 | * asked to check for signals, then we lose: arrange to have |
1163 | * all other LWPs in the process check for signals. | | 1164 | * all other LWPs in the process check for signals. |
1164 | */ | | 1165 | */ |
1165 | if ((l->l_flag & LW_PENDSIG) != 0 && | | 1166 | if ((l->l_flag & LW_PENDSIG) != 0 && |
1166 | firstsig(&p->p_sigpend.sp_set) != 0) { | | 1167 | firstsig(&p->p_sigpend.sp_set) != 0) { |
1167 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { | | 1168 | LIST_FOREACH(l2, &p->p_lwps, l_sibling) { |
1168 | lwp_lock(l2); | | 1169 | lwp_lock(l2); |
1169 | signotify(l2); | | 1170 | signotify(l2); |
1170 | lwp_unlock(l2); | | 1171 | lwp_unlock(l2); |
1171 | } | | 1172 | } |
1172 | } | | 1173 | } |
1173 | | | 1174 | |
1174 | /* | | 1175 | /* |
1175 | * Release any PCU resources before becoming a zombie. | | 1176 | * Release any PCU resources before becoming a zombie. |
1176 | */ | | 1177 | */ |
1177 | pcu_discard_all(l); | | 1178 | pcu_discard_all(l); |
1178 | | | 1179 | |
1179 | lwp_lock(l); | | 1180 | lwp_lock(l); |
1180 | l->l_stat = LSZOMB; | | 1181 | l->l_stat = LSZOMB; |
1181 | if (l->l_name != NULL) { | | 1182 | if (l->l_name != NULL) { |
1182 | strcpy(l->l_name, "(zombie)"); | | 1183 | strcpy(l->l_name, "(zombie)"); |
1183 | } | | 1184 | } |
1184 | lwp_unlock(l); | | 1185 | lwp_unlock(l); |
1185 | p->p_nrlwps--; | | 1186 | p->p_nrlwps--; |
1186 | cv_broadcast(&p->p_lwpcv); | | 1187 | cv_broadcast(&p->p_lwpcv); |
1187 | if (l->l_lwpctl != NULL) | | 1188 | if (l->l_lwpctl != NULL) |
1188 | l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; | | 1189 | l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; |
1189 | mutex_exit(p->p_lock); | | 1190 | mutex_exit(p->p_lock); |
1190 | | | 1191 | |
1191 | /* | | 1192 | /* |
1192 | * We can no longer block. At this point, lwp_free() may already | | 1193 | * We can no longer block. At this point, lwp_free() may already |
1193 | * be gunning for us. On a multi-CPU system, we may be off p_lwps. | | 1194 | * be gunning for us. On a multi-CPU system, we may be off p_lwps. |
1194 | * | | 1195 | * |
1195 | * Free MD LWP resources. | | 1196 | * Free MD LWP resources. |
1196 | */ | | 1197 | */ |
1197 | cpu_lwp_free(l, 0); | | 1198 | cpu_lwp_free(l, 0); |
1198 | | | 1199 | |
1199 | if (current) { | | 1200 | if (current) { |
1200 | /* For the LW_RUNNING check in lwp_free(). */ | | 1201 | /* For the LW_RUNNING check in lwp_free(). */ |
1201 | membar_exit(); | | 1202 | membar_exit(); |
1202 | /* Switch away into oblivion. */ | | 1203 | /* Switch away into oblivion. */ |
1203 | lwp_lock(l); | | 1204 | lwp_lock(l); |
1204 | spc_lock(l->l_cpu); | | 1205 | spc_lock(l->l_cpu); |
1205 | mi_switch(l); | | 1206 | mi_switch(l); |
1206 | panic("lwp_exit"); | | 1207 | panic("lwp_exit"); |
1207 | } | | 1208 | } |
1208 | } | | 1209 | } |
1209 | | | 1210 | |
1210 | /* | | 1211 | /* |
1211 | * Free a dead LWP's remaining resources. | | 1212 | * Free a dead LWP's remaining resources. |
1212 | * | | 1213 | * |
1213 | * XXXLWP limits. | | 1214 | * XXXLWP limits. |
1214 | */ | | 1215 | */ |
1215 | void | | 1216 | void |
1216 | lwp_free(struct lwp *l, bool recycle, bool last) | | 1217 | lwp_free(struct lwp *l, bool recycle, bool last) |
1217 | { | | 1218 | { |
1218 | struct proc *p = l->l_proc; | | 1219 | struct proc *p = l->l_proc; |
1219 | struct rusage *ru; | | 1220 | struct rusage *ru; |
1220 | ksiginfoq_t kq; | | 1221 | ksiginfoq_t kq; |
1221 | | | 1222 | |
1222 | KASSERT(l != curlwp); | | 1223 | KASSERT(l != curlwp); |
1223 | KASSERT(last || mutex_owned(p->p_lock)); | | 1224 | KASSERT(last || mutex_owned(p->p_lock)); |
1224 | | | 1225 | |
1225 | /* | | 1226 | /* |
1226 | * We use the process credentials instead of the lwp credentials here | | 1227 | * We use the process credentials instead of the lwp credentials here |
1227 | * because the lwp credentials may be cached (just after a setuid call) | | 1228 | * because the lwp credentials may be cached (just after a setuid call) |
1228 | * and we don't want to pay for syncing, since the lwp is going away | | 1229 | * and we don't want to pay for syncing, since the lwp is going away |
1229 | * anyway. | | 1230 | * anyway. |
1230 | */ | | 1231 | */ |
1231 | if (p != &proc0 && p->p_nlwps != 1) | | 1232 | if (p != &proc0 && p->p_nlwps != 1) |
1232 | (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); | | 1233 | (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); |
1233 | | | 1234 | |
1234 | /* | | 1235 | /* |
1235 | * If this was not the last LWP in the process, then adjust | | 1236 | * If this was not the last LWP in the process, then adjust |
1236 | * counters and unlock. | | 1237 | * counters and unlock. |
1237 | */ | | 1238 | */ |
1238 | if (!last) { | | 1239 | if (!last) { |
1239 | /* | | 1240 | /* |
1240 | * Add the LWP's run time to the process' base value. | | 1241 | * Add the LWP's run time to the process' base value. |
1241 | * This needs to coincide with coming off p_lwps. | | 1242 | * This needs to coincide with coming off p_lwps. |
1242 | */ | | 1243 | */ |
1243 | bintime_add(&p->p_rtime, &l->l_rtime); | | 1244 | bintime_add(&p->p_rtime, &l->l_rtime); |
1244 | p->p_pctcpu += l->l_pctcpu; | | 1245 | p->p_pctcpu += l->l_pctcpu; |
1245 | ru = &p->p_stats->p_ru; | | 1246 | ru = &p->p_stats->p_ru; |
1246 | ruadd(ru, &l->l_ru); | | 1247 | ruadd(ru, &l->l_ru); |
1247 | ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); | | 1248 | ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); |
1248 | ru->ru_nivcsw += l->l_nivcsw; | | 1249 | ru->ru_nivcsw += l->l_nivcsw; |
1249 | LIST_REMOVE(l, l_sibling); | | 1250 | LIST_REMOVE(l, l_sibling); |
1250 | p->p_nlwps--; | | 1251 | p->p_nlwps--; |
1251 | p->p_nzlwps--; | | 1252 | p->p_nzlwps--; |
1252 | if ((l->l_prflag & LPR_DETACHED) != 0) | | 1253 | if ((l->l_prflag & LPR_DETACHED) != 0) |
1253 | p->p_ndlwps--; | | 1254 | p->p_ndlwps--; |
1254 | | | 1255 | |
1255 | /* | | 1256 | /* |
1256 | * Have any LWPs sleeping in lwp_wait() recheck for | | 1257 | * Have any LWPs sleeping in lwp_wait() recheck for |
1257 | * deadlock. | | 1258 | * deadlock. |
1258 | */ | | 1259 | */ |
1259 | cv_broadcast(&p->p_lwpcv); | | 1260 | cv_broadcast(&p->p_lwpcv); |
1260 | mutex_exit(p->p_lock); | | 1261 | mutex_exit(p->p_lock); |
1261 | } | | 1262 | } |
1262 | | | 1263 | |
1263 | #ifdef MULTIPROCESSOR | | 1264 | #ifdef MULTIPROCESSOR |
1264 | /* | | 1265 | /* |
1265 | * In the unlikely event that the LWP is still on the CPU, | | 1266 | * In the unlikely event that the LWP is still on the CPU, |
1266 | * then spin until it has switched away. We need to release | | 1267 | * then spin until it has switched away. We need to release |
1267 | * all locks to avoid deadlock against interrupt handlers on | | 1268 | * all locks to avoid deadlock against interrupt handlers on |
1268 | * the target CPU. | | 1269 | * the target CPU. |
1269 | */ | | 1270 | */ |
1270 | membar_enter(); | | 1271 | membar_enter(); |
1271 | while (__predict_false((l->l_flag & LW_RUNNING) != 0)) { | | 1272 | while (__predict_false((l->l_flag & LW_RUNNING) != 0)) { |
1272 | SPINLOCK_BACKOFF_HOOK; | | 1273 | SPINLOCK_BACKOFF_HOOK; |
1273 | } | | 1274 | } |
1274 | #endif | | 1275 | #endif |
1275 | | | 1276 | |
1276 | /* | | 1277 | /* |
1277 | * Destroy the LWP's remaining signal information. | | 1278 | * Destroy the LWP's remaining signal information. |
1278 | */ | | 1279 | */ |
1279 | ksiginfo_queue_init(&kq); | | 1280 | ksiginfo_queue_init(&kq); |
1280 | sigclear(&l->l_sigpend, NULL, &kq); | | 1281 | sigclear(&l->l_sigpend, NULL, &kq); |
1281 | ksiginfo_queue_drain(&kq); | | 1282 | ksiginfo_queue_drain(&kq); |
1282 | cv_destroy(&l->l_sigcv); | | 1283 | cv_destroy(&l->l_sigcv); |
1283 | cv_destroy(&l->l_waitcv); | | 1284 | cv_destroy(&l->l_waitcv); |
1284 | | | 1285 | |
1285 | /* | | 1286 | /* |
1286 | * Free lwpctl structure and affinity. | | 1287 | * Free lwpctl structure and affinity. |
1287 | */ | | 1288 | */ |
1288 | if (l->l_lwpctl) { | | 1289 | if (l->l_lwpctl) { |
1289 | lwp_ctl_free(l); | | 1290 | lwp_ctl_free(l); |
1290 | } | | 1291 | } |
1291 | if (l->l_affinity) { | | 1292 | if (l->l_affinity) { |
1292 | kcpuset_unuse(l->l_affinity, NULL); | | 1293 | kcpuset_unuse(l->l_affinity, NULL); |
1293 | l->l_affinity = NULL; | | 1294 | l->l_affinity = NULL; |
1294 | } | | 1295 | } |
1295 | | | 1296 | |
1296 | /* | | 1297 | /* |
1297 | * Free the LWP's turnstile and the LWP structure itself unless the | | 1298 | * Free the LWP's turnstile and the LWP structure itself unless the |
1298 | * caller wants to recycle them. Also, free the scheduler specific | | 1299 | * caller wants to recycle them. Also, free the scheduler specific |
1299 | * data. | | 1300 | * data. |
1300 | * | | 1301 | * |
1301 | * We can't return turnstile0 to the pool (it didn't come from it), | | 1302 | * We can't return turnstile0 to the pool (it didn't come from it), |
1302 | * so if it comes up just drop it quietly and move on. | | 1303 | * so if it comes up just drop it quietly and move on. |
1303 | * | | 1304 | * |
1304 | * We don't recycle the VM resources at this time. | | 1305 | * We don't recycle the VM resources at this time. |
1305 | */ | | 1306 | */ |
1306 | | | 1307 | |
1307 | if (!recycle && l->l_ts != &turnstile0) | | 1308 | if (!recycle && l->l_ts != &turnstile0) |
1308 | pool_cache_put(turnstile_cache, l->l_ts); | | 1309 | pool_cache_put(turnstile_cache, l->l_ts); |
1309 | if (l->l_name != NULL) | | 1310 | if (l->l_name != NULL) |
1310 | kmem_free(l->l_name, MAXCOMLEN); | | 1311 | kmem_free(l->l_name, MAXCOMLEN); |
1311 | | | 1312 | |
1312 | kmsan_lwp_free(l); | | 1313 | kmsan_lwp_free(l); |
1313 | cpu_lwp_free2(l); | | 1314 | cpu_lwp_free2(l); |
1314 | uvm_lwp_exit(l); | | 1315 | uvm_lwp_exit(l); |
1315 | | | 1316 | |
1316 | KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); | | 1317 | KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); |
1317 | KASSERT(l->l_inheritedprio == -1); | | 1318 | KASSERT(l->l_inheritedprio == -1); |
1318 | KASSERT(l->l_blcnt == 0); | | 1319 | KASSERT(l->l_blcnt == 0); |
1319 | kdtrace_thread_dtor(NULL, l); | | 1320 | kdtrace_thread_dtor(NULL, l); |
1320 | if (!recycle) | | 1321 | if (!recycle) |
1321 | pool_cache_put(lwp_cache, l); | | 1322 | pool_cache_put(lwp_cache, l); |
1322 | } | | 1323 | } |
1323 | | | 1324 | |
1324 | /* | | 1325 | /* |
1325 | * Migrate the LWP to another CPU. Unlocks the LWP. | | 1326 | * Migrate the LWP to another CPU. Unlocks the LWP. |
1326 | */ | | 1327 | */ |
1327 | void | | 1328 | void |
1328 | lwp_migrate(lwp_t *l, struct cpu_info *tci) | | 1329 | lwp_migrate(lwp_t *l, struct cpu_info *tci) |
1329 | { | | 1330 | { |
1330 | struct schedstate_percpu *tspc; | | 1331 | struct schedstate_percpu *tspc; |
1331 | int lstat = l->l_stat; | | 1332 | int lstat = l->l_stat; |
1332 | | | 1333 | |
1333 | KASSERT(lwp_locked(l, NULL)); | | 1334 | KASSERT(lwp_locked(l, NULL)); |
1334 | KASSERT(tci != NULL); | | 1335 | KASSERT(tci != NULL); |
1335 | | | 1336 | |
1336 | /* If LWP is still on the CPU, it must be handled like LSONPROC */ | | 1337 | /* If LWP is still on the CPU, it must be handled like LSONPROC */ |
1337 | if ((l->l_flag & LW_RUNNING) != 0) { | | 1338 | if ((l->l_flag & LW_RUNNING) != 0) { |
1338 | lstat = LSONPROC; | | 1339 | lstat = LSONPROC; |
1339 | } | | 1340 | } |
1340 | | | 1341 | |
1341 | /* | | 1342 | /* |
1342 | * The destination CPU may change while a previous migration | | 1343 | * The destination CPU may change while a previous migration |
1343 | * is still in progress. | | 1344 | * is still in progress. |
1344 | */ | | 1345 | */ |
1345 | if (l->l_target_cpu != NULL) { | | 1346 | if (l->l_target_cpu != NULL) { |
1346 | l->l_target_cpu = tci; | | 1347 | l->l_target_cpu = tci; |
1347 | lwp_unlock(l); | | 1348 | lwp_unlock(l); |
1348 | return; | | 1349 | return; |
1349 | } | | 1350 | } |
1350 | | | 1351 | |
1351 | /* Nothing to do if trying to migrate to the same CPU */ | | 1352 | /* Nothing to do if trying to migrate to the same CPU */ |
1352 | if (l->l_cpu == tci) { | | 1353 | if (l->l_cpu == tci) { |
1353 | lwp_unlock(l); | | 1354 | lwp_unlock(l); |
1354 | return; | | 1355 | return; |
1355 | } | | 1356 | } |
1356 | | | 1357 | |
1357 | KASSERT(l->l_target_cpu == NULL); | | 1358 | KASSERT(l->l_target_cpu == NULL); |
1358 | tspc = &tci->ci_schedstate; | | 1359 | tspc = &tci->ci_schedstate; |
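 | | | | /* |
 | | | | * Editorial summary of the cases below: LSRUN and LSONPROC LWPs are |
 | | | | * handed to the scheduler via l_target_cpu (LSONPROC additionally has |
 | | | | * its current CPU kicked so it switches out); LSSLEEP LWPs have l_cpu |
 | | | | * rewritten directly; idle/stopped/suspended LWPs also move directly, |
 | | | | * taking the target CPU's spc_lwplock when not on a sleep queue. |
 | | | | */ |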
1359 | switch (lstat) { | | 1360 | switch (lstat) { |
1360 | case LSRUN: | | 1361 | case LSRUN: |
1361 | l->l_target_cpu = tci; | | 1362 | l->l_target_cpu = tci; |
1362 | break; | | 1363 | break; |
1363 | case LSSLEEP: | | 1364 | case LSSLEEP: |
1364 | l->l_cpu = tci; | | 1365 | l->l_cpu = tci; |
1365 | break; | | 1366 | break; |
1366 | case LSIDL: | | 1367 | case LSIDL: |
1367 | case LSSTOP: | | 1368 | case LSSTOP: |
1368 | case LSSUSPENDED: | | 1369 | case LSSUSPENDED: |
1369 | l->l_cpu = tci; | | 1370 | l->l_cpu = tci; |
1370 | if (l->l_wchan == NULL) { | | 1371 | if (l->l_wchan == NULL) { |
1371 | lwp_unlock_to(l, tspc->spc_lwplock); | | 1372 | lwp_unlock_to(l, tspc->spc_lwplock); |
1372 | return; | | 1373 | return; |
1373 | } | | 1374 | } |
1374 | break; | | 1375 | break; |
1375 | case LSONPROC: | | 1376 | case LSONPROC: |
1376 | l->l_target_cpu = tci; | | 1377 | l->l_target_cpu = tci; |
1377 | spc_lock(l->l_cpu); | | 1378 | spc_lock(l->l_cpu); |
1378 | sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); | | 1379 | sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); |
1379 | /* spc now unlocked */ | | 1380 | /* spc now unlocked */ |
1380 | break; | | 1381 | break; |
1381 | } | | 1382 | } |
1382 | lwp_unlock(l); | | 1383 | lwp_unlock(l); |
1383 | } | | 1384 | } |
1384 | | | 1385 | |
1385 | /* | | 1386 | /* |
1386 | * Find the LWP in the process. Arguments may be zero, in which case | | 1387 | * Find the LWP in the process. Arguments may be zero, in which case |
1387 | * the calling process and the first LWP in its list will be used. | | 1388 | * the calling process and the first LWP in its list will be used. |
1388 | * On success, returns with the proc locked. | | 1389 | * On success, returns with the proc locked. |
1389 | */ | | 1390 | */ |
1390 | struct lwp * | | 1391 | struct lwp * |
1391 | lwp_find2(pid_t pid, lwpid_t lid) | | 1392 | lwp_find2(pid_t pid, lwpid_t lid) |
1392 | { | | 1393 | { |
1393 | proc_t *p; | | 1394 | proc_t *p; |
1394 | lwp_t *l; | | 1395 | lwp_t *l; |
1395 | | | 1396 | |
1396 | /* Find the process. */ | | 1397 | /* Find the process. */ |
1397 | if (pid != 0) { | | 1398 | if (pid != 0) { |
1398 | mutex_enter(proc_lock); | | 1399 | mutex_enter(proc_lock); |
1399 | p = proc_find(pid); | | 1400 | p = proc_find(pid); |
1400 | if (p == NULL) { | | 1401 | if (p == NULL) { |
1401 | mutex_exit(proc_lock); | | 1402 | mutex_exit(proc_lock); |
1402 | return NULL; | | 1403 | return NULL; |
1403 | } | | 1404 | } |
1404 | mutex_enter(p->p_lock); | | 1405 | mutex_enter(p->p_lock); |
1405 | mutex_exit(proc_lock); | | 1406 | mutex_exit(proc_lock); |
1406 | } else { | | 1407 | } else { |
1407 | p = curlwp->l_proc; | | 1408 | p = curlwp->l_proc; |
1408 | mutex_enter(p->p_lock); | | 1409 | mutex_enter(p->p_lock); |
1409 | } | | 1410 | } |
1410 | /* Find the thread. */ | | 1411 | /* Find the thread. */ |
1411 | if (lid != 0) { | | 1412 | if (lid != 0) { |
1412 | l = lwp_find(p, lid); | | 1413 | l = lwp_find(p, lid); |
1413 | } else { | | 1414 | } else { |
1414 | l = LIST_FIRST(&p->p_lwps); | | 1415 | l = LIST_FIRST(&p->p_lwps); |
1415 | } | | 1416 | } |
1416 | if (l == NULL) { | | 1417 | if (l == NULL) { |
1417 | mutex_exit(p->p_lock); | | 1418 | mutex_exit(p->p_lock); |
1418 | } | | 1419 | } |
1419 | return l; | | 1420 | return l; |
1420 | } | | 1421 | } |
1421 | | | 1422 | |
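 | | | | /* |
 | | | | * Editorial usage sketch (assumed caller, not from the source): the |
 | | | | * caller is responsible for dropping the proc lock, e.g. |
 | | | | * |
 | | | | *	if ((l = lwp_find2(pid, lid)) != NULL) { |
 | | | | *		... inspect l under l->l_proc->p_lock ... |
 | | | | *		mutex_exit(l->l_proc->p_lock); |
 | | | | *	} |
 | | | | */ |
 | | | | |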
1422 | /* | | 1423 | /* |
1423 | * Look up a live LWP within the specified process. | | 1424 | * Look up a live LWP within the specified process. |
1424 | * | | 1425 | * |
1425 | * Must be called with p->p_lock held. | | 1426 | * Must be called with p->p_lock held. |
1426 | */ | | 1427 | */ |
1427 | struct lwp * | | 1428 | struct lwp * |
1428 | lwp_find(struct proc *p, lwpid_t id) | | 1429 | lwp_find(struct proc *p, lwpid_t id) |
1429 | { | | 1430 | { |
1430 | struct lwp *l; | | 1431 | struct lwp *l; |
1431 | | | 1432 | |
1432 | KASSERT(mutex_owned(p->p_lock)); | | 1433 | KASSERT(mutex_owned(p->p_lock)); |
1433 | | | 1434 | |
1434 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | | 1435 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { |
1435 | if (l->l_lid == id) | | 1436 | if (l->l_lid == id) |
1436 | break; | | 1437 | break; |
1437 | } | | 1438 | } |
1438 | | | 1439 | |
1439 | /* | | 1440 | /* |
1440 | * No need to lock - all of these conditions will | | 1441 | * No need to lock - all of these conditions will |
1441 | * be visible with the process-level mutex held. | | 1442 | * be visible with the process-level mutex held. |
1442 | */ | | 1443 | */ |
1443 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) | | 1444 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) |
1444 | l = NULL; | | 1445 | l = NULL; |
1445 | | | 1446 | |
1446 | return l; | | 1447 | return l; |
1447 | } | | 1448 | } |
1448 | | | 1449 | |
1449 | /* | | 1450 | /* |
1450 | * Update an LWP's cached credentials to mirror the process' master copy. | | 1451 | * Update an LWP's cached credentials to mirror the process' master copy. |
1451 | * | | 1452 | * |
1452 | * This happens early in the syscall path, on user trap, and on LWP | | 1453 | * This happens early in the syscall path, on user trap, and on LWP |
1453 | * creation. A long-running LWP can also voluntarily choose to update | | 1454 | * creation. A long-running LWP can also voluntarily choose to update |
1454 | * its credentials by calling this routine. This may be called from | | 1455 | * its credentials by calling this routine. This may be called from |
1455 | * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. | | 1456 | * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. |
1456 | */ | | 1457 | */ |
1457 | void | | 1458 | void |
1458 | lwp_update_creds(struct lwp *l) | | 1459 | lwp_update_creds(struct lwp *l) |
1459 | { | | 1460 | { |
1460 | kauth_cred_t oc; | | 1461 | kauth_cred_t oc; |
1461 | struct proc *p; | | 1462 | struct proc *p; |
1462 | | | 1463 | |
1463 | p = l->l_proc; | | 1464 | p = l->l_proc; |
1464 | oc = l->l_cred; | | 1465 | oc = l->l_cred; |
1465 | | | 1466 | |
1466 | mutex_enter(p->p_lock); | | 1467 | mutex_enter(p->p_lock); |
1467 | kauth_cred_hold(p->p_cred); | | 1468 | kauth_cred_hold(p->p_cred); |
1468 | l->l_cred = p->p_cred; | | 1469 | l->l_cred = p->p_cred; |
1469 | l->l_prflag &= ~LPR_CRMOD; | | 1470 | l->l_prflag &= ~LPR_CRMOD; |
1470 | mutex_exit(p->p_lock); | | 1471 | mutex_exit(p->p_lock); |
1471 | if (oc != NULL) | | 1472 | if (oc != NULL) |
1472 | kauth_cred_free(oc); | | 1473 | kauth_cred_free(oc); |
1473 | } | | 1474 | } |
1474 | | | 1475 | |
1475 | /* | | 1476 | /* |
1476 | * Verify that an LWP is locked, and optionally verify that the lock matches | | 1477 | * Verify that an LWP is locked, and optionally verify that the lock matches |
1477 | * one we specify. | | 1478 | * one we specify. |
1478 | */ | | 1479 | */ |
1479 | int | | 1480 | int |
1480 | lwp_locked(struct lwp *l, kmutex_t *mtx) | | 1481 | lwp_locked(struct lwp *l, kmutex_t *mtx) |
1481 | { | | 1482 | { |
1482 | kmutex_t *cur = l->l_mutex; | | 1483 | kmutex_t *cur = l->l_mutex; |
1483 | | | 1484 | |
1484 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); | | 1485 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); |
1485 | } | | 1486 | } |
1486 | | | 1487 | |
1487 | /* | | 1488 | /* |
1488 | * Lend a new mutex to an LWP. The old mutex must be held. | | 1489 | * Lend a new mutex to an LWP. The old mutex must be held. |
1489 | */ | | 1490 | */ |
1490 | kmutex_t * | | 1491 | kmutex_t * |
1491 | lwp_setlock(struct lwp *l, kmutex_t *mtx) | | 1492 | lwp_setlock(struct lwp *l, kmutex_t *mtx) |
1492 | { | | 1493 | { |
1493 | kmutex_t *oldmtx = l->l_mutex; | | 1494 | kmutex_t *oldmtx = l->l_mutex; |
1494 | | | 1495 | |
1495 | KASSERT(mutex_owned(oldmtx)); | | 1496 | KASSERT(mutex_owned(oldmtx)); |
1496 | | | 1497 | |
1497 | membar_exit(); | | 1498 | membar_exit(); |
1498 | l->l_mutex = mtx; | | 1499 | l->l_mutex = mtx; |
1499 | return oldmtx; | | 1500 | return oldmtx; |
1500 | } | | 1501 | } |
1501 | | | 1502 | |
1502 | /* | | 1503 | /* |
1503 | * Lend a new mutex to an LWP, and release the old mutex. The old mutex | | 1504 | * Lend a new mutex to an LWP, and release the old mutex. The old mutex |
1504 | * must be held. | | 1505 | * must be held. |
1505 | */ | | 1506 | */ |
1506 | void | | 1507 | void |
1507 | lwp_unlock_to(struct lwp *l, kmutex_t *mtx) | | 1508 | lwp_unlock_to(struct lwp *l, kmutex_t *mtx) |
1508 | { | | 1509 | { |
1509 | kmutex_t *old; | | 1510 | kmutex_t *old; |
1510 | | | 1511 | |
1511 | KASSERT(lwp_locked(l, NULL)); | | 1512 | KASSERT(lwp_locked(l, NULL)); |
1512 | | | 1513 | |
1513 | old = l->l_mutex; | | 1514 | old = l->l_mutex; |
1514 | membar_exit(); | | 1515 | membar_exit(); |
1515 | l->l_mutex = mtx; | | 1516 | l->l_mutex = mtx; |
1516 | mutex_spin_exit(old); | | 1517 | mutex_spin_exit(old); |
1517 | } | | 1518 | } |
1518 | | | 1519 | |
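 | | | | /* |
 | | | | * Try to lock an LWP without blocking (editorial description): l_mutex |
 | | | | * may be changed by lwp_setlock()/lwp_unlock_to() between reading and |
 | | | | * acquiring it, so re-check after acquisition and retry if it moved. |
 | | | | * Returns nonzero on success. |
 | | | | */ |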
1519 | int | | 1520 | int |
1520 | lwp_trylock(struct lwp *l) | | 1521 | lwp_trylock(struct lwp *l) |
1521 | { | | 1522 | { |
1522 | kmutex_t *old; | | 1523 | kmutex_t *old; |
1523 | | | 1524 | |
1524 | for (;;) { | | 1525 | for (;;) { |
1525 | if (!mutex_tryenter(old = l->l_mutex)) | | 1526 | if (!mutex_tryenter(old = l->l_mutex)) |
1526 | return 0; | | 1527 | return 0; |
1527 | if (__predict_true(l->l_mutex == old)) | | 1528 | if (__predict_true(l->l_mutex == old)) |
1528 | return 1; | | 1529 | return 1; |
1529 | mutex_spin_exit(old); | | 1530 | mutex_spin_exit(old); |
1530 | } | | 1531 | } |
1531 | } | | 1532 | } |
1532 | | | 1533 | |
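 | | | | /* |
 | | | | * Remove an LWP from its sleep queue, using the unsleep method of the |
 | | | | * synchronisation object it is blocked on (editorial description). |
 | | | | */ |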
1533 | void | | 1534 | void |
1534 | lwp_unsleep(lwp_t *l, bool unlock) | | 1535 | lwp_unsleep(lwp_t *l, bool unlock) |
1535 | { | | 1536 | { |
1536 | | | 1537 | |
1537 | KASSERT(mutex_owned(l->l_mutex)); | | 1538 | KASSERT(mutex_owned(l->l_mutex)); |
1538 | (*l->l_syncobj->sobj_unsleep)(l, unlock); | | 1539 | (*l->l_syncobj->sobj_unsleep)(l, unlock); |
1539 | } | | 1540 | } |
1540 | | | 1541 | |
1541 | /* | | 1542 | /* |
1542 | * Handle exceptions for mi_userret(). Called if any flag in the LW_USERRET | | 1543 | * Handle exceptions for mi_userret(). Called if any flag in the LW_USERRET |
1543 | * mask is set. | | 1544 | * mask is set. |
1544 | */ | | 1545 | */ |
1545 | void | | 1546 | void |
1546 | lwp_userret(struct lwp *l) | | 1547 | lwp_userret(struct lwp *l) |
1547 | { | | 1548 | { |
1548 | struct proc *p; | | 1549 | struct proc *p; |
1549 | int sig; | | 1550 | int sig; |
1550 | | | 1551 | |
1551 | KASSERT(l == curlwp); | | 1552 | KASSERT(l == curlwp); |
1552 | KASSERT(l->l_stat == LSONPROC); | | 1553 | KASSERT(l->l_stat == LSONPROC); |
1553 | p = l->l_proc; | | 1554 | p = l->l_proc; |
1554 | | | 1555 | |
1555 | #ifndef __HAVE_FAST_SOFTINTS | | 1556 | #ifndef __HAVE_FAST_SOFTINTS |
1556 | /* Run pending soft interrupts. */ | | 1557 | /* Run pending soft interrupts. */ |
1557 | if (l->l_cpu->ci_data.cpu_softints != 0) | | 1558 | if (l->l_cpu->ci_data.cpu_softints != 0) |
1558 | softint_overlay(); | | 1559 | softint_overlay(); |
1559 | #endif | | 1560 | #endif |
1560 | | | 1561 | |
1561 | /* | | 1562 | /* |
1562 | * It is safe to do this read unlocked on an MP system. | | 1563 | * It is safe to do this read unlocked on an MP system. |
1563 | */ | | 1564 | */ |
1564 | while ((l->l_flag & LW_USERRET) != 0) { | | 1565 | while ((l->l_flag & LW_USERRET) != 0) { |
1565 | /* | | 1566 | /* |
1566 | * Process pending signals first, unless the process | | 1567 | * Process pending signals first, unless the process |
1567 | * is dumping core or exiting, where we will instead | | 1568 | * is dumping core or exiting, where we will instead |
1568 | * enter the LW_WSUSPEND case below. | | 1569 | * enter the LW_WSUSPEND case below. |
1569 | */ | | 1570 | */ |
1570 | if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == | | 1571 | if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == |
1571 | LW_PENDSIG) { | | 1572 | LW_PENDSIG) { |
1572 | mutex_enter(p->p_lock); | | 1573 | mutex_enter(p->p_lock); |
1573 | while ((sig = issignal(l)) != 0) | | 1574 | while ((sig = issignal(l)) != 0) |
1574 | postsig(sig); | | 1575 | postsig(sig); |
1575 | mutex_exit(p->p_lock); | | 1576 | mutex_exit(p->p_lock); |
1576 | } | | 1577 | } |
1577 | | | 1578 | |
1578 | /* | | 1579 | /* |
1579 | * Core-dump or suspend pending. | | 1580 | * Core-dump or suspend pending. |
1580 | * | | 1581 | * |
1581 | * In case of core dump, suspend ourselves, so that the kernel | | 1582 | * In case of core dump, suspend ourselves, so that the kernel |
1582 | * stack and therefore the userland registers saved in the | | 1583 | * stack and therefore the userland registers saved in the |
1583 | * trapframe are around for coredump() to write them out. | | 1584 | * trapframe are around for coredump() to write them out. |
1584 | * We also need to save any PCU resources that we have so that | | 1585 | * We also need to save any PCU resources that we have so that |
1585 | * they are accessible to coredump(). We issue a wakeup on | | 1586 | * they are accessible to coredump(). We issue a wakeup on |
1586 | * p->p_lwpcv so that sigexit() will write the core file out | | 1587 | * p->p_lwpcv so that sigexit() will write the core file out |
1587 | * once all other LWPs are suspended. | | 1588 | * once all other LWPs are suspended. |
1588 | */ | | 1589 | */ |
1589 | if ((l->l_flag & LW_WSUSPEND) != 0) { | | 1590 | if ((l->l_flag & LW_WSUSPEND) != 0) { |
1590 | pcu_save_all(l); | | 1591 | pcu_save_all(l); |
1591 | mutex_enter(p->p_lock); | | 1592 | mutex_enter(p->p_lock); |
1592 | p->p_nrlwps--; | | 1593 | p->p_nrlwps--; |
1593 | cv_broadcast(&p->p_lwpcv); | | 1594 | cv_broadcast(&p->p_lwpcv); |
1594 | lwp_lock(l); | | 1595 | lwp_lock(l); |
1595 | l->l_stat = LSSUSPENDED; | | 1596 | l->l_stat = LSSUSPENDED; |
1596 | lwp_unlock(l); | | 1597 | lwp_unlock(l); |
1597 | mutex_exit(p->p_lock); | | 1598 | mutex_exit(p->p_lock); |
1598 | lwp_lock(l); | | 1599 | lwp_lock(l); |
1599 | spc_lock(l->l_cpu); | | 1600 | spc_lock(l->l_cpu); |
1600 | mi_switch(l); | | 1601 | mi_switch(l); |
1601 | } | | 1602 | } |
1602 | | | 1603 | |
1603 | /* Process is exiting. */ | | 1604 | /* Process is exiting. */ |
1604 | if ((l->l_flag & LW_WEXIT) != 0) { | | 1605 | if ((l->l_flag & LW_WEXIT) != 0) { |
1605 | lwp_exit(l); | | 1606 | lwp_exit(l); |
1606 | KASSERT(0); | | 1607 | KASSERT(0); |
1607 | /* NOTREACHED */ | | 1608 | /* NOTREACHED */ |
1608 | } | | 1609 | } |
1609 | | | 1610 | |
1610 | /* update lwpctl processor (for vfork child_return) */ | | 1611 | /* update lwpctl processor (for vfork child_return) */ |
1611 | if (l->l_flag & LW_LWPCTL) { | | 1612 | if (l->l_flag & LW_LWPCTL) { |
1612 | lwp_lock(l); | | 1613 | lwp_lock(l); |
1613 | KASSERT(kpreempt_disabled()); | | 1614 | KASSERT(kpreempt_disabled()); |
1614 | l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); | | 1615 | l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); |
1615 | l->l_lwpctl->lc_pctr++; | | 1616 | l->l_lwpctl->lc_pctr++; |
1616 | l->l_flag &= ~LW_LWPCTL; | | 1617 | l->l_flag &= ~LW_LWPCTL; |
1617 | lwp_unlock(l); | | 1618 | lwp_unlock(l); |
1618 | } | | 1619 | } |
1619 | } | | 1620 | } |
1620 | } | | 1621 | } |
1621 | | | 1622 | |
1622 | /* | | 1623 | /* |
1623 | * Force an LWP to enter the kernel, to take a trip through lwp_userret(). | | 1624 | * Force an LWP to enter the kernel, to take a trip through lwp_userret(). |
1624 | */ | | 1625 | */ |
1625 | void | | 1626 | void |
1626 | lwp_need_userret(struct lwp *l) | | 1627 | lwp_need_userret(struct lwp *l) |
1627 | { | | 1628 | { |
1628 | | | 1629 | |
1629 | KASSERT(!cpu_intr_p()); | | 1630 | KASSERT(!cpu_intr_p()); |
1630 | KASSERT(lwp_locked(l, NULL)); | | 1631 | KASSERT(lwp_locked(l, NULL)); |
1631 | | | 1632 | |
1632 | /* | | 1633 | /* |
1633 | * If the LWP is in any state other than LSONPROC, we know that it | | 1634 | * If the LWP is in any state other than LSONPROC, we know that it |
1634 | * is executing in-kernel and will hit userret() on the way out. | | 1635 | * is executing in-kernel and will hit userret() on the way out. |
1635 | * | | 1636 | * |
1636 | * If the LWP is curlwp, then we know we'll be back out to userspace | | 1637 | * If the LWP is curlwp, then we know we'll be back out to userspace |
1637 | * soon (can't be called from a hardware interrupt here). | | 1638 | * soon (can't be called from a hardware interrupt here). |
1638 | * | | 1639 | * |
1639 | * Otherwise, we can't be sure what the LWP is doing, so first make | | 1640 | * Otherwise, we can't be sure what the LWP is doing, so first make |
1640 | * sure the update to l_flag will be globally visible, and then | | 1641 | * sure the update to l_flag will be globally visible, and then |
1641 | * force the LWP to take a trip through trap() where it will do | | 1642 | * force the LWP to take a trip through trap() where it will do |
1642 | * userret(). | | 1643 | * userret(). |
1643 | */ | | 1644 | */ |
1644 | if (l->l_stat == LSONPROC && l != curlwp) { | | 1645 | if (l->l_stat == LSONPROC && l != curlwp) { |
1645 | membar_producer(); | | 1646 | membar_producer(); |
1646 | cpu_signotify(l); | | 1647 | cpu_signotify(l); |
1647 | } | | 1648 | } |
1648 | } | | 1649 | } |
1649 | | | 1650 | |
1650 | /* | | 1651 | /* |
1651 | * Add one reference to an LWP. This will prevent the LWP from | | 1652 | * Add one reference to an LWP. This will prevent the LWP from |
1652 | * exiting, thus keeping the lwp structure and PCB around to inspect. | | 1653 | * exiting, thus keeping the lwp structure and PCB around to inspect. |
1653 | */ | | 1654 | */ |
1654 | void | | 1655 | void |
1655 | lwp_addref(struct lwp *l) | | 1656 | lwp_addref(struct lwp *l) |
1656 | { | | 1657 | { |
1657 | | | 1658 | |
1658 | KASSERT(mutex_owned(l->l_proc->p_lock)); | | 1659 | KASSERT(mutex_owned(l->l_proc->p_lock)); |
1659 | KASSERT(l->l_stat != LSZOMB); | | 1660 | KASSERT(l->l_stat != LSZOMB); |
1660 | KASSERT(l->l_refcnt != 0); | | 1661 | KASSERT(l->l_refcnt != 0); |
1661 | | | 1662 | |
1662 | l->l_refcnt++; | | 1663 | l->l_refcnt++; |
1663 | } | | 1664 | } |
1664 | | | 1665 | |
1665 | /* | | 1666 | /* |
1666 | * Remove one reference to an LWP. If this is the last reference, | | 1667 | * Remove one reference to an LWP. If this is the last reference, |
1667 | * then we must finalize the LWP's death. | | 1668 | * then we must finalize the LWP's death. |
1668 | */ | | 1669 | */ |
1669 | void | | 1670 | void |
1670 | lwp_delref(struct lwp *l) | | 1671 | lwp_delref(struct lwp *l) |
1671 | { | | 1672 | { |
1672 | struct proc *p = l->l_proc; | | 1673 | struct proc *p = l->l_proc; |
1673 | | | 1674 | |
1674 | mutex_enter(p->p_lock); | | 1675 | mutex_enter(p->p_lock); |
1675 | lwp_delref2(l); | | 1676 | lwp_delref2(l); |
1676 | mutex_exit(p->p_lock); | | 1677 | mutex_exit(p->p_lock); |
1677 | } | | 1678 | } |
1678 | | | 1679 | |
1679 | /* | | 1680 | /* |
1680 | * Remove one reference to an LWP. If this is the last reference, | | 1681 | * Remove one reference to an LWP. If this is the last reference, |
1681 | * then we must finalize the LWP's death. The proc mutex is held | | 1682 | * then we must finalize the LWP's death. The proc mutex is held |
1682 | * on entry. | | 1683 | * on entry. |
1683 | */ | | 1684 | */ |
1684 | void | | 1685 | void |
1685 | lwp_delref2(struct lwp *l) | | 1686 | lwp_delref2(struct lwp *l) |
1686 | { | | 1687 | { |
1687 | struct proc *p = l->l_proc; | | 1688 | struct proc *p = l->l_proc; |
1688 | | | 1689 | |
1689 | KASSERT(mutex_owned(p->p_lock)); | | 1690 | KASSERT(mutex_owned(p->p_lock)); |
1690 | KASSERT(l->l_stat != LSZOMB); | | 1691 | KASSERT(l->l_stat != LSZOMB); |
1691 | KASSERT(l->l_refcnt > 0); | | 1692 | KASSERT(l->l_refcnt > 0); |
1692 | if (--l->l_refcnt == 0) | | 1693 | if (--l->l_refcnt == 0) |
1693 | cv_broadcast(&p->p_lwpcv); | | 1694 | cv_broadcast(&p->p_lwpcv); |
1694 | } | | 1695 | } |
1695 | | | 1696 | |
1696 | /* | | 1697 | /* |
1697 | * Drain all references to the current LWP. | | 1698 | * Drain all references to the current LWP. |
1698 | */ | | 1699 | */ |
1699 | void | | 1700 | void |
1700 | lwp_drainrefs(struct lwp *l) | | 1701 | lwp_drainrefs(struct lwp *l) |
1701 | { | | 1702 | { |
1702 | struct proc *p = l->l_proc; | | 1703 | struct proc *p = l->l_proc; |
1703 | | | 1704 | |
1704 | KASSERT(mutex_owned(p->p_lock)); | | 1705 | KASSERT(mutex_owned(p->p_lock)); |
1705 | KASSERT(l->l_refcnt != 0); | | 1706 | KASSERT(l->l_refcnt != 0); |
1706 | | | 1707 | |
1707 | l->l_refcnt--; | | 1708 | l->l_refcnt--; |
1708 | while (l->l_refcnt != 0) | | 1709 | while (l->l_refcnt != 0) |
1709 | cv_wait(&p->p_lwpcv, p->p_lock); | | 1710 | cv_wait(&p->p_lwpcv, p->p_lock); |
1710 | } | | 1711 | } |
1711 | | | 1712 | |
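 | | | | /* |
 | | | | * Editorial usage sketch (assumed pattern, not from the source): a |
 | | | | * subsystem such as procfs that must inspect an LWP without it |
 | | | | * exiting underneath would do: |
 | | | | * |
 | | | | *	mutex_enter(p->p_lock); |
 | | | | *	lwp_addref(l);		(pins the lwp structure and PCB) |
 | | | | *	mutex_exit(p->p_lock); |
 | | | | *	... inspect l ... |
 | | | | *	lwp_delref(l);		(may wake a lwp_drainrefs() waiter) |
 | | | | */ |
 | | | | |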
1712 | /* | | 1713 | /* |
1713 | * Return true if the specified LWP is 'alive'. Only p->p_lock need | | 1714 | * Return true if the specified LWP is 'alive'. Only p->p_lock need |
1714 | * be held. | | 1715 | * be held. |
1715 | */ | | 1716 | */ |
1716 | bool | | 1717 | bool |
1717 | lwp_alive(lwp_t *l) | | 1718 | lwp_alive(lwp_t *l) |
1718 | { | | 1719 | { |
1719 | | | 1720 | |
1720 | KASSERT(mutex_owned(l->l_proc->p_lock)); | | 1721 | KASSERT(mutex_owned(l->l_proc->p_lock)); |
1721 | | | 1722 | |
1722 | switch (l->l_stat) { | | 1723 | switch (l->l_stat) { |
1723 | case LSSLEEP: | | 1724 | case LSSLEEP: |
1724 | case LSRUN: | | 1725 | case LSRUN: |
1725 | case LSONPROC: | | 1726 | case LSONPROC: |
1726 | case LSSTOP: | | 1727 | case LSSTOP: |
1727 | case LSSUSPENDED: | | 1728 | case LSSUSPENDED: |
1728 | return true; | | 1729 | return true; |
1729 | default: | | 1730 | default: |
1730 | return false; | | 1731 | return false; |
1731 | } | | 1732 | } |
1732 | } | | 1733 | } |
1733 | | | 1734 | |
1734 | /* | | 1735 | /* |
1735 | * Return first live LWP in the process. | | 1736 | * Return first live LWP in the process. |
1736 | */ | | 1737 | */ |
1737 | lwp_t * | | 1738 | lwp_t * |
1738 | lwp_find_first(proc_t *p) | | 1739 | lwp_find_first(proc_t *p) |
1739 | { | | 1740 | { |
1740 | lwp_t *l; | | 1741 | lwp_t *l; |
1741 | | | 1742 | |
1742 | KASSERT(mutex_owned(p->p_lock)); | | 1743 | KASSERT(mutex_owned(p->p_lock)); |
1743 | | | 1744 | |
1744 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | | 1745 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { |
1745 | if (lwp_alive(l)) { | | 1746 | if (lwp_alive(l)) { |
1746 | return l; | | 1747 | return l; |
1747 | } | | 1748 | } |
1748 | } | | 1749 | } |
1749 | | | 1750 | |
1750 | return NULL; | | 1751 | return NULL; |
1751 | } | | 1752 | } |

/*
 * Allocate a new lwpctl structure for a user LWP.
 */
int
lwp_ctl_alloc(vaddr_t *uaddr)
{
	lcproc_t *lp;
	u_int bit, i, offset;
	struct uvm_object *uao;
	int error;
	lcpage_t *lcp;
	proc_t *p;
	lwp_t *l;

	l = curlwp;
	p = l->l_proc;

	/* don't allow a vforked process to create lwp ctls */
	if (p->p_lflag & PL_PPWAIT)
		return EBUSY;

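	/*
	 * Fast path: this LWP already owns a slot; just recompute the
	 * user-space address from the page's user/kernel mappings.
	 */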
	if (l->l_lcpage != NULL) {
		lcp = l->l_lcpage;
		*uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr;
		return 0;
	}

	/*
	 * First time around, allocate header structure for the process.
	 * This is done speculatively, without p_lock held; if another
	 * LWP installs a header first, ours is discarded and the
	 * winner's is used instead.
	 */
	if ((lp = p->p_lwpctl) == NULL) {
		lp = kmem_alloc(sizeof(*lp), KM_SLEEP);
		mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE);
		lp->lp_uao = NULL;
		TAILQ_INIT(&lp->lp_pages);
		mutex_enter(p->p_lock);
		if (p->p_lwpctl == NULL) {
			p->p_lwpctl = lp;
			mutex_exit(p->p_lock);
		} else {
			/* Lost the race: free ours, take the winner's. */
			mutex_exit(p->p_lock);
			mutex_destroy(&lp->lp_lock);
			kmem_free(lp, sizeof(*lp));
			lp = p->p_lwpctl;
		}
	}

	/*
	 * Set up an anonymous memory region to hold the shared pages.
	 * Map them into the process' address space.  The user vmspace
	 * gets the first reference on the UAO.
	 */
	mutex_enter(&lp->lp_lock);
	if (lp->lp_uao == NULL) {
		lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0);
		lp->lp_cur = 0;
		lp->lp_max = LWPCTL_UAREA_SZ;
		lp->lp_uva = p->p_emul->e_vm_default_addr(p,
		     (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ,
		     p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
		error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva,
		    LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW,
		    UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0));
		if (error != 0) {
			uao_detach(lp->lp_uao);
			lp->lp_uao = NULL;
			mutex_exit(&lp->lp_lock);
			return error;
		}
	}

	/* Find a page with a free slot and allocate one for this LWP. */
	TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) {
		if (lcp->lcp_nfree != 0)
			break;
	}
	if (lcp == NULL) {
		/* Nothing available - try to set up a free page. */
		if (lp->lp_cur == lp->lp_max) {
			mutex_exit(&lp->lp_lock);
			return ENOMEM;
		}
		lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP);

		/*
		 * Wire the next page down in kernel space.  Since this
		 * is a new mapping, we must add a reference.
		 */
		uao = lp->lp_uao;
		(*uao->pgops->pgo_reference)(uao);
		lcp->lcp_kaddr = vm_map_min(kernel_map);
		error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE,
		    uao, lp->lp_cur, PAGE_SIZE,
		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
		    UVM_INH_NONE, UVM_ADV_RANDOM, 0));
		if (error != 0) {
			mutex_exit(&lp->lp_lock);
			kmem_free(lcp, LWPCTL_LCPAGE_SZ);
			(*uao->pgops->pgo_detach)(uao);
			return error;
		}
		error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr,
		    lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0);
		if (error != 0) {
			mutex_exit(&lp->lp_lock);
			uvm_unmap(kernel_map, lcp->lcp_kaddr,
			    lcp->lcp_kaddr + PAGE_SIZE);
			kmem_free(lcp, LWPCTL_LCPAGE_SZ);
			return error;
		}
		/* Prepare the page descriptor and link into the list. */
		lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur;
		lp->lp_cur += PAGE_SIZE;
		lcp->lcp_nfree = LWPCTL_PER_PAGE;
		lcp->lcp_rotor = 0;
		memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ);	/* all free */
		TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);
	}
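	/*
	 * Scan the bitmap from the rotor for a word with a free (set)
	 * bit; guaranteed to terminate because lcp was chosen, or just
	 * initialized, with lcp_nfree != 0.
	 */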
	for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) {
		if (++i >= LWPCTL_BITMAP_ENTRIES)
			i = 0;
	}
	bit = ffs(lcp->lcp_bitmap[i]) - 1;	/* lowest free slot */
	lcp->lcp_bitmap[i] ^= (1U << bit);	/* mark it in use */
	lcp->lcp_rotor = i;
	lcp->lcp_nfree--;
	l->l_lcpage = lcp;
	offset = (i << 5) + bit;		/* 32 bits per bitmap word */
	l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset;
	*uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t);
	mutex_exit(&lp->lp_lock);

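	/*
	 * Publish the current CPU in the new block.  Preemption is
	 * disabled so curcpu() cannot change underfoot between the
	 * lookup and the store; the context switch path is expected
	 * to keep the field current from here on.
	 */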
	KPREEMPT_DISABLE(l);
	l->l_lwpctl->lc_curcpu = (int)cpu_index(curcpu());
	KPREEMPT_ENABLE(l);

	return 0;
}
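
/*
 * Worked example of the slot arithmetic above (illustrative numbers
 * only): the bitmap uses 32-bit words, so word i, bit b names slot
 * offset = i * 32 + b.  With i == 2 and b == 5, offset == 69; the
 * kernel then uses (lwpctl_t *)lcp->lcp_kaddr + 69, while user space
 * is handed lcp->lcp_uaddr + 69 * sizeof(lwpctl_t), i.e. the same
 * slot seen through the two mappings of the one physical page.
 */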

/*
 * Free an lwpctl structure back to the per-process list.
 */
void
lwp_ctl_free(lwp_t *l)
{
	struct proc *p = l->l_proc;
	lcproc_t *lp;
	lcpage_t *lcp;
	u_int map, offset;

	/* don't free a lwp context we borrowed for vfork */
	if (p->p_lflag & PL_PPWAIT) {
		l->l_lwpctl = NULL;
		return;
	}

	lp = p->p_lwpctl;
	KASSERT(lp != NULL);

	lcp = l->l_lcpage;
	offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr);
	KASSERT(offset < LWPCTL_PER_PAGE);

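	/*
	 * Return the slot to its page: set its bit in the bitmap and
	 * bump the free count, all under the per-process lwpctl lock.
	 */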
	mutex_enter(&lp->lp_lock);
	lcp->lcp_nfree++;
	map = offset >> 5;
	lcp->lcp_bitmap[map] |= (1U << (offset & 31));
	if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0)
		lcp->lcp_rotor = map;	/* rotor word exhausted; repoint */
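	/*
	 * If the page at the head of the list is full, move this page
	 * (which now has at least one free slot) to the front so the
	 * scan in lwp_ctl_alloc() finds it first.
	 */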
	if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) {
		TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain);
		TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);