| @@ -1,1225 +1,1225 @@ | | | @@ -1,1225 +1,1225 @@ |
1 | /* $NetBSD: kern_synch.c,v 1.353 2022/12/05 15:47:14 martin Exp $ */ | | 1 | /* $NetBSD: kern_synch.c,v 1.354 2023/04/09 12:16:42 riastradh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020 | | 4 | * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020 |
5 | * The NetBSD Foundation, Inc. | | 5 | * The NetBSD Foundation, Inc. |
6 | * All rights reserved. | | 6 | * All rights reserved. |
7 | * | | 7 | * |
8 | * This code is derived from software contributed to The NetBSD Foundation | | 8 | * This code is derived from software contributed to The NetBSD Foundation |
9 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | | 9 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
10 | * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and | | 10 | * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and |
11 | * Daniel Sieger. | | 11 | * Daniel Sieger. |
12 | * | | 12 | * |
13 | * Redistribution and use in source and binary forms, with or without | | 13 | * Redistribution and use in source and binary forms, with or without |
14 | * modification, are permitted provided that the following conditions | | 14 | * modification, are permitted provided that the following conditions |
15 | * are met: | | 15 | * are met: |
16 | * 1. Redistributions of source code must retain the above copyright | | 16 | * 1. Redistributions of source code must retain the above copyright |
17 | * notice, this list of conditions and the following disclaimer. | | 17 | * notice, this list of conditions and the following disclaimer. |
18 | * 2. Redistributions in binary form must reproduce the above copyright | | 18 | * 2. Redistributions in binary form must reproduce the above copyright |
19 | * notice, this list of conditions and the following disclaimer in the | | 19 | * notice, this list of conditions and the following disclaimer in the |
20 | * documentation and/or other materials provided with the distribution. | | 20 | * documentation and/or other materials provided with the distribution. |
21 | * | | 21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
32 | * POSSIBILITY OF SUCH DAMAGE. | | 32 | * POSSIBILITY OF SUCH DAMAGE. |
33 | */ | | 33 | */ |
34 | | | 34 | |
35 | /*- | | 35 | /*- |
36 | * Copyright (c) 1982, 1986, 1990, 1991, 1993 | | 36 | * Copyright (c) 1982, 1986, 1990, 1991, 1993 |
37 | * The Regents of the University of California. All rights reserved. | | 37 | * The Regents of the University of California. All rights reserved. |
38 | * (c) UNIX System Laboratories, Inc. | | 38 | * (c) UNIX System Laboratories, Inc. |
39 | * All or some portions of this file are derived from material licensed | | 39 | * All or some portions of this file are derived from material licensed |
40 | * to the University of California by American Telephone and Telegraph | | 40 | * to the University of California by American Telephone and Telegraph |
41 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | | 41 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
42 | * the permission of UNIX System Laboratories, Inc. | | 42 | * the permission of UNIX System Laboratories, Inc. |
43 | * | | 43 | * |
44 | * Redistribution and use in source and binary forms, with or without | | 44 | * Redistribution and use in source and binary forms, with or without |
45 | * modification, are permitted provided that the following conditions | | 45 | * modification, are permitted provided that the following conditions |
46 | * are met: | | 46 | * are met: |
47 | * 1. Redistributions of source code must retain the above copyright | | 47 | * 1. Redistributions of source code must retain the above copyright |
48 | * notice, this list of conditions and the following disclaimer. | | 48 | * notice, this list of conditions and the following disclaimer. |
49 | * 2. Redistributions in binary form must reproduce the above copyright | | 49 | * 2. Redistributions in binary form must reproduce the above copyright |
50 | * notice, this list of conditions and the following disclaimer in the | | 50 | * notice, this list of conditions and the following disclaimer in the |
51 | * documentation and/or other materials provided with the distribution. | | 51 | * documentation and/or other materials provided with the distribution. |
52 | * 3. Neither the name of the University nor the names of its contributors | | 52 | * 3. Neither the name of the University nor the names of its contributors |
53 | * may be used to endorse or promote products derived from this software | | 53 | * may be used to endorse or promote products derived from this software |
54 | * without specific prior written permission. | | 54 | * without specific prior written permission. |
55 | * | | 55 | * |
56 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 56 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
57 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 57 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
58 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 58 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
59 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 59 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
60 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 60 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
61 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 61 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
62 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 62 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
63 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 63 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
64 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 64 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
65 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 65 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
66 | * SUCH DAMAGE. | | 66 | * SUCH DAMAGE. |
67 | * | | 67 | * |
68 | * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 | | 68 | * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 |
69 | */ | | 69 | */ |
70 | | | 70 | |
71 | #include <sys/cdefs.h> | | 71 | #include <sys/cdefs.h> |
72 | __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.353 2022/12/05 15:47:14 martin Exp $"); | | 72 | __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.354 2023/04/09 12:16:42 riastradh Exp $"); |
73 | | | 73 | |
74 | #include "opt_kstack.h" | | 74 | #include "opt_kstack.h" |
75 | #include "opt_dtrace.h" | | 75 | #include "opt_dtrace.h" |
76 | | | 76 | |
77 | #define __MUTEX_PRIVATE | | 77 | #define __MUTEX_PRIVATE |
78 | | | 78 | |
79 | #include <sys/param.h> | | 79 | #include <sys/param.h> |
80 | #include <sys/systm.h> | | 80 | #include <sys/systm.h> |
81 | #include <sys/proc.h> | | 81 | #include <sys/proc.h> |
82 | #include <sys/kernel.h> | | 82 | #include <sys/kernel.h> |
83 | #include <sys/cpu.h> | | 83 | #include <sys/cpu.h> |
84 | #include <sys/pserialize.h> | | 84 | #include <sys/pserialize.h> |
85 | #include <sys/resource.h> | | 85 | #include <sys/resource.h> |
86 | #include <sys/resourcevar.h> | | 86 | #include <sys/resourcevar.h> |
87 | #include <sys/rwlock.h> | | 87 | #include <sys/rwlock.h> |
88 | #include <sys/sched.h> | | 88 | #include <sys/sched.h> |
89 | #include <sys/syscall_stats.h> | | 89 | #include <sys/syscall_stats.h> |
90 | #include <sys/sleepq.h> | | 90 | #include <sys/sleepq.h> |
91 | #include <sys/lockdebug.h> | | 91 | #include <sys/lockdebug.h> |
92 | #include <sys/evcnt.h> | | 92 | #include <sys/evcnt.h> |
93 | #include <sys/intr.h> | | 93 | #include <sys/intr.h> |
94 | #include <sys/lwpctl.h> | | 94 | #include <sys/lwpctl.h> |
95 | #include <sys/atomic.h> | | 95 | #include <sys/atomic.h> |
96 | #include <sys/syslog.h> | | 96 | #include <sys/syslog.h> |
97 | | | 97 | |
98 | #include <uvm/uvm_extern.h> | | 98 | #include <uvm/uvm_extern.h> |
99 | | | 99 | |
100 | #include <dev/lockstat.h> | | 100 | #include <dev/lockstat.h> |
101 | | | 101 | |
102 | #include <sys/dtrace_bsd.h> | | 102 | #include <sys/dtrace_bsd.h> |
103 | int dtrace_vtime_active = 0; | | 103 | int dtrace_vtime_active = 0; |
104 | dtrace_vtime_switch_func_t dtrace_vtime_switch_func; | | 104 | dtrace_vtime_switch_func_t dtrace_vtime_switch_func; |
105 | | | 105 | |
106 | static void sched_unsleep(struct lwp *, bool); | | 106 | static void sched_unsleep(struct lwp *, bool); |
107 | static void sched_changepri(struct lwp *, pri_t); | | 107 | static void sched_changepri(struct lwp *, pri_t); |
108 | static void sched_lendpri(struct lwp *, pri_t); | | 108 | static void sched_lendpri(struct lwp *, pri_t); |
109 | | | 109 | |
110 | syncobj_t sleep_syncobj = { | | 110 | syncobj_t sleep_syncobj = { |
111 | .sobj_flag = SOBJ_SLEEPQ_SORTED, | | 111 | .sobj_flag = SOBJ_SLEEPQ_SORTED, |
112 | .sobj_unsleep = sleepq_unsleep, | | 112 | .sobj_unsleep = sleepq_unsleep, |
113 | .sobj_changepri = sleepq_changepri, | | 113 | .sobj_changepri = sleepq_changepri, |
114 | .sobj_lendpri = sleepq_lendpri, | | 114 | .sobj_lendpri = sleepq_lendpri, |
115 | .sobj_owner = syncobj_noowner, | | 115 | .sobj_owner = syncobj_noowner, |
116 | }; | | 116 | }; |
117 | | | 117 | |
118 | syncobj_t sched_syncobj = { | | 118 | syncobj_t sched_syncobj = { |
119 | .sobj_flag = SOBJ_SLEEPQ_SORTED, | | 119 | .sobj_flag = SOBJ_SLEEPQ_SORTED, |
120 | .sobj_unsleep = sched_unsleep, | | 120 | .sobj_unsleep = sched_unsleep, |
121 | .sobj_changepri = sched_changepri, | | 121 | .sobj_changepri = sched_changepri, |
122 | .sobj_lendpri = sched_lendpri, | | 122 | .sobj_lendpri = sched_lendpri, |
123 | .sobj_owner = syncobj_noowner, | | 123 | .sobj_owner = syncobj_noowner, |
124 | }; | | 124 | }; |
125 | | | 125 | |
126 | syncobj_t kpause_syncobj = { | | 126 | syncobj_t kpause_syncobj = { |
127 | .sobj_flag = SOBJ_SLEEPQ_NULL, | | 127 | .sobj_flag = SOBJ_SLEEPQ_NULL, |
128 | .sobj_unsleep = sleepq_unsleep, | | 128 | .sobj_unsleep = sleepq_unsleep, |
129 | .sobj_changepri = sleepq_changepri, | | 129 | .sobj_changepri = sleepq_changepri, |
130 | .sobj_lendpri = sleepq_lendpri, | | 130 | .sobj_lendpri = sleepq_lendpri, |
131 | .sobj_owner = syncobj_noowner, | | 131 | .sobj_owner = syncobj_noowner, |
132 | }; | | 132 | }; |
133 | | | 133 | |
134 | /* "Lightning bolt": once-a-second sleep address. */ | | 134 | /* "Lightning bolt": once-a-second sleep address. */ |
135 | kcondvar_t lbolt __cacheline_aligned; | | 135 | kcondvar_t lbolt __cacheline_aligned; |
136 | | | 136 | |
137 | u_int sched_pstats_ticks __cacheline_aligned; | | 137 | u_int sched_pstats_ticks __cacheline_aligned; |
138 | | | 138 | |
139 | /* Preemption event counters. */ | | 139 | /* Preemption event counters. */ |
140 | static struct evcnt kpreempt_ev_crit __cacheline_aligned; | | 140 | static struct evcnt kpreempt_ev_crit __cacheline_aligned; |
141 | static struct evcnt kpreempt_ev_klock __cacheline_aligned; | | 141 | static struct evcnt kpreempt_ev_klock __cacheline_aligned; |
142 | static struct evcnt kpreempt_ev_immed __cacheline_aligned; | | 142 | static struct evcnt kpreempt_ev_immed __cacheline_aligned; |
143 | | | 143 | |
144 | void | | 144 | void |
145 | synch_init(void) | | 145 | synch_init(void) |
146 | { | | 146 | { |
147 | | | 147 | |
148 | cv_init(&lbolt, "lbolt"); | | 148 | cv_init(&lbolt, "lbolt"); |
149 | | | 149 | |
150 | evcnt_attach_dynamic(&kpreempt_ev_crit, EVCNT_TYPE_MISC, NULL, | | 150 | evcnt_attach_dynamic(&kpreempt_ev_crit, EVCNT_TYPE_MISC, NULL, |
151 | "kpreempt", "defer: critical section"); | | 151 | "kpreempt", "defer: critical section"); |
152 | evcnt_attach_dynamic(&kpreempt_ev_klock, EVCNT_TYPE_MISC, NULL, | | 152 | evcnt_attach_dynamic(&kpreempt_ev_klock, EVCNT_TYPE_MISC, NULL, |
153 | "kpreempt", "defer: kernel_lock"); | | 153 | "kpreempt", "defer: kernel_lock"); |
154 | evcnt_attach_dynamic(&kpreempt_ev_immed, EVCNT_TYPE_MISC, NULL, | | 154 | evcnt_attach_dynamic(&kpreempt_ev_immed, EVCNT_TYPE_MISC, NULL, |
155 | "kpreempt", "immediate"); | | 155 | "kpreempt", "immediate"); |
156 | } | | 156 | } |
157 | | | 157 | |
158 | /* | | 158 | /* |
159 | * OBSOLETE INTERFACE | | 159 | * OBSOLETE INTERFACE |
160 | * | | 160 | * |
161 | * General sleep call. Suspends the current LWP until a wakeup is | | 161 | * General sleep call. Suspends the current LWP until a wakeup is |
162 | * performed on the specified identifier. The LWP will then be made | | 162 | * performed on the specified identifier. The LWP will then be made |
163 | * runnable with the specified priority. Sleeps at most timo/hz seconds (0 | | 163 | * runnable with the specified priority. Sleeps at most timo/hz seconds (0 |
164 | * means no timeout). If pri includes the PCATCH flag, signals are checked | | 164 | * means no timeout). If pri includes the PCATCH flag, signals are checked |
165 | * before and after sleeping; otherwise signals are not checked. Returns 0 if | | 165 | * before and after sleeping; otherwise signals are not checked. Returns 0 if |
166 | * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a | | 166 | * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a |
167 | * signal needs to be delivered, ERESTART is returned if the current system | | 167 | * signal needs to be delivered, ERESTART is returned if the current system |
168 | * call should be restarted if possible, and EINTR is returned if the system | | 168 | * call should be restarted if possible, and EINTR is returned if the system |
169 | * call should be interrupted by the signal (return EINTR). | | 169 | * call should be interrupted by the signal (return EINTR). |
170 | */ | | 170 | */ |
171 | int | | 171 | int |
172 | tsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo) | | 172 | tsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo) |
173 | { | | 173 | { |
174 | struct lwp *l = curlwp; | | 174 | struct lwp *l = curlwp; |
175 | sleepq_t *sq; | | 175 | sleepq_t *sq; |
176 | kmutex_t *mp; | | 176 | kmutex_t *mp; |
177 | bool catch_p; | | 177 | bool catch_p; |
178 | | | 178 | |
179 | KASSERT((l->l_pflag & LP_INTR) == 0); | | 179 | KASSERT((l->l_pflag & LP_INTR) == 0); |
180 | KASSERT(ident != &lbolt); | | 180 | KASSERT(ident != &lbolt); |
181 | | | 181 | |
182 | if (sleepq_dontsleep(l)) { | | 182 | if (sleepq_dontsleep(l)) { |
183 | (void)sleepq_abort(NULL, 0); | | 183 | (void)sleepq_abort(NULL, 0); |
184 | return 0; | | 184 | return 0; |
185 | } | | 185 | } |
186 | | | 186 | |
187 | l->l_kpriority = true; | | 187 | l->l_kpriority = true; |
188 | catch_p = priority & PCATCH; | | 188 | catch_p = priority & PCATCH; |
189 | sq = sleeptab_lookup(&sleeptab, ident, &mp); | | 189 | sq = sleeptab_lookup(&sleeptab, ident, &mp); |
190 | sleepq_enter(sq, l, mp); | | 190 | sleepq_enter(sq, l, mp); |
191 | sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p); | | 191 | sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p); |
192 | return sleepq_block(timo, catch_p, &sleep_syncobj); | | 192 | return sleepq_block(timo, catch_p, &sleep_syncobj); |
193 | } | | 193 | } |
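For illustration, the classic consumer of this obsolete interface pairs tsleep() on an identifier with wakeup() on the same address. A minimal sketch follows, assuming a hypothetical completion flag (none of these names are from this file); the unlocked check-then-sleep window here is exactly the lost-wakeup race that makes the interface obsolete:

    /* Waiter: sleep until example_done() fires, a signal arrives, or hz ticks pass. */
    static volatile int example_flag;

    static int
    example_wait(void)
    {
            int error;

            while (!example_flag) {
                    error = tsleep(&example_flag, PZERO | PCATCH, "exwait", hz);
                    if (error != 0 && error != EWOULDBLOCK)
                            return error;   /* EINTR or ERESTART via PCATCH */
            }
            return 0;
    }

    /* Waker: set the condition, then make all LWPs sleeping on the address runnable. */
    static void
    example_done(void)
    {
            example_flag = 1;
            wakeup(&example_flag);
    }

mtsleep() below is the same call with an interlock: it drops mtx across the sleep and, unless PNORELOCK is set in priority, reacquires it before returning.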
194 | | | 194 | |
195 | int | | 195 | int |
196 | mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo, | | 196 | mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo, |
197 | kmutex_t *mtx) | | 197 | kmutex_t *mtx) |
198 | { | | 198 | { |
199 | struct lwp *l = curlwp; | | 199 | struct lwp *l = curlwp; |
200 | sleepq_t *sq; | | 200 | sleepq_t *sq; |
201 | kmutex_t *mp; | | 201 | kmutex_t *mp; |
202 | bool catch_p; | | 202 | bool catch_p; |
203 | int error; | | 203 | int error; |
204 | | | 204 | |
205 | KASSERT((l->l_pflag & LP_INTR) == 0); | | 205 | KASSERT((l->l_pflag & LP_INTR) == 0); |
206 | KASSERT(ident != &lbolt); | | 206 | KASSERT(ident != &lbolt); |
207 | | | 207 | |
208 | if (sleepq_dontsleep(l)) { | | 208 | if (sleepq_dontsleep(l)) { |
209 | (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0); | | 209 | (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0); |
210 | return 0; | | 210 | return 0; |
211 | } | | 211 | } |
212 | | | 212 | |
213 | l->l_kpriority = true; | | 213 | l->l_kpriority = true; |
214 | catch_p = priority & PCATCH; | | 214 | catch_p = priority & PCATCH; |
215 | sq = sleeptab_lookup(&sleeptab, ident, &mp); | | 215 | sq = sleeptab_lookup(&sleeptab, ident, &mp); |
216 | sleepq_enter(sq, l, mp); | | 216 | sleepq_enter(sq, l, mp); |
217 | sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p); | | 217 | sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p); |
218 | mutex_exit(mtx); | | 218 | mutex_exit(mtx); |
219 | error = sleepq_block(timo, catch_p, &sleep_syncobj); | | 219 | error = sleepq_block(timo, catch_p, &sleep_syncobj); |
220 | | | 220 | |
221 | if ((priority & PNORELOCK) == 0) | | 221 | if ((priority & PNORELOCK) == 0) |
222 | mutex_enter(mtx); | | 222 | mutex_enter(mtx); |
223 | | | 223 | |
224 | return error; | | 224 | return error; |
225 | } | | 225 | } |
226 | | | 226 | |
227 | /* | | 227 | /* |
228 | * General sleep call for situations where a wake-up is not expected. | | 228 | * General sleep call for situations where a wake-up is not expected. |
229 | */ | | 229 | */ |
230 | int | | 230 | int |
231 | kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx) | | 231 | kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx) |
232 | { | | 232 | { |
233 | struct lwp *l = curlwp; | | 233 | struct lwp *l = curlwp; |
234 | int error; | | 234 | int error; |
235 | | | 235 | |
236 | KASSERT(!(timo == 0 && intr == false)); | | 236 | KASSERT(timo != 0 || intr); |
237 | | | 237 | |
238 | if (sleepq_dontsleep(l)) | | 238 | if (sleepq_dontsleep(l)) |
239 | return sleepq_abort(NULL, 0); | | 239 | return sleepq_abort(NULL, 0); |
240 | | | 240 | |
241 | if (mtx != NULL) | | 241 | if (mtx != NULL) |
242 | mutex_exit(mtx); | | 242 | mutex_exit(mtx); |
243 | l->l_kpriority = true; | | 243 | l->l_kpriority = true; |
244 | lwp_lock(l); | | 244 | lwp_lock(l); |
245 | KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); | | 245 | KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); |
246 | sleepq_enqueue(NULL, l, wmesg, &kpause_syncobj, intr); | | 246 | sleepq_enqueue(NULL, l, wmesg, &kpause_syncobj, intr); |
247 | error = sleepq_block(timo, intr, &kpause_syncobj); | | 247 | error = sleepq_block(timo, intr, &kpause_syncobj); |
248 | if (mtx != NULL) | | 248 | if (mtx != NULL) |
249 | mutex_enter(mtx); | | 249 | mutex_enter(mtx); |
250 | | | 250 | |
251 | return error; | | 251 | return error; |
252 | } | | 252 | } |
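A usage sketch for kpause(): with no wakeup channel, an uninterruptible pause must have a nonzero timeout, which is what the KASSERT above enforces (this revision rewrites !(timo == 0 && intr == false) as the equivalent timo != 0 || intr). The wmesg string and function name here are illustrative; mstohz() converts milliseconds to clock ticks:

    /* Back off for roughly 10ms inside a retry loop; no interlock held. */
    static int
    example_backoff(void)
    {

            return kpause("exbackoff", false, mstohz(10), NULL);
    }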
253 | | | 253 | |
254 | /* | | 254 | /* |
255 | * OBSOLETE INTERFACE | | 255 | * OBSOLETE INTERFACE |
256 | * | | 256 | * |
257 | * Make all LWPs sleeping on the specified identifier runnable. | | 257 | * Make all LWPs sleeping on the specified identifier runnable. |
258 | */ | | 258 | */ |
259 | void | | 259 | void |
260 | wakeup(wchan_t ident) | | 260 | wakeup(wchan_t ident) |
261 | { | | 261 | { |
262 | sleepq_t *sq; | | 262 | sleepq_t *sq; |
263 | kmutex_t *mp; | | 263 | kmutex_t *mp; |
264 | | | 264 | |
265 | if (__predict_false(cold)) | | 265 | if (__predict_false(cold)) |
266 | return; | | 266 | return; |
267 | | | 267 | |
268 | sq = sleeptab_lookup(&sleeptab, ident, &mp); | | 268 | sq = sleeptab_lookup(&sleeptab, ident, &mp); |
269 | sleepq_wake(sq, ident, (u_int)-1, mp); | | 269 | sleepq_wake(sq, ident, (u_int)-1, mp); |
270 | } | | 270 | } |
271 | | | 271 | |
272 | /* | | 272 | /* |
273 | * General yield call. Puts the current LWP back on its run queue and | | 273 | * General yield call. Puts the current LWP back on its run queue and |
274 | * performs a context switch. | | 274 | * performs a context switch. |
275 | */ | | 275 | */ |
276 | void | | 276 | void |
277 | yield(void) | | 277 | yield(void) |
278 | { | | 278 | { |
279 | struct lwp *l = curlwp; | | 279 | struct lwp *l = curlwp; |
280 | | | 280 | |
281 | KERNEL_UNLOCK_ALL(l, &l->l_biglocks); | | 281 | KERNEL_UNLOCK_ALL(l, &l->l_biglocks); |
282 | lwp_lock(l); | | 282 | lwp_lock(l); |
283 | | | 283 | |
284 | KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); | | 284 | KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); |
285 | KASSERT(l->l_stat == LSONPROC); | | 285 | KASSERT(l->l_stat == LSONPROC); |
286 | | | 286 | |
287 | /* Voluntary - ditch kpriority boost. */ | | 287 | /* Voluntary - ditch kpriority boost. */ |
288 | l->l_kpriority = false; | | 288 | l->l_kpriority = false; |
289 | spc_lock(l->l_cpu); | | 289 | spc_lock(l->l_cpu); |
290 | mi_switch(l); | | 290 | mi_switch(l); |
291 | KERNEL_LOCK(l->l_biglocks, l); | | 291 | KERNEL_LOCK(l->l_biglocks, l); |
292 | } | | 292 | } |
293 | | | 293 | |
294 | /* | | 294 | /* |
295 | * General preemption call. Puts the current LWP back on its run queue | | 295 | * General preemption call. Puts the current LWP back on its run queue |
296 | * and performs an involuntary context switch. Different from yield() | | 296 | * and performs an involuntary context switch. Different from yield() |
297 | * in that: | | 297 | * in that: |
298 | * | | 298 | * |
299 | * - It's counted differently (involuntary vs. voluntary). | | 299 | * - It's counted differently (involuntary vs. voluntary). |
300 | * - Realtime threads go to the head of their runqueue vs. tail for yield(). | | 300 | * - Realtime threads go to the head of their runqueue vs. tail for yield(). |
301 | * - Priority boost is retained unless the LWP has exceeded its timeslice. | | 301 | * - Priority boost is retained unless the LWP has exceeded its timeslice. |
302 | */ | | 302 | */ |
303 | void | | 303 | void |
304 | preempt(void) | | 304 | preempt(void) |
305 | { | | 305 | { |
306 | struct lwp *l = curlwp; | | 306 | struct lwp *l = curlwp; |
307 | | | 307 | |
308 | KERNEL_UNLOCK_ALL(l, &l->l_biglocks); | | 308 | KERNEL_UNLOCK_ALL(l, &l->l_biglocks); |
309 | lwp_lock(l); | | 309 | lwp_lock(l); |
310 | | | 310 | |
311 | KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); | | 311 | KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); |
312 | KASSERT(l->l_stat == LSONPROC); | | 312 | KASSERT(l->l_stat == LSONPROC); |
313 | | | 313 | |
314 | spc_lock(l->l_cpu); | | 314 | spc_lock(l->l_cpu); |
315 | /* Involuntary - keep kpriority boost unless a CPU hog. */ | | 315 | /* Involuntary - keep kpriority boost unless a CPU hog. */ |
316 | if ((l->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { | | 316 | if ((l->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { |
317 | l->l_kpriority = false; | | 317 | l->l_kpriority = false; |
318 | } | | 318 | } |
319 | l->l_pflag |= LP_PREEMPTING; | | 319 | l->l_pflag |= LP_PREEMPTING; |
320 | mi_switch(l); | | 320 | mi_switch(l); |
321 | KERNEL_LOCK(l->l_biglocks, l); | | 321 | KERNEL_LOCK(l->l_biglocks, l); |
322 | } | | 322 | } |
323 | | | 323 | |
324 | /* | | 324 | /* |
325 | * Return true if the current LWP should yield the processor. Intended to | | 325 | * Return true if the current LWP should yield the processor. Intended to |
326 | * be used by long-running code in kernel. | | 326 | * be used by long-running code in kernel. |
327 | */ | | 327 | */ |
328 | inline bool | | 328 | inline bool |
329 | preempt_needed(void) | | 329 | preempt_needed(void) |
330 | { | | 330 | { |
331 | lwp_t *l = curlwp; | | 331 | lwp_t *l = curlwp; |
332 | int needed; | | 332 | int needed; |
333 | | | 333 | |
334 | KPREEMPT_DISABLE(l); | | 334 | KPREEMPT_DISABLE(l); |
335 | needed = l->l_cpu->ci_want_resched; | | 335 | needed = l->l_cpu->ci_want_resched; |
336 | KPREEMPT_ENABLE(l); | | 336 | KPREEMPT_ENABLE(l); |
337 | | | 337 | |
338 | return (needed != 0); | | 338 | return (needed != 0); |
339 | } | | 339 | } |
340 | | | 340 | |
341 | /* | | 341 | /* |
342 | * A breathing point for long-running code in the kernel. | | 342 | * A breathing point for long-running code in the kernel. |
343 | */ | | 343 | */ |
344 | void | | 344 | void |
345 | preempt_point(void) | | 345 | preempt_point(void) |
346 | { | | 346 | { |
347 | | | 347 | |
348 | if (__predict_false(preempt_needed())) { | | 348 | if (__predict_false(preempt_needed())) { |
349 | preempt(); | | 349 | preempt(); |
350 | } | | 350 | } |
351 | } | | 351 | } |
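As a usage sketch (process_item() is hypothetical), long-running kernel loops call preempt_point() periodically so that a pending reschedule request, the condition preempt_needed() reports, can take effect promptly:

    static void
    example_scan(size_t n)
    {
            size_t i;

            for (i = 0; i < n; i++) {
                    process_item(i);        /* hypothetical unit of work */
                    if ((i & 1023) == 0)
                            preempt_point(); /* breathing point: may context switch */
            }
    }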
352 | | | 352 | |
353 | /* | | 353 | /* |
354 | * Handle a request made by another agent to preempt the current LWP | | 354 | * Handle a request made by another agent to preempt the current LWP |
355 | * in-kernel. Usually called when l_dopreempt may be non-zero. | | 355 | * in-kernel. Usually called when l_dopreempt may be non-zero. |
356 | * | | 356 | * |
357 | * Character addresses for lockstat only. | | 357 | * Character addresses for lockstat only. |
358 | */ | | 358 | */ |
359 | static char kpreempt_is_disabled; | | 359 | static char kpreempt_is_disabled; |
360 | static char kernel_lock_held; | | 360 | static char kernel_lock_held; |
361 | static char is_softint_lwp; | | 361 | static char is_softint_lwp; |
362 | static char spl_is_raised; | | 362 | static char spl_is_raised; |
363 | | | 363 | |
364 | bool | | 364 | bool |
365 | kpreempt(uintptr_t where) | | 365 | kpreempt(uintptr_t where) |
366 | { | | 366 | { |
367 | uintptr_t failed; | | 367 | uintptr_t failed; |
368 | lwp_t *l; | | 368 | lwp_t *l; |
369 | int s, dop, lsflag; | | 369 | int s, dop, lsflag; |
370 | | | 370 | |
371 | l = curlwp; | | 371 | l = curlwp; |
372 | failed = 0; | | 372 | failed = 0; |
373 | while ((dop = l->l_dopreempt) != 0) { | | 373 | while ((dop = l->l_dopreempt) != 0) { |
374 | if (l->l_stat != LSONPROC) { | | 374 | if (l->l_stat != LSONPROC) { |
375 | /* | | 375 | /* |
376 | * About to block (or die), let it happen. | | 376 | * About to block (or die), let it happen. |
377 | * Doesn't really count as "preemption has | | 377 | * Doesn't really count as "preemption has |
378 | * been blocked", since we're going to | | 378 | * been blocked", since we're going to |
379 | * context switch. | | 379 | * context switch. |
380 | */ | | 380 | */ |
381 | atomic_swap_uint(&l->l_dopreempt, 0); | | 381 | atomic_swap_uint(&l->l_dopreempt, 0); |
382 | return true; | | 382 | return true; |
383 | } | | 383 | } |
384 | KASSERT((l->l_flag & LW_IDLE) == 0); | | 384 | KASSERT((l->l_flag & LW_IDLE) == 0); |
385 | if (__predict_false(l->l_nopreempt != 0)) { | | 385 | if (__predict_false(l->l_nopreempt != 0)) { |
386 | /* LWP holds preemption disabled, explicitly. */ | | 386 | /* LWP holds preemption disabled, explicitly. */ |
387 | if ((dop & DOPREEMPT_COUNTED) == 0) { | | 387 | if ((dop & DOPREEMPT_COUNTED) == 0) { |
388 | kpreempt_ev_crit.ev_count++; | | 388 | kpreempt_ev_crit.ev_count++; |
389 | } | | 389 | } |
390 | failed = (uintptr_t)&kpreempt_is_disabled; | | 390 | failed = (uintptr_t)&kpreempt_is_disabled; |
391 | break; | | 391 | break; |
392 | } | | 392 | } |
393 | if (__predict_false((l->l_pflag & LP_INTR) != 0)) { | | 393 | if (__predict_false((l->l_pflag & LP_INTR) != 0)) { |
394 | /* Can't preempt soft interrupts yet. */ | | 394 | /* Can't preempt soft interrupts yet. */ |
395 | atomic_swap_uint(&l->l_dopreempt, 0); | | 395 | atomic_swap_uint(&l->l_dopreempt, 0); |
396 | failed = (uintptr_t)&is_softint_lwp; | | 396 | failed = (uintptr_t)&is_softint_lwp; |
397 | break; | | 397 | break; |
398 | } | | 398 | } |
399 | s = splsched(); | | 399 | s = splsched(); |
400 | if (__predict_false(l->l_blcnt != 0 || | | 400 | if (__predict_false(l->l_blcnt != 0 || |
401 | curcpu()->ci_biglock_wanted != NULL)) { | | 401 | curcpu()->ci_biglock_wanted != NULL)) { |
402 | /* Hold or want kernel_lock, code is not MT safe. */ | | 402 | /* Hold or want kernel_lock, code is not MT safe. */ |
403 | splx(s); | | 403 | splx(s); |
404 | if ((dop & DOPREEMPT_COUNTED) == 0) { | | 404 | if ((dop & DOPREEMPT_COUNTED) == 0) { |
405 | kpreempt_ev_klock.ev_count++; | | 405 | kpreempt_ev_klock.ev_count++; |
406 | } | | 406 | } |
407 | failed = (uintptr_t)&kernel_lock_held; | | 407 | failed = (uintptr_t)&kernel_lock_held; |
408 | break; | | 408 | break; |
409 | } | | 409 | } |
410 | if (__predict_false(!cpu_kpreempt_enter(where, s))) { | | 410 | if (__predict_false(!cpu_kpreempt_enter(where, s))) { |
411 | /* | | 411 | /* |
412 | * It may be that the IPL is too high. | | 412 | * It may be that the IPL is too high. |
413 | * cpu_kpreempt_enter() can schedule an | | 413 | * cpu_kpreempt_enter() can schedule an |
414 | * interrupt to retry later. | | 414 | * interrupt to retry later. |
415 | */ | | 415 | */ |
416 | splx(s); | | 416 | splx(s); |
417 | failed = (uintptr_t)&spl_is_raised; | | 417 | failed = (uintptr_t)&spl_is_raised; |
418 | break; | | 418 | break; |
419 | } | | 419 | } |
420 | /* Do it! */ | | 420 | /* Do it! */ |
421 | if (__predict_true((dop & DOPREEMPT_COUNTED) == 0)) { | | 421 | if (__predict_true((dop & DOPREEMPT_COUNTED) == 0)) { |
422 | kpreempt_ev_immed.ev_count++; | | 422 | kpreempt_ev_immed.ev_count++; |
423 | } | | 423 | } |
424 | lwp_lock(l); | | 424 | lwp_lock(l); |
425 | /* Involuntary - keep kpriority boost. */ | | 425 | /* Involuntary - keep kpriority boost. */ |
426 | l->l_pflag |= LP_PREEMPTING; | | 426 | l->l_pflag |= LP_PREEMPTING; |
427 | spc_lock(l->l_cpu); | | 427 | spc_lock(l->l_cpu); |
428 | mi_switch(l); | | 428 | mi_switch(l); |
429 | l->l_nopreempt++; | | 429 | l->l_nopreempt++; |
430 | splx(s); | | 430 | splx(s); |
431 | | | 431 | |
432 | /* Take care of any MD cleanup. */ | | 432 | /* Take care of any MD cleanup. */ |
433 | cpu_kpreempt_exit(where); | | 433 | cpu_kpreempt_exit(where); |
434 | l->l_nopreempt--; | | 434 | l->l_nopreempt--; |
435 | } | | 435 | } |
436 | | | 436 | |
437 | if (__predict_true(!failed)) { | | 437 | if (__predict_true(!failed)) { |
438 | return false; | | 438 | return false; |
439 | } | | 439 | } |
440 | | | 440 | |
441 | /* Record preemption failure for reporting via lockstat. */ | | 441 | /* Record preemption failure for reporting via lockstat. */ |
442 | atomic_or_uint(&l->l_dopreempt, DOPREEMPT_COUNTED); | | 442 | atomic_or_uint(&l->l_dopreempt, DOPREEMPT_COUNTED); |
443 | lsflag = 0; | | 443 | lsflag = 0; |
444 | LOCKSTAT_ENTER(lsflag); | | 444 | LOCKSTAT_ENTER(lsflag); |
445 | if (__predict_false(lsflag)) { | | 445 | if (__predict_false(lsflag)) { |
446 | if (where == 0) { | | 446 | if (where == 0) { |
447 | where = (uintptr_t)__builtin_return_address(0); | | 447 | where = (uintptr_t)__builtin_return_address(0); |
448 | } | | 448 | } |
449 | /* Preemption is on, might recurse, so make it atomic. */ | | 449 | /* Preemption is on, might recurse, so make it atomic. */ |
450 | if (atomic_cas_ptr_ni((void *)&l->l_pfailaddr, NULL, | | 450 | if (atomic_cas_ptr_ni((void *)&l->l_pfailaddr, NULL, |
451 | (void *)where) == NULL) { | | 451 | (void *)where) == NULL) { |
452 | LOCKSTAT_START_TIMER(lsflag, l->l_pfailtime); | | 452 | LOCKSTAT_START_TIMER(lsflag, l->l_pfailtime); |
453 | l->l_pfaillock = failed; | | 453 | l->l_pfaillock = failed; |
454 | } | | 454 | } |
455 | } | | 455 | } |
456 | LOCKSTAT_EXIT(lsflag); | | 456 | LOCKSTAT_EXIT(lsflag); |
457 | return true; | | 457 | return true; |
458 | } | | 458 | } |
459 | | | 459 | |
460 | /* | | 460 | /* |
461 | * Return true if preemption is explicitly disabled. | | 461 | * Return true if preemption is explicitly disabled. |
462 | */ | | 462 | */ |
463 | bool | | 463 | bool |
464 | kpreempt_disabled(void) | | 464 | kpreempt_disabled(void) |
465 | { | | 465 | { |
466 | const lwp_t *l = curlwp; | | 466 | const lwp_t *l = curlwp; |
467 | | | 467 | |
468 | return l->l_nopreempt != 0 || l->l_stat == LSZOMB || | | 468 | return l->l_nopreempt != 0 || l->l_stat == LSZOMB || |
469 | (l->l_flag & LW_IDLE) != 0 || (l->l_pflag & LP_INTR) != 0 || | | 469 | (l->l_flag & LW_IDLE) != 0 || (l->l_pflag & LP_INTR) != 0 || |
470 | cpu_kpreempt_disabled(); | | 470 | cpu_kpreempt_disabled(); |
471 | } | | 471 | } |
472 | | | 472 | |
473 | /* | | 473 | /* |
474 | * Disable kernel preemption. | | 474 | * Disable kernel preemption. |
475 | */ | | 475 | */ |
476 | void | | 476 | void |
477 | kpreempt_disable(void) | | 477 | kpreempt_disable(void) |
478 | { | | 478 | { |
479 | | | 479 | |
480 | KPREEMPT_DISABLE(curlwp); | | 480 | KPREEMPT_DISABLE(curlwp); |
481 | } | | 481 | } |
482 | | | 482 | |
483 | /* | | 483 | /* |
484 | * Reenable kernel preemption. | | 484 | * Reenable kernel preemption. |
485 | */ | | 485 | */ |
486 | void | | 486 | void |
487 | kpreempt_enable(void) | | 487 | kpreempt_enable(void) |
488 | { | | 488 | { |
489 | | | 489 | |
490 | KPREEMPT_ENABLE(curlwp); | | 490 | KPREEMPT_ENABLE(curlwp); |
491 | } | | 491 | } |
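A sketch of the disable/enable bracket (update_stats() is hypothetical): curcpu() is only stable against migration while kernel preemption is disabled, so per-CPU state is touched inside the bracket:

    static void
    example_percpu(void)
    {
            struct cpu_info *ci;

            kpreempt_disable();     /* the LWP stays on this CPU... */
            ci = curcpu();
            update_stats(ci);       /* hypothetical per-CPU work */
            kpreempt_enable();      /* ...until here */
    }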
492 | | | 492 | |
493 | /* | | 493 | /* |
494 | * Compute the amount of time during which the current lwp was running. | | 494 | * Compute the amount of time during which the current lwp was running. |
495 | * | | 495 | * |
496 | * - update l_rtime unless it's an idle lwp. | | 496 | * - update l_rtime unless it's an idle lwp. |
497 | */ | | 497 | */ |
498 | | | 498 | |
499 | void | | 499 | void |
500 | updatertime(lwp_t *l, const struct bintime *now) | | 500 | updatertime(lwp_t *l, const struct bintime *now) |
501 | { | | 501 | { |
502 | | | 502 | |
503 | if (__predict_false(l->l_flag & LW_IDLE)) | | 503 | if (__predict_false(l->l_flag & LW_IDLE)) |
504 | return; | | 504 | return; |
505 | | | 505 | |
506 | /* rtime += now - stime */ | | 506 | /* rtime += now - stime */ |
507 | bintime_add(&l->l_rtime, now); | | 507 | bintime_add(&l->l_rtime, now); |
508 | bintime_sub(&l->l_rtime, &l->l_stime); | | 508 | bintime_sub(&l->l_rtime, &l->l_stime); |
509 | } | | 509 | } |
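The same bintime arithmetic measures any interval; a minimal sketch (do_work() is hypothetical):

    static void
    example_measure(void)
    {
            struct bintime start, delta;

            binuptime(&start);
            do_work();                      /* hypothetical workload */
            binuptime(&delta);
            bintime_sub(&delta, &start);    /* delta = end - start, as above */
    }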
510 | | | 510 | |
511 | /* | | 511 | /* |
512 | * Select the next LWP for the current CPU to run. | | 512 | * Select the next LWP for the current CPU to run. |
513 | */ | | 513 | */ |
514 | static inline lwp_t * | | 514 | static inline lwp_t * |
515 | nextlwp(struct cpu_info *ci, struct schedstate_percpu *spc) | | 515 | nextlwp(struct cpu_info *ci, struct schedstate_percpu *spc) |
516 | { | | 516 | { |
517 | lwp_t *newl; | | 517 | lwp_t *newl; |
518 | | | 518 | |
519 | /* | | 519 | /* |
520 | * Let sched_nextlwp() select the LWP to run the CPU next. | | 520 | * Let sched_nextlwp() select the LWP to run the CPU next. |
521 | * If no LWP is runnable, select the idle LWP. | | 521 | * If no LWP is runnable, select the idle LWP. |
522 | * | | 522 | * |
523 | * On arrival here LWPs on a run queue are locked by spc_mutex which | | 523 | * On arrival here LWPs on a run queue are locked by spc_mutex which |
524 | * is currently held. Idle LWPs are always locked by spc_lwplock, | | 524 | * is currently held. Idle LWPs are always locked by spc_lwplock, |
525 | * which may or may not be held here. On exit from this code block, | | 525 | * which may or may not be held here. On exit from this code block, |
526 | * in all cases newl is locked by spc_lwplock. | | 526 | * in all cases newl is locked by spc_lwplock. |
527 | */ | | 527 | */ |
528 | newl = sched_nextlwp(); | | 528 | newl = sched_nextlwp(); |
529 | if (newl != NULL) { | | 529 | if (newl != NULL) { |
530 | sched_dequeue(newl); | | 530 | sched_dequeue(newl); |
531 | KASSERT(lwp_locked(newl, spc->spc_mutex)); | | 531 | KASSERT(lwp_locked(newl, spc->spc_mutex)); |
532 | KASSERT(newl->l_cpu == ci); | | 532 | KASSERT(newl->l_cpu == ci); |
533 | newl->l_stat = LSONPROC; | | 533 | newl->l_stat = LSONPROC; |
534 | newl->l_pflag |= LP_RUNNING; | | 534 | newl->l_pflag |= LP_RUNNING; |
535 | spc->spc_curpriority = lwp_eprio(newl); | | 535 | spc->spc_curpriority = lwp_eprio(newl); |
536 | spc->spc_flags &= ~(SPCF_SWITCHCLEAR | SPCF_IDLE); | | 536 | spc->spc_flags &= ~(SPCF_SWITCHCLEAR | SPCF_IDLE); |
537 | lwp_setlock(newl, spc->spc_lwplock); | | 537 | lwp_setlock(newl, spc->spc_lwplock); |
538 | } else { | | 538 | } else { |
539 | /* | | 539 | /* |
540 | * The idle LWP does not get set to LSONPROC, because | | 540 | * The idle LWP does not get set to LSONPROC, because |
541 | * otherwise it screws up the output from top(1) etc. | | 541 | * otherwise it screws up the output from top(1) etc. |
542 | */ | | 542 | */ |
543 | newl = ci->ci_data.cpu_idlelwp; | | 543 | newl = ci->ci_data.cpu_idlelwp; |
544 | newl->l_pflag |= LP_RUNNING; | | 544 | newl->l_pflag |= LP_RUNNING; |
545 | spc->spc_curpriority = PRI_IDLE; | | 545 | spc->spc_curpriority = PRI_IDLE; |
546 | spc->spc_flags = (spc->spc_flags & ~SPCF_SWITCHCLEAR) | | | 546 | spc->spc_flags = (spc->spc_flags & ~SPCF_SWITCHCLEAR) | |
547 | SPCF_IDLE; | | 547 | SPCF_IDLE; |
548 | } | | 548 | } |
549 | | | 549 | |
550 | /* | | 550 | /* |
551 | * Only clear want_resched if there are no pending (slow) software | | 551 | * Only clear want_resched if there are no pending (slow) software |
552 | * interrupts. We can do this without an atomic, because no new | | 552 | * interrupts. We can do this without an atomic, because no new |
553 | * LWPs can appear in the queue due to our hold on spc_mutex, and | | 553 | * LWPs can appear in the queue due to our hold on spc_mutex, and |
554 | * the update to ci_want_resched will become globally visible before | | 554 | * the update to ci_want_resched will become globally visible before |
555 | * the release of spc_mutex becomes globally visible. | | 555 | * the release of spc_mutex becomes globally visible. |
556 | */ | | 556 | */ |
557 | if (ci->ci_data.cpu_softints == 0) | | 557 | if (ci->ci_data.cpu_softints == 0) |
558 | ci->ci_want_resched = 0; | | 558 | ci->ci_want_resched = 0; |
559 | | | 559 | |
560 | return newl; | | 560 | return newl; |
561 | } | | 561 | } |
562 | | | 562 | |
563 | /* | | 563 | /* |
564 | * The machine independent parts of context switch. | | 564 | * The machine independent parts of context switch. |
565 | * | | 565 | * |
566 | * NOTE: l->l_cpu is not changed in this routine, because an LWP never | | 566 | * NOTE: l->l_cpu is not changed in this routine, because an LWP never |
567 | * changes its own l_cpu (that would screw up curcpu on many ports and could | | 567 | * changes its own l_cpu (that would screw up curcpu on many ports and could |
568 | * cause all kinds of other evil stuff). l_cpu is always changed by some | | 568 | * cause all kinds of other evil stuff). l_cpu is always changed by some |
569 | * other actor, when it's known the LWP is not running (the LP_RUNNING flag | | 569 | * other actor, when it's known the LWP is not running (the LP_RUNNING flag |
570 | * is checked under lock). | | 570 | * is checked under lock). |
571 | */ | | 571 | */ |
572 | void | | 572 | void |
573 | mi_switch(lwp_t *l) | | 573 | mi_switch(lwp_t *l) |
574 | { | | 574 | { |
575 | struct cpu_info *ci; | | 575 | struct cpu_info *ci; |
576 | struct schedstate_percpu *spc; | | 576 | struct schedstate_percpu *spc; |
577 | struct lwp *newl; | | 577 | struct lwp *newl; |
578 | kmutex_t *lock; | | 578 | kmutex_t *lock; |
579 | int oldspl; | | 579 | int oldspl; |
580 | struct bintime bt; | | 580 | struct bintime bt; |
581 | bool returning; | | 581 | bool returning; |
582 | | | 582 | |
583 | KASSERT(lwp_locked(l, NULL)); | | 583 | KASSERT(lwp_locked(l, NULL)); |
584 | KASSERT(kpreempt_disabled()); | | 584 | KASSERT(kpreempt_disabled()); |
585 | KASSERT(mutex_owned(curcpu()->ci_schedstate.spc_mutex)); | | 585 | KASSERT(mutex_owned(curcpu()->ci_schedstate.spc_mutex)); |
586 | KASSERTMSG(l->l_blcnt == 0, "kernel_lock leaked"); | | 586 | KASSERTMSG(l->l_blcnt == 0, "kernel_lock leaked"); |
587 | | | 587 | |
588 | kstack_check_magic(l); | | 588 | kstack_check_magic(l); |
589 | | | 589 | |
590 | binuptime(&bt); | | 590 | binuptime(&bt); |
591 | | | 591 | |
592 | KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp); | | 592 | KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp); |
593 | KASSERT((l->l_pflag & LP_RUNNING) != 0); | | 593 | KASSERT((l->l_pflag & LP_RUNNING) != 0); |
594 | KASSERT(l->l_cpu == curcpu() || l->l_stat == LSRUN); | | 594 | KASSERT(l->l_cpu == curcpu() || l->l_stat == LSRUN); |
595 | ci = curcpu(); | | 595 | ci = curcpu(); |
596 | spc = &ci->ci_schedstate; | | 596 | spc = &ci->ci_schedstate; |
597 | returning = false; | | 597 | returning = false; |
598 | newl = NULL; | | 598 | newl = NULL; |
599 | | | 599 | |
600 | /* | | 600 | /* |
601 | * If we have been asked to switch to a specific LWP, then there | | 601 | * If we have been asked to switch to a specific LWP, then there |
602 | * is no need to inspect the run queues. If a soft interrupt is | | 602 | * is no need to inspect the run queues. If a soft interrupt is |
603 | * blocking, then return to the interrupted thread without adjusting | | 603 | * blocking, then return to the interrupted thread without adjusting |
604 | * VM context or its start time: neither have been changed in order | | 604 | * VM context or its start time: neither have been changed in order |
605 | * to take the interrupt. | | 605 | * to take the interrupt. |
606 | */ | | 606 | */ |
607 | if (l->l_switchto != NULL) { | | 607 | if (l->l_switchto != NULL) { |
608 | if ((l->l_pflag & LP_INTR) != 0) { | | 608 | if ((l->l_pflag & LP_INTR) != 0) { |
609 | returning = true; | | 609 | returning = true; |
610 | softint_block(l); | | 610 | softint_block(l); |
611 | if ((l->l_pflag & LP_TIMEINTR) != 0) | | 611 | if ((l->l_pflag & LP_TIMEINTR) != 0) |
612 | updatertime(l, &bt); | | 612 | updatertime(l, &bt); |
613 | } | | 613 | } |
614 | newl = l->l_switchto; | | 614 | newl = l->l_switchto; |
615 | l->l_switchto = NULL; | | 615 | l->l_switchto = NULL; |
616 | } | | 616 | } |
617 | #ifndef __HAVE_FAST_SOFTINTS | | 617 | #ifndef __HAVE_FAST_SOFTINTS |
618 | else if (ci->ci_data.cpu_softints != 0) { | | 618 | else if (ci->ci_data.cpu_softints != 0) { |
619 | /* There are pending soft interrupts, so pick one. */ | | 619 | /* There are pending soft interrupts, so pick one. */ |
620 | newl = softint_picklwp(); | | 620 | newl = softint_picklwp(); |
621 | newl->l_stat = LSONPROC; | | 621 | newl->l_stat = LSONPROC; |
622 | newl->l_pflag |= LP_RUNNING; | | 622 | newl->l_pflag |= LP_RUNNING; |
623 | } | | 623 | } |
624 | #endif /* !__HAVE_FAST_SOFTINTS */ | | 624 | #endif /* !__HAVE_FAST_SOFTINTS */ |
625 | | | 625 | |
626 | /* | | 626 | /* |
627 | * If on the CPU and we have gotten this far, then we must yield. | | 627 | * If on the CPU and we have gotten this far, then we must yield. |
628 | */ | | 628 | */ |
629 | if (l->l_stat == LSONPROC && l != newl) { | | 629 | if (l->l_stat == LSONPROC && l != newl) { |
630 | KASSERT(lwp_locked(l, spc->spc_lwplock)); | | 630 | KASSERT(lwp_locked(l, spc->spc_lwplock)); |
631 | KASSERT((l->l_flag & LW_IDLE) == 0); | | 631 | KASSERT((l->l_flag & LW_IDLE) == 0); |
632 | l->l_stat = LSRUN; | | 632 | l->l_stat = LSRUN; |
633 | lwp_setlock(l, spc->spc_mutex); | | 633 | lwp_setlock(l, spc->spc_mutex); |
634 | sched_enqueue(l); | | 634 | sched_enqueue(l); |
635 | sched_preempted(l); | | 635 | sched_preempted(l); |
636 | | | 636 | |
637 | /* | | 637 | /* |
638 | * Handle migration. Note that "migrating LWP" may | | 638 | * Handle migration. Note that "migrating LWP" may |
639 | * be reset here, if interrupt/preemption happens | | 639 | * be reset here, if interrupt/preemption happens |
640 | * early in idle LWP. | | 640 | * early in idle LWP. |
641 | */ | | 641 | */ |
642 | if (l->l_target_cpu != NULL && (l->l_pflag & LP_BOUND) == 0) { | | 642 | if (l->l_target_cpu != NULL && (l->l_pflag & LP_BOUND) == 0) { |
643 | KASSERT((l->l_pflag & LP_INTR) == 0); | | 643 | KASSERT((l->l_pflag & LP_INTR) == 0); |
644 | spc->spc_migrating = l; | | 644 | spc->spc_migrating = l; |
645 | } | | 645 | } |
646 | } | | 646 | } |
647 | | | 647 | |
648 | /* Pick new LWP to run. */ | | 648 | /* Pick new LWP to run. */ |
649 | if (newl == NULL) { | | 649 | if (newl == NULL) { |
650 | newl = nextlwp(ci, spc); | | 650 | newl = nextlwp(ci, spc); |
651 | } | | 651 | } |
652 | | | 652 | |
653 | /* Items that must be updated with the CPU locked. */ | | 653 | /* Items that must be updated with the CPU locked. */ |
654 | if (!returning) { | | 654 | if (!returning) { |
655 | /* Count time spent in current system call */ | | 655 | /* Count time spent in current system call */ |
656 | SYSCALL_TIME_SLEEP(l); | | 656 | SYSCALL_TIME_SLEEP(l); |
657 | | | 657 | |
658 | updatertime(l, &bt); | | 658 | updatertime(l, &bt); |
659 | | | 659 | |
660 | /* Update the new LWP's start time. */ | | 660 | /* Update the new LWP's start time. */ |
661 | newl->l_stime = bt; | | 661 | newl->l_stime = bt; |
662 | | | 662 | |
663 | /* | | 663 | /* |
664 | * ci_curlwp changes when a fast soft interrupt occurs. | | 664 | * ci_curlwp changes when a fast soft interrupt occurs. |
665 | * We use ci_onproc to keep track of which kernel or | | 665 | * We use ci_onproc to keep track of which kernel or |
666 | * user thread is running 'underneath' the software | | 666 | * user thread is running 'underneath' the software |
667 | * interrupt. This is important for time accounting, | | 667 | * interrupt. This is important for time accounting, |
668 | * itimers and forcing user threads to preempt (aston). | | 668 | * itimers and forcing user threads to preempt (aston). |
669 | */ | | 669 | */ |
670 | ci->ci_onproc = newl; | | 670 | ci->ci_onproc = newl; |
671 | } | | 671 | } |
672 | | | 672 | |
673 | /* | | 673 | /* |
674 | * Preemption related tasks. Must be done holding spc_mutex. Clear | | 674 | * Preemption related tasks. Must be done holding spc_mutex. Clear |
675 | * l_dopreempt without an atomic - it's only ever set non-zero by | | 675 | * l_dopreempt without an atomic - it's only ever set non-zero by |
676 | * sched_resched_cpu() which also holds spc_mutex, and only ever | | 676 | * sched_resched_cpu() which also holds spc_mutex, and only ever |
677 | * cleared by the LWP itself (us) with atomics when not under lock. | | 677 | * cleared by the LWP itself (us) with atomics when not under lock. |
678 | */ | | 678 | */ |
679 | l->l_dopreempt = 0; | | 679 | l->l_dopreempt = 0; |
680 | if (__predict_false(l->l_pfailaddr != 0)) { | | 680 | if (__predict_false(l->l_pfailaddr != 0)) { |
681 | LOCKSTAT_FLAG(lsflag); | | 681 | LOCKSTAT_FLAG(lsflag); |
682 | LOCKSTAT_ENTER(lsflag); | | 682 | LOCKSTAT_ENTER(lsflag); |
683 | LOCKSTAT_STOP_TIMER(lsflag, l->l_pfailtime); | | 683 | LOCKSTAT_STOP_TIMER(lsflag, l->l_pfailtime); |
684 | LOCKSTAT_EVENT_RA(lsflag, l->l_pfaillock, LB_NOPREEMPT|LB_SPIN, | | 684 | LOCKSTAT_EVENT_RA(lsflag, l->l_pfaillock, LB_NOPREEMPT|LB_SPIN, |
685 | 1, l->l_pfailtime, l->l_pfailaddr); | | 685 | 1, l->l_pfailtime, l->l_pfailaddr); |
686 | LOCKSTAT_EXIT(lsflag); | | 686 | LOCKSTAT_EXIT(lsflag); |
687 | l->l_pfailtime = 0; | | 687 | l->l_pfailtime = 0; |
688 | l->l_pfaillock = 0; | | 688 | l->l_pfaillock = 0; |
689 | l->l_pfailaddr = 0; | | 689 | l->l_pfailaddr = 0; |
690 | } | | 690 | } |
691 | | | 691 | |
692 | if (l != newl) { | | 692 | if (l != newl) { |
693 | struct lwp *prevlwp; | | 693 | struct lwp *prevlwp; |
694 | | | 694 | |
695 | /* Release all locks, but leave the current LWP locked */ | | 695 | /* Release all locks, but leave the current LWP locked */ |
696 | if (l->l_mutex == spc->spc_mutex) { | | 696 | if (l->l_mutex == spc->spc_mutex) { |
697 | /* | | 697 | /* |
698 | * Drop spc_lwplock, if the current LWP has been moved | | 698 | * Drop spc_lwplock, if the current LWP has been moved |
699 | * to the run queue (it is now locked by spc_mutex). | | 699 | * to the run queue (it is now locked by spc_mutex). |
700 | */ | | 700 | */ |
701 | mutex_spin_exit(spc->spc_lwplock); | | 701 | mutex_spin_exit(spc->spc_lwplock); |
702 | } else { | | 702 | } else { |
703 | /* | | 703 | /* |
704 | * Otherwise, drop the spc_mutex, we are done with the | | 704 | * Otherwise, drop the spc_mutex, we are done with the |
705 | * run queues. | | 705 | * run queues. |
706 | */ | | 706 | */ |
707 | mutex_spin_exit(spc->spc_mutex); | | 707 | mutex_spin_exit(spc->spc_mutex); |
708 | } | | 708 | } |
709 | | | 709 | |
710 | /* We're down to only one lock, so do debug checks. */ | | 710 | /* We're down to only one lock, so do debug checks. */ |
711 | LOCKDEBUG_BARRIER(l->l_mutex, 1); | | 711 | LOCKDEBUG_BARRIER(l->l_mutex, 1); |
712 | | | 712 | |
713 | /* Count the context switch. */ | | 713 | /* Count the context switch. */ |
714 | CPU_COUNT(CPU_COUNT_NSWTCH, 1); | | 714 | CPU_COUNT(CPU_COUNT_NSWTCH, 1); |
715 | l->l_ncsw++; | | 715 | l->l_ncsw++; |
716 | if ((l->l_pflag & LP_PREEMPTING) != 0) { | | 716 | if ((l->l_pflag & LP_PREEMPTING) != 0) { |
717 | l->l_nivcsw++; | | 717 | l->l_nivcsw++; |
718 | l->l_pflag &= ~LP_PREEMPTING; | | 718 | l->l_pflag &= ~LP_PREEMPTING; |
719 | } | | 719 | } |
720 | | | 720 | |
721 | /* | | 721 | /* |
722 | * Increase the count of spin-mutexes before the release | | 722 | * Increase the count of spin-mutexes before the release |
723 | * of the last lock - we must remain at IPL_SCHED after | | 723 | * of the last lock - we must remain at IPL_SCHED after |
724 | * releasing the lock. | | 724 | * releasing the lock. |
725 | */ | | 725 | */ |
726 | KASSERTMSG(ci->ci_mtx_count == -1, | | 726 | KASSERTMSG(ci->ci_mtx_count == -1, |
727 | "%s: cpu%u: ci_mtx_count (%d) != -1 " | | 727 | "%s: cpu%u: ci_mtx_count (%d) != -1 " |
728 | "(block with spin-mutex held)", | | 728 | "(block with spin-mutex held)", |
729 | __func__, cpu_index(ci), ci->ci_mtx_count); | | 729 | __func__, cpu_index(ci), ci->ci_mtx_count); |
730 | oldspl = MUTEX_SPIN_OLDSPL(ci); | | 730 | oldspl = MUTEX_SPIN_OLDSPL(ci); |
731 | ci->ci_mtx_count = -2; | | 731 | ci->ci_mtx_count = -2; |
732 | | | 732 | |
733 | /* Update status for lwpctl, if present. */ | | 733 | /* Update status for lwpctl, if present. */ |
734 | if (l->l_lwpctl != NULL) { | | 734 | if (l->l_lwpctl != NULL) { |
735 | l->l_lwpctl->lc_curcpu = (l->l_stat == LSZOMB ? | | 735 | l->l_lwpctl->lc_curcpu = (l->l_stat == LSZOMB ? |
736 | LWPCTL_CPU_EXITED : LWPCTL_CPU_NONE); | | 736 | LWPCTL_CPU_EXITED : LWPCTL_CPU_NONE); |
737 | } | | 737 | } |
738 | | | 738 | |
739 | /* | | 739 | /* |
740 | * If curlwp is a soft interrupt LWP, there's nobody on the | | 740 | * If curlwp is a soft interrupt LWP, there's nobody on the |
741 | * other side to unlock - we're returning into an assembly | | 741 | * other side to unlock - we're returning into an assembly |
742 | * trampoline. Unlock now. This is safe because this is a | | 742 | * trampoline. Unlock now. This is safe because this is a |
743 | * kernel LWP and is bound to current CPU: the worst anyone | | 743 | * kernel LWP and is bound to current CPU: the worst anyone |
744 | * else will do to it is to put it back onto this CPU's run | | 744 | * else will do to it is to put it back onto this CPU's run |
745 | * queue (and the CPU is busy here right now!). | | 745 | * queue (and the CPU is busy here right now!). |
746 | */ | | 746 | */ |
747 | if (returning) { | | 747 | if (returning) { |
748 | /* Keep IPL_SCHED after this; MD code will fix up. */ | | 748 | /* Keep IPL_SCHED after this; MD code will fix up. */ |
749 | l->l_pflag &= ~LP_RUNNING; | | 749 | l->l_pflag &= ~LP_RUNNING; |
750 | lwp_unlock(l); | | 750 | lwp_unlock(l); |
751 | } else { | | 751 | } else { |
752 | /* A normal LWP: save old VM context. */ | | 752 | /* A normal LWP: save old VM context. */ |
753 | pmap_deactivate(l); | | 753 | pmap_deactivate(l); |
754 | } | | 754 | } |
755 | | | 755 | |
756 | /* | | 756 | /* |
757 | * If DTrace has set the active vtime enum to anything | | 757 | * If DTrace has set the active vtime enum to anything |
758 | * other than INACTIVE (0), then it should have set the | | 758 | * other than INACTIVE (0), then it should have set the |
759 | * function to call. | | 759 | * function to call. |
760 | */ | | 760 | */ |
761 | if (__predict_false(dtrace_vtime_active)) { | | 761 | if (__predict_false(dtrace_vtime_active)) { |
762 | (*dtrace_vtime_switch_func)(newl); | | 762 | (*dtrace_vtime_switch_func)(newl); |
763 | } | | 763 | } |
764 | | | 764 | |
765 | /* | | 765 | /* |
766 | * We must ensure not to come here from inside a read section. | | 766 | * We must ensure not to come here from inside a read section. |
767 | */ | | 767 | */ |
768 | KASSERT(pserialize_not_in_read_section()); | | 768 | KASSERT(pserialize_not_in_read_section()); |
769 | | | 769 | |
770 | /* Switch to the new LWP. */ | | 770 | /* Switch to the new LWP. */ |
771 | #ifdef MULTIPROCESSOR | | 771 | #ifdef MULTIPROCESSOR |
772 | KASSERT(curlwp == ci->ci_curlwp); | | 772 | KASSERT(curlwp == ci->ci_curlwp); |
773 | #endif | | 773 | #endif |
774 | KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp); | | 774 | KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp); |
775 | prevlwp = cpu_switchto(l, newl, returning); | | 775 | prevlwp = cpu_switchto(l, newl, returning); |
776 | ci = curcpu(); | | 776 | ci = curcpu(); |
777 | #ifdef MULTIPROCESSOR | | 777 | #ifdef MULTIPROCESSOR |
778 | KASSERT(curlwp == ci->ci_curlwp); | | 778 | KASSERT(curlwp == ci->ci_curlwp); |
779 | #endif | | 779 | #endif |
780 | KASSERTMSG(l == curlwp, "l %p curlwp %p prevlwp %p", | | 780 | KASSERTMSG(l == curlwp, "l %p curlwp %p prevlwp %p", |
781 | l, curlwp, prevlwp); | | 781 | l, curlwp, prevlwp); |
782 | KASSERT(prevlwp != NULL); | | 782 | KASSERT(prevlwp != NULL); |
783 | KASSERT(l->l_cpu == ci); | | 783 | KASSERT(l->l_cpu == ci); |
784 | KASSERT(ci->ci_mtx_count == -2); | | 784 | KASSERT(ci->ci_mtx_count == -2); |
785 | | | 785 | |
786 | /* | | 786 | /* |
787 | * Immediately mark the previous LWP as no longer running | | 787 | * Immediately mark the previous LWP as no longer running |
788 | * and unlock (to keep lock wait times as short as possible). | | 788 | * and unlock (to keep lock wait times as short as possible). |
789 | * We'll still be at IPL_SCHED afterwards. If a zombie, | | 789 | * We'll still be at IPL_SCHED afterwards. If a zombie, |
790 | * don't touch after clearing LP_RUNNING as it could be | | 790 | * don't touch after clearing LP_RUNNING as it could be |
791 | * reaped by another CPU. Issue a memory barrier to ensure | | 791 | * reaped by another CPU. Issue a memory barrier to ensure |
792 | * this. | | 792 | * this. |
793 | * | | 793 | * |
794 | * atomic_store_release matches atomic_load_acquire in | | 794 | * atomic_store_release matches atomic_load_acquire in |
795 | * lwp_free. | | 795 | * lwp_free. |
796 | */ | | 796 | */ |
797 | KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0); | | 797 | KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0); |
798 | lock = prevlwp->l_mutex; | | 798 | lock = prevlwp->l_mutex; |
799 | if (__predict_false(prevlwp->l_stat == LSZOMB)) { | | 799 | if (__predict_false(prevlwp->l_stat == LSZOMB)) { |
800 | atomic_store_release(&prevlwp->l_pflag, | | 800 | atomic_store_release(&prevlwp->l_pflag, |
801 | prevlwp->l_pflag & ~LP_RUNNING); | | 801 | prevlwp->l_pflag & ~LP_RUNNING); |
802 | } else { | | 802 | } else { |
803 | prevlwp->l_pflag &= ~LP_RUNNING; | | 803 | prevlwp->l_pflag &= ~LP_RUNNING; |
804 | } | | 804 | } |
805 | mutex_spin_exit(lock); | | 805 | mutex_spin_exit(lock); |
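
		/*
		 * For reference, the consumer side of this release/acquire
		 * pair: lwp_free() must observe LP_RUNNING clear before it
		 * may reap the LWP.  A rough sketch of that wait (see
		 * kern_lwp.c for the authoritative code):
		 *
		 *	while (atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
		 *		SPINLOCK_BACKOFF_HOOK;
		 */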

		/*
		 * Switched away - we have new curlwp.
		 * Restore VM context and IPL.
		 */
		pmap_activate(l);
		pcu_switchpoint(l);

		/* Update status for lwpctl, if present. */
		if (l->l_lwpctl != NULL) {
			l->l_lwpctl->lc_curcpu = (int)cpu_index(ci);
			l->l_lwpctl->lc_pctr++;
		}
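		/*
		 * (The lwpctl block is shared with userland via
		 * _lwp_ctl(2); bumping lc_pctr lets user code such as
		 * libpthread notice that it has been context-switched
		 * since it last looked.)
		 */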

		/*
		 * Normalize the spin mutex count and restore the previous
		 * SPL.  Note that, unless the caller disabled preemption,
		 * we can be preempted at any time after this splx().
		 */
		KASSERT(l->l_cpu == ci);
		KASSERT(ci->ci_mtx_count == -1);
		ci->ci_mtx_count = 0;
		splx(oldspl);
	} else {
		/* Nothing to do - just unlock and return. */
		mutex_spin_exit(spc->spc_mutex);
		l->l_pflag &= ~LP_PREEMPTING;
		lwp_unlock(l);
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC || (l->l_flag & LW_IDLE) != 0);

	SYSCALL_TIME_WAKEUP(l);
	LOCKDEBUG_BARRIER(NULL, 1);
}

/*
 * setrunnable: change LWP state to be runnable, placing it on the run queue.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
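/*
 * A typical caller, as a rough sketch (details vary; callers such as
 * lwp_continue() differ):
 *
 *	mutex_enter(p->p_lock);
 *	lwp_lock(l);
 *	...decide that l should run...
 *	setrunnable(l);			(releases the LWP lock)
 *	mutex_exit(p->p_lock);
 */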
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	struct cpu_info *ci;
	kmutex_t *oldlock;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT((l->l_flag & LW_DBGSUSPEND) == 0);
	KASSERT(mutex_owned(p->p_lock));
	KASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex);

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xsig != 0)
			signotify(l);
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		cv_broadcast(&p->p_lwpcv);
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	case LSIDL:
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping, start it again.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l, true);
		return;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_pflag & LP_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Look for a CPU to run.
	 * Set the LWP runnable.
	 */
	ci = sched_takecpu(l);
	l->l_cpu = ci;
	spc_lock(ci);
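	/*
	 * lwp_setlock() switches the LWP over to the run queue mutex and
	 * returns the lock previously held; that old lock stays held
	 * until the LWP has been enqueued and a CPU kicked, and is
	 * released below.
	 */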
	oldlock = lwp_setlock(l, l->l_cpu->ci_schedstate.spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	sched_enqueue(l);
	sched_resched_lwp(l, true);
	/* SPC & LWP now unlocked. */
	mutex_spin_exit(oldlock);
}

/*
 * suspendsched:
 *
 *	Convert all non-LW_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
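/*
 * (As the LW_WREBOOT flag below suggests, this is used on the shutdown
 * path to park as much of the system as possible before going down.)
 */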
void
suspendsched(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proc_lock);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(p->p_lock);
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			continue;
		}

		if (p->p_stat != SSTOP) {
			if (p->p_stat != SZOMB && p->p_stat != SDEAD) {
				p->p_pptr->p_nstopchild++;
				p->p_waited = 0;
			}
			p->p_stat = SSTOP;
		}

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user / kernel
			 * boundary, so that they will release any locks
			 * that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(p->p_lock);
	}
	mutex_exit(&proc_lock);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 *
	 * Unusually, we don't hold any other scheduler object locked, which
	 * would keep preemption off for sched_resched_cpu(), so disable it
	 * explicitly.
	 */
	kpreempt_disable();
	for (CPU_INFO_FOREACH(cii, ci)) {
		spc_lock(ci);
		sched_resched_cpu(ci, PRI_KERNEL, true);
		/* spc now unlocked */
	}
	kpreempt_enable();
}

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l, bool cleanup)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

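/*
 * sched_changepri:
 *
 *	Change an LWP's base priority: re-queue it if it is on a run
 *	queue, consider kicking its CPU if it is running, otherwise just
 *	record the new value.  Called with the LWP locked.
 */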
static void
sched_changepri(struct lwp *l, pri_t pri)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, NULL));

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	if (l->l_stat == LSRUN) {
		KASSERT(lwp_locked(l, spc->spc_mutex));
		sched_dequeue(l);
		l->l_priority = pri;
		sched_enqueue(l);
		sched_resched_lwp(l, false);
	} else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) {
		/* On priority drop, only evict realtime LWPs. */
		KASSERT(lwp_locked(l, spc->spc_lwplock));
		l->l_priority = pri;
		spc_lock(ci);
		sched_resched_cpu(ci, spc->spc_maxpriority, true);
		/* spc now unlocked */
	} else {
		l->l_priority = pri;
	}
}

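/*
 * sched_lendpri:
 *
 *	Lend a priority to an LWP (priority inheritance/protection):
 *	the same dance as sched_changepri(), but updating the lent
 *	priority (l_inheritedprio/l_auxprio) rather than the base one.
 */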
static void
sched_lendpri(struct lwp *l, pri_t pri)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, NULL));

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	if (l->l_stat == LSRUN) {
		KASSERT(lwp_locked(l, spc->spc_mutex));
		sched_dequeue(l);
		l->l_inheritedprio = pri;
		l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
		sched_enqueue(l);
		sched_resched_lwp(l, false);
	} else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) {
		/* On priority drop, only evict realtime LWPs. */
		KASSERT(lwp_locked(l, spc->spc_lwplock));
		l->l_inheritedprio = pri;
		l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
		spc_lock(ci);
		sched_resched_cpu(ci, spc->spc_maxpriority, true);
		/* spc now unlocked */
	} else {
		l->l_inheritedprio = pri;
		l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
	}
}

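/*
 * syncobj_noowner:
 *
 *	Owner hook for sync objects whose wait channels have no owning
 *	LWP, i.e. nobody to lend priority to.
 */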
struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}

/* Decay 95% of proc::p_pctcpu in 60 seconds, ccpu = exp(-1/20) */
const fixpt_t ccpu = 0.95122942450071400909 * FSCALE;
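
/*
 * To see the 95%-in-60s claim, note that sched_pstats() applies the
 * decay once per second (the 5 second load average sampling below is
 * every fifth call), so over a minute:
 *
 *	ccpu^60 = exp(-1/20)^60 = exp(-3) ~= 0.0498
 *
 * i.e. only about 5% of the old value survives.  Each pass then folds
 * in fresh statclock ticks as an exponentially weighted moving average:
 *
 *	pctcpu = ccpu * pctcpu + (1 - ccpu) * (cpticks / clkhz)
 *
 * scaled by FSCALE for fixed point.
 */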

/*
 * Constants for averages over 1, 5 and 15 minutes when sampling at
 * 5 second intervals.
 */
static const fixpt_t cexp[] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};
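
/*
 * Derivation: with one sample every 5 seconds, an average over T
 * seconds decays by exp(-5/T) per sample; for the 1 minute average
 * that is exp(-5/60) = exp(-1/12), and likewise for 5 and 15 minutes.
 * The fixed-point update in sched_pstats() below is then the usual
 * exponentially weighted moving average:
 *
 *	ldavg = decay * ldavg + nrun * (1 - decay)
 */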

/*
 * sched_pstats:
 *
 * => Update process statistics and check CPU resource allocation.
 * => Call scheduler-specific hook to eventually adjust LWP priorities.
 * => Compute the load average over 1, 5 and 15 minute intervals.
 */
void
sched_pstats(void)
{
	struct loadavg *avg = &averunnable;
	const int clkhz = (stathz != 0 ? stathz : hz);
	static bool backwards = false;
	static u_int lavg_count = 0;
	struct proc *p;
	int nrun;

	sched_pstats_ticks++;
	if (++lavg_count >= 5) {
		lavg_count = 0;
		nrun = 0;
	}
	mutex_enter(&proc_lock);
	PROCLIST_FOREACH(p, &allproc) {
		struct lwp *l;
		struct rlimit *rlim;
		time_t runtm;
		int sig;

		/* Increment sleep time (if sleeping), ignore overflow. */
		mutex_enter(p->p_lock);
		runtm = p->p_rtime.sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			fixpt_t lpctcpu;
			u_int lcpticks;

			if (__predict_false((l->l_flag & LW_IDLE) != 0))
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.sec;
			l->l_swtime++;
			sched_lwp_stats(l);

			/* For load average calculation. */
			if (__predict_false(lavg_count == 0) &&
			    (l->l_flag & (LW_SINTR | LW_SYSTEM)) == 0) {
				switch (l->l_stat) {
				case LSSLEEP:
					if (l->l_slptime > 1) {
						break;
					}
					/* FALLTHROUGH */
				case LSRUN:
				case LSONPROC:
				case LSIDL:
					nrun++;
				}
			}
			lwp_unlock(l);

			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
			if (l->l_slptime != 0)
				continue;

			lpctcpu = l->l_pctcpu;
			lcpticks = atomic_swap_uint(&l->l_cpticks, 0);
			lpctcpu += ((FSCALE - ccpu) *
			    (lcpticks * FSCALE / clkhz)) >> FSHIFT;
			l->l_pctcpu = lpctcpu;
		}
		/* Calculating p_pctcpu only for ps(1). */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;

		if (__predict_false(runtm < 0)) {
			if (!backwards) {
				backwards = true;
				printf("WARNING: negative runtime; "
				    "monotonic clock has gone backwards\n");
			}
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over the hard limit, kill it with SIGKILL.
		 * If over the soft limit, send SIGXCPU and raise
		 * the soft limit a little.
		 */
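		/*
		 * (Raising the soft limit by 5 seconds at a time means
		 * SIGXCPU is re-sent roughly every 5 CPU-seconds until
		 * the hard limit takes over.)
		 */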
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (__predict_false(runtm >= rlim->rlim_cur)) {
			if (runtm >= rlim->rlim_max) {
				sig = SIGKILL;
				log(LOG_NOTICE,
				    "pid %d, command %s, is killed: %s\n",
				    p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
				uprintf("pid %d, command %s, is killed: %s\n",
				    p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
			} else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}
		mutex_exit(p->p_lock);
		if (__predict_false(sig)) {
			KASSERT((p->p_flag & PK_SYSTEM) == 0);
			psignal(p, sig);
		}
	}

	/* Load average calculation. */
	if (__predict_false(lavg_count == 0)) {
		int i;
		CTASSERT(__arraycount(cexp) == __arraycount(avg->ldavg));
		for (i = 0; i < __arraycount(cexp); i++) {
			avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
			    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
		}
	}

	/* Lightning bolt. */
	cv_broadcast(&lbolt);

	mutex_exit(&proc_lock);
}