These don't need uvm/uvm_extern.h.diff -r1.127 -r1.128 src/sys/kern/kern_clock.c
(uebayasi)
--- src/sys/kern/kern_clock.c 2010/12/20 00:25:46 1.127
+++ src/sys/kern/kern_clock.c 2011/07/27 14:35:33 1.128
@@ -1,438 +1,436 @@ | @@ -1,438 +1,436 @@ | |||
1 | /* $NetBSD: kern_clock.c,v 1.127 2010/12/20 00:25:46 matt Exp $ */ | 1 | /* $NetBSD: kern_clock.c,v 1.128 2011/07/27 14:35:33 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | |
9 | * NASA Ames Research Center. | 9 | * NASA Ames Research Center. | |
10 | * This code is derived from software contributed to The NetBSD Foundation | 10 | * This code is derived from software contributed to The NetBSD Foundation | |
11 | * by Charles M. Hannum. | 11 | * by Charles M. Hannum. | |
12 | * | 12 | * | |
13 | * Redistribution and use in source and binary forms, with or without | 13 | * Redistribution and use in source and binary forms, with or without | |
14 | * modification, are permitted provided that the following conditions | 14 | * modification, are permitted provided that the following conditions | |
15 | * are met: | 15 | * are met: | |
16 | * 1. Redistributions of source code must retain the above copyright | 16 | * 1. Redistributions of source code must retain the above copyright | |
17 | * notice, this list of conditions and the following disclaimer. | 17 | * notice, this list of conditions and the following disclaimer. | |
18 | * 2. Redistributions in binary form must reproduce the above copyright | 18 | * 2. Redistributions in binary form must reproduce the above copyright | |
19 | * notice, this list of conditions and the following disclaimer in the | 19 | * notice, this list of conditions and the following disclaimer in the | |
20 | * documentation and/or other materials provided with the distribution. | 20 | * documentation and/or other materials provided with the distribution. | |
21 | * | 21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
32 | * POSSIBILITY OF SUCH DAMAGE. | 32 | * POSSIBILITY OF SUCH DAMAGE. | |
33 | */ | 33 | */ | |
34 | 34 | |||
35 | /*- | 35 | /*- | |
36 | * Copyright (c) 1982, 1986, 1991, 1993 | 36 | * Copyright (c) 1982, 1986, 1991, 1993 | |
37 | * The Regents of the University of California. All rights reserved. | 37 | * The Regents of the University of California. All rights reserved. | |
38 | * (c) UNIX System Laboratories, Inc. | 38 | * (c) UNIX System Laboratories, Inc. | |
39 | * All or some portions of this file are derived from material licensed | 39 | * All or some portions of this file are derived from material licensed | |
40 | * to the University of California by American Telephone and Telegraph | 40 | * to the University of California by American Telephone and Telegraph | |
41 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 41 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
42 | * the permission of UNIX System Laboratories, Inc. | 42 | * the permission of UNIX System Laboratories, Inc. | |
43 | * | 43 | * | |
44 | * Redistribution and use in source and binary forms, with or without | 44 | * Redistribution and use in source and binary forms, with or without | |
45 | * modification, are permitted provided that the following conditions | 45 | * modification, are permitted provided that the following conditions | |
46 | * are met: | 46 | * are met: | |
47 | * 1. Redistributions of source code must retain the above copyright | 47 | * 1. Redistributions of source code must retain the above copyright | |
48 | * notice, this list of conditions and the following disclaimer. | 48 | * notice, this list of conditions and the following disclaimer. | |
49 | * 2. Redistributions in binary form must reproduce the above copyright | 49 | * 2. Redistributions in binary form must reproduce the above copyright | |
50 | * notice, this list of conditions and the following disclaimer in the | 50 | * notice, this list of conditions and the following disclaimer in the | |
51 | * documentation and/or other materials provided with the distribution. | 51 | * documentation and/or other materials provided with the distribution. | |
52 | * 3. Neither the name of the University nor the names of its contributors | 52 | * 3. Neither the name of the University nor the names of its contributors | |
53 | * may be used to endorse or promote products derived from this software | 53 | * may be used to endorse or promote products derived from this software | |
54 | * without specific prior written permission. | 54 | * without specific prior written permission. | |
55 | * | 55 | * | |
56 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 56 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
57 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 57 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
58 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 58 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
59 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 59 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
60 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 60 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
61 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 61 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
62 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 62 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
63 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 63 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
64 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 64 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
65 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 65 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
66 | * SUCH DAMAGE. | 66 | * SUCH DAMAGE. | |
67 | * | 67 | * | |
68 | * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 | 68 | * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 | |
69 | */ | 69 | */ | |
70 | 70 | |||
71 | #include <sys/cdefs.h> | 71 | #include <sys/cdefs.h> | |
72 | __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.127 2010/12/20 00:25:46 matt Exp $"); | 72 | __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.128 2011/07/27 14:35:33 uebayasi Exp $"); | |
73 | 73 | |||
74 | #include "opt_ntp.h" | 74 | #include "opt_ntp.h" | |
75 | #include "opt_perfctrs.h" | 75 | #include "opt_perfctrs.h" | |
76 | 76 | |||
77 | #include <sys/param.h> | 77 | #include <sys/param.h> | |
78 | #include <sys/systm.h> | 78 | #include <sys/systm.h> | |
79 | #include <sys/callout.h> | 79 | #include <sys/callout.h> | |
80 | #include <sys/kernel.h> | 80 | #include <sys/kernel.h> | |
81 | #include <sys/proc.h> | 81 | #include <sys/proc.h> | |
82 | #include <sys/resourcevar.h> | 82 | #include <sys/resourcevar.h> | |
83 | #include <sys/signalvar.h> | 83 | #include <sys/signalvar.h> | |
84 | #include <sys/sysctl.h> | 84 | #include <sys/sysctl.h> | |
85 | #include <sys/timex.h> | 85 | #include <sys/timex.h> | |
86 | #include <sys/sched.h> | 86 | #include <sys/sched.h> | |
87 | #include <sys/time.h> | 87 | #include <sys/time.h> | |
88 | #include <sys/timetc.h> | 88 | #include <sys/timetc.h> | |
89 | #include <sys/cpu.h> | 89 | #include <sys/cpu.h> | |
90 | #include <sys/atomic.h> | 90 | #include <sys/atomic.h> | |
91 | 91 | |||
92 | #include <uvm/uvm_extern.h> | |||
93 | ||||
94 | #ifdef GPROF | 92 | #ifdef GPROF | |
95 | #include <sys/gmon.h> | 93 | #include <sys/gmon.h> | |
96 | #endif | 94 | #endif | |
97 | 95 | |||
98 | /* | 96 | /* | |
99 | * Clock handling routines. | 97 | * Clock handling routines. | |
100 | * | 98 | * | |
101 | * This code is written to operate with two timers that run independently of | 99 | * This code is written to operate with two timers that run independently of | |
102 | * each other. The main clock, running hz times per second, is used to keep | 100 | * each other. The main clock, running hz times per second, is used to keep | |
103 | * track of real time. The second timer handles kernel and user profiling, | 101 | * track of real time. The second timer handles kernel and user profiling, | |
104 | * and does resource use estimation. If the second timer is programmable, | 102 | * and does resource use estimation. If the second timer is programmable, | |
105 | * it is randomized to avoid aliasing between the two clocks. For example, | 103 | * it is randomized to avoid aliasing between the two clocks. For example, | |
106 | * the randomization prevents an adversary from always giving up the CPU | 104 | * the randomization prevents an adversary from always giving up the CPU | |
107 | * just before its quantum expires. Otherwise, it would never accumulate | 105 | * just before its quantum expires. Otherwise, it would never accumulate | |
108 | * CPU ticks. The mean frequency of the second timer is stathz. | 106 | * CPU ticks. The mean frequency of the second timer is stathz. | |
109 | * | 107 | * | |
110 | * If no second timer exists, stathz will be zero; in this case we drive | 108 | * If no second timer exists, stathz will be zero; in this case we drive | |
111 | * profiling and statistics off the main clock. This WILL NOT be accurate; | 109 | * profiling and statistics off the main clock. This WILL NOT be accurate; | |
112 | * do not do it unless absolutely necessary. | 110 | * do not do it unless absolutely necessary. | |
113 | * | 111 | * | |
114 | * The statistics clock may (or may not) be run at a higher rate while | 112 | * The statistics clock may (or may not) be run at a higher rate while | |
115 | * profiling. This profile clock runs at profhz. We require that profhz | 113 | * profiling. This profile clock runs at profhz. We require that profhz | |
116 | * be an integral multiple of stathz. | 114 | * be an integral multiple of stathz. | |
117 | * | 115 | * | |
118 | * If the statistics clock is running fast, it must be divided by the ratio | 116 | * If the statistics clock is running fast, it must be divided by the ratio | |
119 | * profhz/stathz for statistics. (For profiling, every tick counts.) | 117 | * profhz/stathz for statistics. (For profiling, every tick counts.) | |
120 | */ | 118 | */ | |
121 | 119 | |||
122 | int stathz; | 120 | int stathz; | |
123 | int profhz; | 121 | int profhz; | |
124 | int profsrc; | 122 | int profsrc; | |
125 | int schedhz; | 123 | int schedhz; | |
126 | int profprocs; | 124 | int profprocs; | |
127 | int hardclock_ticks; | 125 | int hardclock_ticks; | |
128 | static int hardscheddiv; /* hard => sched divider (used if schedhz == 0) */ | 126 | static int hardscheddiv; /* hard => sched divider (used if schedhz == 0) */ | |
129 | static int psdiv; /* prof => stat divider */ | 127 | static int psdiv; /* prof => stat divider */ | |
130 | int psratio; /* ratio: prof / stat */ | 128 | int psratio; /* ratio: prof / stat */ | |
131 | 129 | |||
132 | static u_int get_intr_timecount(struct timecounter *); | 130 | static u_int get_intr_timecount(struct timecounter *); | |
133 | 131 | |||
134 | static struct timecounter intr_timecounter = { | 132 | static struct timecounter intr_timecounter = { | |
135 | get_intr_timecount, /* get_timecount */ | 133 | get_intr_timecount, /* get_timecount */ | |
136 | 0, /* no poll_pps */ | 134 | 0, /* no poll_pps */ | |
137 | ~0u, /* counter_mask */ | 135 | ~0u, /* counter_mask */ | |
138 | 0, /* frequency */ | 136 | 0, /* frequency */ | |
139 | "clockinterrupt", /* name */ | 137 | "clockinterrupt", /* name */ | |
140 | 0, /* quality - minimum implementation level for a clock */ | 138 | 0, /* quality - minimum implementation level for a clock */ | |
141 | NULL, /* prev */ | 139 | NULL, /* prev */ | |
142 | NULL, /* next */ | 140 | NULL, /* next */ | |
143 | }; | 141 | }; | |
144 | 142 | |||
145 | static u_int | 143 | static u_int | |
146 | get_intr_timecount(struct timecounter *tc) | 144 | get_intr_timecount(struct timecounter *tc) | |
147 | { | 145 | { | |
148 | 146 | |||
149 | return (u_int)hardclock_ticks; | 147 | return (u_int)hardclock_ticks; | |
150 | } | 148 | } | |
151 | 149 | |||
152 | /* | 150 | /* | |
153 | * Initialize clock frequencies and start both clocks running. | 151 | * Initialize clock frequencies and start both clocks running. | |
154 | */ | 152 | */ | |
155 | void | 153 | void | |
156 | initclocks(void) | 154 | initclocks(void) | |
157 | { | 155 | { | |
158 | int i; | 156 | int i; | |
159 | 157 | |||
160 | /* | 158 | /* | |
161 | * Set divisors to 1 (normal case) and let the machine-specific | 159 | * Set divisors to 1 (normal case) and let the machine-specific | |
162 | * code do its bit. | 160 | * code do its bit. | |
163 | */ | 161 | */ | |
164 | psdiv = 1; | 162 | psdiv = 1; | |
165 | /* | 163 | /* | |
166 | * provide minimum default time counter | 164 | * provide minimum default time counter | |
167 | * will only run at interrupt resolution | 165 | * will only run at interrupt resolution | |
168 | */ | 166 | */ | |
169 | intr_timecounter.tc_frequency = hz; | 167 | intr_timecounter.tc_frequency = hz; | |
170 | tc_init(&intr_timecounter); | 168 | tc_init(&intr_timecounter); | |
171 | cpu_initclocks(); | 169 | cpu_initclocks(); | |
172 | 170 | |||
173 | /* | 171 | /* | |
174 | * Compute profhz and stathz, fix profhz if needed. | 172 | * Compute profhz and stathz, fix profhz if needed. | |
175 | */ | 173 | */ | |
176 | i = stathz ? stathz : hz; | 174 | i = stathz ? stathz : hz; | |
177 | if (profhz == 0) | 175 | if (profhz == 0) | |
178 | profhz = i; | 176 | profhz = i; | |
179 | psratio = profhz / i; | 177 | psratio = profhz / i; | |
180 | if (schedhz == 0) { | 178 | if (schedhz == 0) { | |
181 | /* 16Hz is best */ | 179 | /* 16Hz is best */ | |
182 | hardscheddiv = hz / 16; | 180 | hardscheddiv = hz / 16; | |
183 | if (hardscheddiv <= 0) | 181 | if (hardscheddiv <= 0) | |
184 | panic("hardscheddiv"); | 182 | panic("hardscheddiv"); | |
185 | } | 183 | } | |
186 | 184 | |||
187 | } | 185 | } | |
188 | 186 | |||
189 | /* | 187 | /* | |
190 | * The real-time timer, interrupting hz times per second. | 188 | * The real-time timer, interrupting hz times per second. | |
191 | */ | 189 | */ | |
192 | void | 190 | void | |
193 | hardclock(struct clockframe *frame) | 191 | hardclock(struct clockframe *frame) | |
194 | { | 192 | { | |
195 | struct lwp *l; | 193 | struct lwp *l; | |
196 | struct cpu_info *ci; | 194 | struct cpu_info *ci; | |
197 | 195 | |||
198 | ci = curcpu(); | 196 | ci = curcpu(); | |
199 | l = ci->ci_data.cpu_onproc; | 197 | l = ci->ci_data.cpu_onproc; | |
200 | 198 | |||
201 | timer_tick(l, CLKF_USERMODE(frame)); | 199 | timer_tick(l, CLKF_USERMODE(frame)); | |
202 | 200 | |||
203 | /* | 201 | /* | |
204 | * If no separate statistics clock is available, run it from here. | 202 | * If no separate statistics clock is available, run it from here. | |
205 | */ | 203 | */ | |
206 | if (stathz == 0) | 204 | if (stathz == 0) | |
207 | statclock(frame); | 205 | statclock(frame); | |
208 | /* | 206 | /* | |
209 | * If no separate schedclock is provided, call it here | 207 | * If no separate schedclock is provided, call it here | |
210 | * at about 16 Hz. | 208 | * at about 16 Hz. | |
211 | */ | 209 | */ | |
212 | if (schedhz == 0) { | 210 | if (schedhz == 0) { | |
213 | if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) { | 211 | if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) { | |
214 | schedclock(l); | 212 | schedclock(l); | |
215 | ci->ci_schedstate.spc_schedticks = hardscheddiv; | 213 | ci->ci_schedstate.spc_schedticks = hardscheddiv; | |
216 | } | 214 | } | |
217 | } | 215 | } | |
218 | if ((--ci->ci_schedstate.spc_ticks) <= 0) | 216 | if ((--ci->ci_schedstate.spc_ticks) <= 0) | |
219 | sched_tick(ci); | 217 | sched_tick(ci); | |
220 | 218 | |||
221 | if (CPU_IS_PRIMARY(ci)) { | 219 | if (CPU_IS_PRIMARY(ci)) { | |
222 | hardclock_ticks++; | 220 | hardclock_ticks++; | |
223 | tc_ticktock(); | 221 | tc_ticktock(); | |
224 | } | 222 | } | |
225 | 223 | |||
226 | /* | 224 | /* | |
227 | * Update real-time timeout queue. | 225 | * Update real-time timeout queue. | |
228 | */ | 226 | */ | |
229 | callout_hardclock(); | 227 | callout_hardclock(); | |
230 | } | 228 | } | |
231 | 229 | |||
232 | /* | 230 | /* | |
233 | * Start profiling on a process. | 231 | * Start profiling on a process. | |
234 | * | 232 | * | |
235 | * Kernel profiling passes proc0 which never exits and hence | 233 | * Kernel profiling passes proc0 which never exits and hence | |
236 | * keeps the profile clock running constantly. | 234 | * keeps the profile clock running constantly. | |
237 | */ | 235 | */ | |
238 | void | 236 | void | |
239 | startprofclock(struct proc *p) | 237 | startprofclock(struct proc *p) | |
240 | { | 238 | { | |
241 | 239 | |||
242 | KASSERT(mutex_owned(&p->p_stmutex)); | 240 | KASSERT(mutex_owned(&p->p_stmutex)); | |
243 | 241 | |||
244 | if ((p->p_stflag & PST_PROFIL) == 0) { | 242 | if ((p->p_stflag & PST_PROFIL) == 0) { | |
245 | p->p_stflag |= PST_PROFIL; | 243 | p->p_stflag |= PST_PROFIL; | |
246 | /* | 244 | /* | |
247 | * This is only necessary if using the clock as the | 245 | * This is only necessary if using the clock as the | |
248 | * profiling source. | 246 | * profiling source. | |
249 | */ | 247 | */ | |
250 | if (++profprocs == 1 && stathz != 0) | 248 | if (++profprocs == 1 && stathz != 0) | |
251 | psdiv = psratio; | 249 | psdiv = psratio; | |
252 | } | 250 | } | |
253 | } | 251 | } | |
254 | 252 | |||
255 | /* | 253 | /* | |
256 | * Stop profiling on a process. | 254 | * Stop profiling on a process. | |
257 | */ | 255 | */ | |
258 | void | 256 | void | |
259 | stopprofclock(struct proc *p) | 257 | stopprofclock(struct proc *p) | |
260 | { | 258 | { | |
261 | 259 | |||
262 | KASSERT(mutex_owned(&p->p_stmutex)); | 260 | KASSERT(mutex_owned(&p->p_stmutex)); | |
263 | 261 | |||
264 | if (p->p_stflag & PST_PROFIL) { | 262 | if (p->p_stflag & PST_PROFIL) { | |
265 | p->p_stflag &= ~PST_PROFIL; | 263 | p->p_stflag &= ~PST_PROFIL; | |
266 | /* | 264 | /* | |
267 | * This is only necessary if using the clock as the | 265 | * This is only necessary if using the clock as the | |
268 | * profiling source. | 266 | * profiling source. | |
269 | */ | 267 | */ | |
270 | if (--profprocs == 0 && stathz != 0) | 268 | if (--profprocs == 0 && stathz != 0) | |
271 | psdiv = 1; | 269 | psdiv = 1; | |
272 | } | 270 | } | |
273 | } | 271 | } | |
274 | 272 | |||
275 | #if defined(PERFCTRS) | 273 | #if defined(PERFCTRS) | |
276 | /* | 274 | /* | |
277 | * Independent profiling "tick" in case we're using a separate | 275 | * Independent profiling "tick" in case we're using a separate | |
278 | * clock or profiling event source. Currently, that's just | 276 | * clock or profiling event source. Currently, that's just | |
279 | * performance counters--hence the wrapper. | 277 | * performance counters--hence the wrapper. | |
280 | */ | 278 | */ | |
281 | void | 279 | void | |
282 | proftick(struct clockframe *frame) | 280 | proftick(struct clockframe *frame) | |
283 | { | 281 | { | |
284 | #ifdef GPROF | 282 | #ifdef GPROF | |
285 | struct gmonparam *g; | 283 | struct gmonparam *g; | |
286 | intptr_t i; | 284 | intptr_t i; | |
287 | #endif | 285 | #endif | |
288 | struct lwp *l; | 286 | struct lwp *l; | |
289 | struct proc *p; | 287 | struct proc *p; | |
290 | 288 | |||
291 | l = curcpu()->ci_data.cpu_onproc; | 289 | l = curcpu()->ci_data.cpu_onproc; | |
292 | p = (l ? l->l_proc : NULL); | 290 | p = (l ? l->l_proc : NULL); | |
293 | if (CLKF_USERMODE(frame)) { | 291 | if (CLKF_USERMODE(frame)) { | |
294 | mutex_spin_enter(&p->p_stmutex); | 292 | mutex_spin_enter(&p->p_stmutex); | |
295 | if (p->p_stflag & PST_PROFIL) | 293 | if (p->p_stflag & PST_PROFIL) | |
296 | addupc_intr(l, CLKF_PC(frame)); | 294 | addupc_intr(l, CLKF_PC(frame)); | |
297 | mutex_spin_exit(&p->p_stmutex); | 295 | mutex_spin_exit(&p->p_stmutex); | |
298 | } else { | 296 | } else { | |
299 | #ifdef GPROF | 297 | #ifdef GPROF | |
300 | g = &_gmonparam; | 298 | g = &_gmonparam; | |
301 | if (g->state == GMON_PROF_ON) { | 299 | if (g->state == GMON_PROF_ON) { | |
302 | i = CLKF_PC(frame) - g->lowpc; | 300 | i = CLKF_PC(frame) - g->lowpc; | |
303 | if (i < g->textsize) { | 301 | if (i < g->textsize) { | |
304 | i /= HISTFRACTION * sizeof(*g->kcount); | 302 | i /= HISTFRACTION * sizeof(*g->kcount); | |
305 | g->kcount[i]++; | 303 | g->kcount[i]++; | |
306 | } | 304 | } | |
307 | } | 305 | } | |
308 | #endif | 306 | #endif | |
309 | #ifdef LWP_PC | 307 | #ifdef LWP_PC | |
310 | if (p != NULL && (p->p_stflag & PST_PROFIL) != 0) | 308 | if (p != NULL && (p->p_stflag & PST_PROFIL) != 0) | |
311 | addupc_intr(l, LWP_PC(l)); | 309 | addupc_intr(l, LWP_PC(l)); | |
312 | #endif | 310 | #endif | |
313 | } | 311 | } | |
314 | } | 312 | } | |
315 | #endif | 313 | #endif | |
316 | 314 | |||
317 | void | 315 | void | |
318 | schedclock(struct lwp *l) | 316 | schedclock(struct lwp *l) | |
319 | { | 317 | { | |
320 | if ((l->l_flag & LW_IDLE) != 0) | 318 | if ((l->l_flag & LW_IDLE) != 0) | |
321 | return; | 319 | return; | |
322 | 320 | |||
323 | sched_schedclock(l); | 321 | sched_schedclock(l); | |
324 | } | 322 | } | |
325 | 323 | |||
326 | /* | 324 | /* | |
327 | * Statistics clock. Grab profile sample, and if divider reaches 0, | 325 | * Statistics clock. Grab profile sample, and if divider reaches 0, | |
328 | * do process and kernel statistics. | 326 | * do process and kernel statistics. | |
329 | */ | 327 | */ | |
330 | void | 328 | void | |
331 | statclock(struct clockframe *frame) | 329 | statclock(struct clockframe *frame) | |
332 | { | 330 | { | |
333 | #ifdef GPROF | 331 | #ifdef GPROF | |
334 | struct gmonparam *g; | 332 | struct gmonparam *g; | |
335 | intptr_t i; | 333 | intptr_t i; | |
336 | #endif | 334 | #endif | |
337 | struct cpu_info *ci = curcpu(); | 335 | struct cpu_info *ci = curcpu(); | |
338 | struct schedstate_percpu *spc = &ci->ci_schedstate; | 336 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |
339 | struct proc *p; | 337 | struct proc *p; | |
340 | struct lwp *l; | 338 | struct lwp *l; | |
341 | 339 | |||
342 | /* | 340 | /* | |
343 | * Notice changes in divisor frequency, and adjust clock | 341 | * Notice changes in divisor frequency, and adjust clock | |
344 | * frequency accordingly. | 342 | * frequency accordingly. | |
345 | */ | 343 | */ | |
346 | if (spc->spc_psdiv != psdiv) { | 344 | if (spc->spc_psdiv != psdiv) { | |
347 | spc->spc_psdiv = psdiv; | 345 | spc->spc_psdiv = psdiv; | |
348 | spc->spc_pscnt = psdiv; | 346 | spc->spc_pscnt = psdiv; | |
349 | if (psdiv == 1) { | 347 | if (psdiv == 1) { | |
350 | setstatclockrate(stathz); | 348 | setstatclockrate(stathz); | |
351 | } else { | 349 | } else { | |
352 | setstatclockrate(profhz); | 350 | setstatclockrate(profhz); | |
353 | } | 351 | } | |
354 | } | 352 | } | |
355 | l = ci->ci_data.cpu_onproc; | 353 | l = ci->ci_data.cpu_onproc; | |
356 | if ((l->l_flag & LW_IDLE) != 0) { | 354 | if ((l->l_flag & LW_IDLE) != 0) { | |
357 | /* | 355 | /* | |
358 | * don't account idle lwps as swapper. | 356 | * don't account idle lwps as swapper. | |
359 | */ | 357 | */ | |
360 | p = NULL; | 358 | p = NULL; | |
361 | } else { | 359 | } else { | |
362 | p = l->l_proc; | 360 | p = l->l_proc; | |
363 | mutex_spin_enter(&p->p_stmutex); | 361 | mutex_spin_enter(&p->p_stmutex); | |
364 | } | 362 | } | |
365 | 363 | |||
366 | if (CLKF_USERMODE(frame)) { | 364 | if (CLKF_USERMODE(frame)) { | |
367 | if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK) | 365 | if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK) | |
368 | addupc_intr(l, CLKF_PC(frame)); | 366 | addupc_intr(l, CLKF_PC(frame)); | |
369 | if (--spc->spc_pscnt > 0) { | 367 | if (--spc->spc_pscnt > 0) { | |
370 | mutex_spin_exit(&p->p_stmutex); | 368 | mutex_spin_exit(&p->p_stmutex); | |
371 | return; | 369 | return; | |
372 | } | 370 | } | |
373 | 371 | |||
374 | /* | 372 | /* | |
375 | * Came from user mode; CPU was in user state. | 373 | * Came from user mode; CPU was in user state. | |
376 | * If this process is being profiled record the tick. | 374 | * If this process is being profiled record the tick. | |
377 | */ | 375 | */ | |
378 | p->p_uticks++; | 376 | p->p_uticks++; | |
379 | if (p->p_nice > NZERO) | 377 | if (p->p_nice > NZERO) | |
380 | spc->spc_cp_time[CP_NICE]++; | 378 | spc->spc_cp_time[CP_NICE]++; | |
381 | else | 379 | else | |
382 | spc->spc_cp_time[CP_USER]++; | 380 | spc->spc_cp_time[CP_USER]++; | |
383 | } else { | 381 | } else { | |
384 | #ifdef GPROF | 382 | #ifdef GPROF | |
385 | /* | 383 | /* | |
386 | * Kernel statistics are just like addupc_intr, only easier. | 384 | * Kernel statistics are just like addupc_intr, only easier. | |
387 | */ | 385 | */ | |
388 | g = &_gmonparam; | 386 | g = &_gmonparam; | |
389 | if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) { | 387 | if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) { | |
390 | i = CLKF_PC(frame) - g->lowpc; | 388 | i = CLKF_PC(frame) - g->lowpc; | |
391 | if (i < g->textsize) { | 389 | if (i < g->textsize) { | |
392 | i /= HISTFRACTION * sizeof(*g->kcount); | 390 | i /= HISTFRACTION * sizeof(*g->kcount); | |
393 | g->kcount[i]++; | 391 | g->kcount[i]++; | |
394 | } | 392 | } | |
395 | } | 393 | } | |
396 | #endif | 394 | #endif | |
397 | #ifdef LWP_PC | 395 | #ifdef LWP_PC | |
398 | if (p != NULL && profsrc == PROFSRC_CLOCK && | 396 | if (p != NULL && profsrc == PROFSRC_CLOCK && | |
399 | (p->p_stflag & PST_PROFIL)) { | 397 | (p->p_stflag & PST_PROFIL)) { | |
400 | addupc_intr(l, LWP_PC(l)); | 398 | addupc_intr(l, LWP_PC(l)); | |
401 | } | 399 | } | |
402 | #endif | 400 | #endif | |
403 | if (--spc->spc_pscnt > 0) { | 401 | if (--spc->spc_pscnt > 0) { | |
404 | if (p != NULL) | 402 | if (p != NULL) | |
405 | mutex_spin_exit(&p->p_stmutex); | 403 | mutex_spin_exit(&p->p_stmutex); | |
406 | return; | 404 | return; | |
407 | } | 405 | } | |
408 | /* | 406 | /* | |
409 | * Came from kernel mode, so we were: | 407 | * Came from kernel mode, so we were: | |
410 | * - handling an interrupt, | 408 | * - handling an interrupt, | |
411 | * - doing syscall or trap work on behalf of the current | 409 | * - doing syscall or trap work on behalf of the current | |
412 | * user process, or | 410 | * user process, or | |
413 | * - spinning in the idle loop. | 411 | * - spinning in the idle loop. | |
414 | * Whichever it is, charge the time as appropriate. | 412 | * Whichever it is, charge the time as appropriate. | |
415 | * Note that we charge interrupts to the current process, | 413 | * Note that we charge interrupts to the current process, | |
416 | * regardless of whether they are ``for'' that process, | 414 | * regardless of whether they are ``for'' that process, | |
417 | * so that we know how much of its real time was spent | 415 | * so that we know how much of its real time was spent | |
418 | * in ``non-process'' (i.e., interrupt) work. | 416 | * in ``non-process'' (i.e., interrupt) work. | |
419 | */ | 417 | */ | |
420 | if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) { | 418 | if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) { | |
421 | if (p != NULL) { | 419 | if (p != NULL) { | |
422 | p->p_iticks++; | 420 | p->p_iticks++; | |
423 | } | 421 | } | |
424 | spc->spc_cp_time[CP_INTR]++; | 422 | spc->spc_cp_time[CP_INTR]++; | |
425 | } else if (p != NULL) { | 423 | } else if (p != NULL) { | |
426 | p->p_sticks++; | 424 | p->p_sticks++; | |
427 | spc->spc_cp_time[CP_SYS]++; | 425 | spc->spc_cp_time[CP_SYS]++; | |
428 | } else { | 426 | } else { | |
429 | spc->spc_cp_time[CP_IDLE]++; | 427 | spc->spc_cp_time[CP_IDLE]++; | |
430 | } | 428 | } | |
431 | } | 429 | } | |
432 | spc->spc_pscnt = psdiv; | 430 | spc->spc_pscnt = psdiv; | |
433 | 431 | |||
434 | if (p != NULL) { | 432 | if (p != NULL) { | |
435 | atomic_inc_uint(&l->l_cpticks); | 433 | atomic_inc_uint(&l->l_cpticks); | |
436 | mutex_spin_exit(&p->p_stmutex); | 434 | mutex_spin_exit(&p->p_stmutex); | |
437 | } | 435 | } | |
438 | } | 436 | } |
--- src/sys/kern/kern_condvar.c 2011/04/14 20:19:35 1.29
+++ src/sys/kern/kern_condvar.c 2011/07/27 14:35:33 1.30
@@ -1,391 +1,389 @@ | @@ -1,391 +1,389 @@ | |||
1 | /* $NetBSD: kern_condvar.c,v 1.29 2011/04/14 20:19:35 jym Exp $ */ | 1 | /* $NetBSD: kern_condvar.c,v 1.30 2011/07/27 14:35:33 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Kernel condition variable implementation. | 33 | * Kernel condition variable implementation. | |
34 | */ | 34 | */ | |
35 | 35 | |||
36 | #include <sys/cdefs.h> | 36 | #include <sys/cdefs.h> | |
37 | __KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.29 2011/04/14 20:19:35 jym Exp $"); | 37 | __KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.30 2011/07/27 14:35:33 uebayasi Exp $"); | |
38 | 38 | |||
39 | #include <sys/param.h> | 39 | #include <sys/param.h> | |
40 | #include <sys/proc.h> | 40 | #include <sys/proc.h> | |
41 | #include <sys/sched.h> | 41 | #include <sys/sched.h> | |
42 | #include <sys/systm.h> | 42 | #include <sys/systm.h> | |
43 | #include <sys/condvar.h> | 43 | #include <sys/condvar.h> | |
44 | #include <sys/sleepq.h> | 44 | #include <sys/sleepq.h> | |
45 | #include <sys/lockdebug.h> | 45 | #include <sys/lockdebug.h> | |
46 | #include <sys/cpu.h> | 46 | #include <sys/cpu.h> | |
47 | 47 | |||
48 | #include <uvm/uvm_extern.h> | |||
49 | ||||
50 | /* | 48 | /* | |
51 | * Accessors for the private contents of the kcondvar_t data type. | 49 | * Accessors for the private contents of the kcondvar_t data type. | |
52 | * | 50 | * | |
53 | * cv_opaque[0] sleepq... | 51 | * cv_opaque[0] sleepq... | |
54 | * cv_opaque[1] ...pointers | 52 | * cv_opaque[1] ...pointers | |
55 | * cv_opaque[2] description for ps(1) | 53 | * cv_opaque[2] description for ps(1) | |
56 | * | 54 | * | |
57 | * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue | 55 | * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue | |
58 | * only), and the sleep queue lock acquired with sleeptab_lookup() (enqueue | 56 | * only), and the sleep queue lock acquired with sleeptab_lookup() (enqueue | |
59 | * and dequeue). | 57 | * and dequeue). | |
60 | * | 58 | * | |
61 | * cv_opaque[2] (the wmesg) is static and does not change throughout the life | 59 | * cv_opaque[2] (the wmesg) is static and does not change throughout the life | |
62 | * of the CV. | 60 | * of the CV. | |
63 | */ | 61 | */ | |
64 | #define CV_SLEEPQ(cv) ((sleepq_t *)(cv)->cv_opaque) | 62 | #define CV_SLEEPQ(cv) ((sleepq_t *)(cv)->cv_opaque) | |
65 | #define CV_WMESG(cv) ((const char *)(cv)->cv_opaque[2]) | 63 | #define CV_WMESG(cv) ((const char *)(cv)->cv_opaque[2]) | |
66 | #define CV_SET_WMESG(cv, v) (cv)->cv_opaque[2] = __UNCONST(v) | 64 | #define CV_SET_WMESG(cv, v) (cv)->cv_opaque[2] = __UNCONST(v) | |
67 | 65 | |||
68 | #define CV_DEBUG_P(cv) (CV_WMESG(cv) != nodebug) | 66 | #define CV_DEBUG_P(cv) (CV_WMESG(cv) != nodebug) | |
69 | #define CV_RA ((uintptr_t)__builtin_return_address(0)) | 67 | #define CV_RA ((uintptr_t)__builtin_return_address(0)) | |
70 | 68 | |||
71 | static void cv_unsleep(lwp_t *, bool); | 69 | static void cv_unsleep(lwp_t *, bool); | |
72 | static void cv_wakeup_one(kcondvar_t *); | 70 | static void cv_wakeup_one(kcondvar_t *); | |
73 | static void cv_wakeup_all(kcondvar_t *); | 71 | static void cv_wakeup_all(kcondvar_t *); | |
74 | 72 | |||
75 | static syncobj_t cv_syncobj = { | 73 | static syncobj_t cv_syncobj = { | |
76 | SOBJ_SLEEPQ_SORTED, | 74 | SOBJ_SLEEPQ_SORTED, | |
77 | cv_unsleep, | 75 | cv_unsleep, | |
78 | sleepq_changepri, | 76 | sleepq_changepri, | |
79 | sleepq_lendpri, | 77 | sleepq_lendpri, | |
80 | syncobj_noowner, | 78 | syncobj_noowner, | |
81 | }; | 79 | }; | |
82 | 80 | |||
83 | lockops_t cv_lockops = { | 81 | lockops_t cv_lockops = { | |
84 | "Condition variable", | 82 | "Condition variable", | |
85 | LOCKOPS_CV, | 83 | LOCKOPS_CV, | |
86 | NULL | 84 | NULL | |
87 | }; | 85 | }; | |
88 | 86 | |||
89 | static const char deadcv[] = "deadcv"; | 87 | static const char deadcv[] = "deadcv"; | |
90 | static const char nodebug[] = "nodebug"; | 88 | static const char nodebug[] = "nodebug"; | |
91 | 89 | |||
92 | /* | 90 | /* | |
93 | * cv_init: | 91 | * cv_init: | |
94 | * | 92 | * | |
95 | * Initialize a condition variable for use. | 93 | * Initialize a condition variable for use. | |
96 | */ | 94 | */ | |
97 | void | 95 | void | |
98 | cv_init(kcondvar_t *cv, const char *wmesg) | 96 | cv_init(kcondvar_t *cv, const char *wmesg) | |
99 | { | 97 | { | |
100 | #ifdef LOCKDEBUG | 98 | #ifdef LOCKDEBUG | |
101 | bool dodebug; | 99 | bool dodebug; | |
102 | 100 | |||
103 | dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops, | 101 | dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops, | |
104 | (uintptr_t)__builtin_return_address(0)); | 102 | (uintptr_t)__builtin_return_address(0)); | |
105 | if (!dodebug) { | 103 | if (!dodebug) { | |
106 | /* XXX This will break vfs_lockf. */ | 104 | /* XXX This will break vfs_lockf. */ | |
107 | wmesg = nodebug; | 105 | wmesg = nodebug; | |
108 | } | 106 | } | |
109 | #endif | 107 | #endif | |
110 | KASSERT(wmesg != NULL); | 108 | KASSERT(wmesg != NULL); | |
111 | CV_SET_WMESG(cv, wmesg); | 109 | CV_SET_WMESG(cv, wmesg); | |
112 | sleepq_init(CV_SLEEPQ(cv)); | 110 | sleepq_init(CV_SLEEPQ(cv)); | |
113 | } | 111 | } | |
114 | 112 | |||
115 | /* | 113 | /* | |
116 | * cv_destroy: | 114 | * cv_destroy: | |
117 | * | 115 | * | |
118 | * Tear down a condition variable. | 116 | * Tear down a condition variable. | |
119 | */ | 117 | */ | |
120 | void | 118 | void | |
121 | cv_destroy(kcondvar_t *cv) | 119 | cv_destroy(kcondvar_t *cv) | |
122 | { | 120 | { | |
123 | 121 | |||
124 | LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv); | 122 | LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv); | |
125 | #ifdef DIAGNOSTIC | 123 | #ifdef DIAGNOSTIC | |
126 | KASSERT(cv_is_valid(cv)); | 124 | KASSERT(cv_is_valid(cv)); | |
127 | CV_SET_WMESG(cv, deadcv); | 125 | CV_SET_WMESG(cv, deadcv); | |
128 | #endif | 126 | #endif | |
129 | } | 127 | } | |
130 | 128 | |||
131 | /* | 129 | /* | |
132 | * cv_enter: | 130 | * cv_enter: | |
133 | * | 131 | * | |
134 | * Look up and lock the sleep queue corresponding to the given | 132 | * Look up and lock the sleep queue corresponding to the given | |
135 | * condition variable, and increment the number of waiters. | 133 | * condition variable, and increment the number of waiters. | |
136 | */ | 134 | */ | |
137 | static inline void | 135 | static inline void | |
138 | cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l) | 136 | cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l) | |
139 | { | 137 | { | |
140 | sleepq_t *sq; | 138 | sleepq_t *sq; | |
141 | kmutex_t *mp; | 139 | kmutex_t *mp; | |
142 | 140 | |||
143 | KASSERT(cv_is_valid(cv)); | 141 | KASSERT(cv_is_valid(cv)); | |
144 | KASSERT(!cpu_intr_p()); | 142 | KASSERT(!cpu_intr_p()); | |
145 | KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); | 143 | KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); | |
146 | 144 | |||
147 | LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0); | 145 | LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0); | |
148 | 146 | |||
149 | l->l_kpriority = true; | 147 | l->l_kpriority = true; | |
150 | mp = sleepq_hashlock(cv); | 148 | mp = sleepq_hashlock(cv); | |
151 | sq = CV_SLEEPQ(cv); | 149 | sq = CV_SLEEPQ(cv); | |
152 | sleepq_enter(sq, l, mp); | 150 | sleepq_enter(sq, l, mp); | |
153 | sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj); | 151 | sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj); | |
154 | mutex_exit(mtx); | 152 | mutex_exit(mtx); | |
155 | KASSERT(cv_has_waiters(cv)); | 153 | KASSERT(cv_has_waiters(cv)); | |
156 | } | 154 | } | |
157 | 155 | |||
158 | /* | 156 | /* | |
159 | * cv_exit: | 157 | * cv_exit: | |
160 | * | 158 | * | |
161 | * After resuming execution, check to see if we have been restarted | 159 | * After resuming execution, check to see if we have been restarted | |
162 | * as a result of cv_signal(). If we have, but cannot take the | 160 | * as a result of cv_signal(). If we have, but cannot take the | |
163 | * wakeup (because of eg a pending Unix signal or timeout) then try | 161 | * wakeup (because of eg a pending Unix signal or timeout) then try | |
164 | * to ensure that another LWP sees it. This is necessary because | 162 | * to ensure that another LWP sees it. This is necessary because | |
165 | * there may be multiple waiters, and at least one should take the | 163 | * there may be multiple waiters, and at least one should take the | |
166 | * wakeup if possible. | 164 | * wakeup if possible. | |
167 | */ | 165 | */ | |
168 | static inline int | 166 | static inline int | |
169 | cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error) | 167 | cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error) | |
170 | { | 168 | { | |
171 | 169 | |||
172 | mutex_enter(mtx); | 170 | mutex_enter(mtx); | |
173 | if (__predict_false(error != 0)) | 171 | if (__predict_false(error != 0)) | |
174 | cv_signal(cv); | 172 | cv_signal(cv); | |
175 | 173 | |||
176 | LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0); | 174 | LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0); | |
177 | KASSERT(cv_is_valid(cv)); | 175 | KASSERT(cv_is_valid(cv)); | |
178 | 176 | |||
179 | return error; | 177 | return error; | |
180 | } | 178 | } | |
181 | 179 | |||
182 | /* | 180 | /* | |
183 | * cv_unsleep: | 181 | * cv_unsleep: | |
184 | * | 182 | * | |
185 | * Remove an LWP from the condition variable and sleep queue. This | 183 | * Remove an LWP from the condition variable and sleep queue. This | |
186 | * is called when the LWP has not been awoken normally but instead | 184 | * is called when the LWP has not been awoken normally but instead | |
187 | * interrupted: for example, when a signal is received. Must be | 185 | * interrupted: for example, when a signal is received. Must be | |
188 | * called with the LWP locked, and must return it unlocked. | 186 | * called with the LWP locked, and must return it unlocked. | |
189 | */ | 187 | */ | |
190 | static void | 188 | static void | |
191 | cv_unsleep(lwp_t *l, bool cleanup) | 189 | cv_unsleep(lwp_t *l, bool cleanup) | |
192 | { | 190 | { | |
193 | kcondvar_t *cv; | 191 | kcondvar_t *cv; | |
194 | 192 | |||
195 | cv = (kcondvar_t *)(uintptr_t)l->l_wchan; | 193 | cv = (kcondvar_t *)(uintptr_t)l->l_wchan; | |
196 | 194 | |||
197 | KASSERT(l->l_wchan == (wchan_t)cv); | 195 | KASSERT(l->l_wchan == (wchan_t)cv); | |
198 | KASSERT(l->l_sleepq == CV_SLEEPQ(cv)); | 196 | KASSERT(l->l_sleepq == CV_SLEEPQ(cv)); | |
199 | KASSERT(cv_is_valid(cv)); | 197 | KASSERT(cv_is_valid(cv)); | |
200 | KASSERT(cv_has_waiters(cv)); | 198 | KASSERT(cv_has_waiters(cv)); | |
201 | 199 | |||
202 | sleepq_unsleep(l, cleanup); | 200 | sleepq_unsleep(l, cleanup); | |
203 | } | 201 | } | |
204 | 202 | |||
205 | /* | 203 | /* | |
206 | * cv_wait: | 204 | * cv_wait: | |
207 | * | 205 | * | |
208 | * Wait non-interruptably on a condition variable until awoken. | 206 | * Wait non-interruptably on a condition variable until awoken. | |
209 | */ | 207 | */ | |
210 | void | 208 | void | |
211 | cv_wait(kcondvar_t *cv, kmutex_t *mtx) | 209 | cv_wait(kcondvar_t *cv, kmutex_t *mtx) | |
212 | { | 210 | { | |
213 | lwp_t *l = curlwp; | 211 | lwp_t *l = curlwp; | |
214 | 212 | |||
215 | KASSERT(mutex_owned(mtx)); | 213 | KASSERT(mutex_owned(mtx)); | |
216 | 214 | |||
217 | cv_enter(cv, mtx, l); | 215 | cv_enter(cv, mtx, l); | |
218 | (void)sleepq_block(0, false); | 216 | (void)sleepq_block(0, false); | |
219 | (void)cv_exit(cv, mtx, l, 0); | 217 | (void)cv_exit(cv, mtx, l, 0); | |
220 | } | 218 | } | |
221 | 219 | |||
222 | /* | 220 | /* | |
223 | * cv_wait_sig: | 221 | * cv_wait_sig: | |
224 | * | 222 | * | |
225 | * Wait on a condition variable until a awoken or a signal is received. | 223 | * Wait on a condition variable until a awoken or a signal is received. | |
226 | * Will also return early if the process is exiting. Returns zero if | 224 | * Will also return early if the process is exiting. Returns zero if | |
227 | * awoken normally, ERESTART if a signal was received and the system | 225 | * awoken normally, ERESTART if a signal was received and the system | |
228 | * call is restartable, or EINTR otherwise. | 226 | * call is restartable, or EINTR otherwise. | |
229 | */ | 227 | */ | |
230 | int | 228 | int | |
231 | cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) | 229 | cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) | |
232 | { | 230 | { | |
233 | lwp_t *l = curlwp; | 231 | lwp_t *l = curlwp; | |
234 | int error; | 232 | int error; | |
235 | 233 | |||
236 | KASSERT(mutex_owned(mtx)); | 234 | KASSERT(mutex_owned(mtx)); | |
237 | 235 | |||
238 | cv_enter(cv, mtx, l); | 236 | cv_enter(cv, mtx, l); | |
239 | error = sleepq_block(0, true); | 237 | error = sleepq_block(0, true); | |
240 | return cv_exit(cv, mtx, l, error); | 238 | return cv_exit(cv, mtx, l, error); | |
241 | } | 239 | } | |
242 | 240 | |||
243 | /* | 241 | /* | |
244 | * cv_timedwait: | 242 | * cv_timedwait: | |
245 | * | 243 | * | |
246 | * Wait on a condition variable until awoken or the specified timeout | 244 | * Wait on a condition variable until awoken or the specified timeout | |
247 | * expires. Returns zero if awoken normally or EWOULDBLOCK if the | 245 | * expires. Returns zero if awoken normally or EWOULDBLOCK if the | |
248 | * timeout expired. | 246 | * timeout expired. | |
249 | */ | 247 | */ | |
250 | int | 248 | int | |
251 | cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo) | 249 | cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo) | |
252 | { | 250 | { | |
253 | lwp_t *l = curlwp; | 251 | lwp_t *l = curlwp; | |
254 | int error; | 252 | int error; | |
255 | 253 | |||
256 | KASSERT(mutex_owned(mtx)); | 254 | KASSERT(mutex_owned(mtx)); | |
257 | 255 | |||
258 | cv_enter(cv, mtx, l); | 256 | cv_enter(cv, mtx, l); | |
259 | error = sleepq_block(timo, false); | 257 | error = sleepq_block(timo, false); | |
260 | return cv_exit(cv, mtx, l, error); | 258 | return cv_exit(cv, mtx, l, error); | |
261 | } | 259 | } | |
262 | 260 | |||
263 | /* | 261 | /* | |
264 | * cv_timedwait_sig: | 262 | * cv_timedwait_sig: | |
265 | * | 263 | * | |
266 | * Wait on a condition variable until a timeout expires, awoken or a | 264 | * Wait on a condition variable until a timeout expires, awoken or a | |
267 | * signal is received. Will also return early if the process is | 265 | * signal is received. Will also return early if the process is | |
268 | * exiting. Returns zero if awoken normally, EWOULDBLOCK if the | 266 | * exiting. Returns zero if awoken normally, EWOULDBLOCK if the | |
269 | * timeout expires, ERESTART if a signal was received and the system | 267 | * timeout expires, ERESTART if a signal was received and the system | |
270 | * call is restartable, or EINTR otherwise. | 268 | * call is restartable, or EINTR otherwise. | |
271 | */ | 269 | */ | |
272 | int | 270 | int | |
273 | cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo) | 271 | cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo) | |
274 | { | 272 | { | |
275 | lwp_t *l = curlwp; | 273 | lwp_t *l = curlwp; | |
276 | int error; | 274 | int error; | |
277 | 275 | |||
278 | KASSERT(mutex_owned(mtx)); | 276 | KASSERT(mutex_owned(mtx)); | |
279 | 277 | |||
280 | cv_enter(cv, mtx, l); | 278 | cv_enter(cv, mtx, l); | |
281 | error = sleepq_block(timo, true); | 279 | error = sleepq_block(timo, true); | |
282 | return cv_exit(cv, mtx, l, error); | 280 | return cv_exit(cv, mtx, l, error); | |
283 | } | 281 | } | |
284 | 282 | |||
285 | /* | 283 | /* | |
286 | * cv_signal: | 284 | * cv_signal: | |
287 | * | 285 | * | |
288 | * Wake the highest priority LWP waiting on a condition variable. | 286 | * Wake the highest priority LWP waiting on a condition variable. | |
289 | * Must be called with the interlocking mutex held. | 287 | * Must be called with the interlocking mutex held. | |
290 | */ | 288 | */ | |
291 | void | 289 | void | |
292 | cv_signal(kcondvar_t *cv) | 290 | cv_signal(kcondvar_t *cv) | |
293 | { | 291 | { | |
294 | 292 | |||
295 | /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ | 293 | /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ | |
296 | KASSERT(cv_is_valid(cv)); | 294 | KASSERT(cv_is_valid(cv)); | |
297 | 295 | |||
298 | if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) | 296 | if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) | |
299 | cv_wakeup_one(cv); | 297 | cv_wakeup_one(cv); | |
300 | } | 298 | } | |
301 | 299 | |||
302 | static void __noinline | 300 | static void __noinline | |
303 | cv_wakeup_one(kcondvar_t *cv) | 301 | cv_wakeup_one(kcondvar_t *cv) | |
304 | { | 302 | { | |
305 | sleepq_t *sq; | 303 | sleepq_t *sq; | |
306 | kmutex_t *mp; | 304 | kmutex_t *mp; | |
307 | lwp_t *l; | 305 | lwp_t *l; | |
308 | 306 | |||
309 | KASSERT(cv_is_valid(cv)); | 307 | KASSERT(cv_is_valid(cv)); | |
310 | 308 | |||
311 | mp = sleepq_hashlock(cv); | 309 | mp = sleepq_hashlock(cv); | |
312 | sq = CV_SLEEPQ(cv); | 310 | sq = CV_SLEEPQ(cv); | |
313 | l = TAILQ_FIRST(sq); | 311 | l = TAILQ_FIRST(sq); | |
314 | if (l == NULL) { | 312 | if (l == NULL) { | |
315 | mutex_spin_exit(mp); | 313 | mutex_spin_exit(mp); | |
316 | return; | 314 | return; | |
317 | } | 315 | } | |
318 | KASSERT(l->l_sleepq == sq); | 316 | KASSERT(l->l_sleepq == sq); | |
319 | KASSERT(l->l_mutex == mp); | 317 | KASSERT(l->l_mutex == mp); | |
320 | KASSERT(l->l_wchan == cv); | 318 | KASSERT(l->l_wchan == cv); | |
321 | sleepq_remove(sq, l); | 319 | sleepq_remove(sq, l); | |
322 | mutex_spin_exit(mp); | 320 | mutex_spin_exit(mp); | |
323 | 321 | |||
324 | KASSERT(cv_is_valid(cv)); | 322 | KASSERT(cv_is_valid(cv)); | |
325 | } | 323 | } | |
326 | 324 | |||
327 | /* | 325 | /* | |
328 | * cv_broadcast: | 326 | * cv_broadcast: | |
329 | * | 327 | * | |
330 | * Wake all LWPs waiting on a condition variable. Must be called | 328 | * Wake all LWPs waiting on a condition variable. Must be called | |
331 | * with the interlocking mutex held. | 329 | * with the interlocking mutex held. | |
332 | */ | 330 | */ | |
333 | void | 331 | void | |
334 | cv_broadcast(kcondvar_t *cv) | 332 | cv_broadcast(kcondvar_t *cv) | |
335 | { | 333 | { | |
336 | 334 | |||
337 | /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ | 335 | /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ | |
338 | KASSERT(cv_is_valid(cv)); | 336 | KASSERT(cv_is_valid(cv)); | |
339 | 337 | |||
340 | if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) | 338 | if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) | |
341 | cv_wakeup_all(cv); | 339 | cv_wakeup_all(cv); | |
342 | } | 340 | } | |
343 | 341 | |||
344 | static void __noinline | 342 | static void __noinline | |
345 | cv_wakeup_all(kcondvar_t *cv) | 343 | cv_wakeup_all(kcondvar_t *cv) | |
346 | { | 344 | { | |
347 | sleepq_t *sq; | 345 | sleepq_t *sq; | |
348 | kmutex_t *mp; | 346 | kmutex_t *mp; | |
349 | lwp_t *l, *next; | 347 | lwp_t *l, *next; | |
350 | 348 | |||
351 | KASSERT(cv_is_valid(cv)); | 349 | KASSERT(cv_is_valid(cv)); | |
352 | 350 | |||
353 | mp = sleepq_hashlock(cv); | 351 | mp = sleepq_hashlock(cv); | |
354 | sq = CV_SLEEPQ(cv); | 352 | sq = CV_SLEEPQ(cv); | |
355 | for (l = TAILQ_FIRST(sq); l != NULL; l = next) { | 353 | for (l = TAILQ_FIRST(sq); l != NULL; l = next) { | |
356 | KASSERT(l->l_sleepq == sq); | 354 | KASSERT(l->l_sleepq == sq); | |
357 | KASSERT(l->l_mutex == mp); | 355 | KASSERT(l->l_mutex == mp); | |
358 | KASSERT(l->l_wchan == cv); | 356 | KASSERT(l->l_wchan == cv); | |
359 | next = TAILQ_NEXT(l, l_sleepchain); | 357 | next = TAILQ_NEXT(l, l_sleepchain); | |
360 | sleepq_remove(sq, l); | 358 | sleepq_remove(sq, l); | |
361 | } | 359 | } | |
362 | mutex_spin_exit(mp); | 360 | mutex_spin_exit(mp); | |
363 | 361 | |||
364 | KASSERT(cv_is_valid(cv)); | 362 | KASSERT(cv_is_valid(cv)); | |
365 | } | 363 | } | |
366 | 364 | |||
367 | /* | 365 | /* | |
368 | * cv_has_waiters: | 366 | * cv_has_waiters: | |
369 | * | 367 | * | |
370 | * For diagnostic assertions: return non-zero if a condition | 368 | * For diagnostic assertions: return non-zero if a condition | |
371 | * variable has waiters. | 369 | * variable has waiters. | |
372 | */ | 370 | */ | |
373 | bool | 371 | bool | |
374 | cv_has_waiters(kcondvar_t *cv) | 372 | cv_has_waiters(kcondvar_t *cv) | |
375 | { | 373 | { | |
376 | 374 | |||
377 | return !TAILQ_EMPTY(CV_SLEEPQ(cv)); | 375 | return !TAILQ_EMPTY(CV_SLEEPQ(cv)); | |
378 | } | 376 | } | |
379 | 377 | |||
380 | /* | 378 | /* | |
381 | * cv_is_valid: | 379 | * cv_is_valid: | |
382 | * | 380 | * | |
383 | * For diagnostic assertions: return non-zero if a condition | 381 | * For diagnostic assertions: return non-zero if a condition | |
384 | * variable appears to be valid. No locks need be held. | 382 | * variable appears to be valid. No locks need be held. | |
385 | */ | 383 | */ | |
386 | bool | 384 | bool | |
387 | cv_is_valid(kcondvar_t *cv) | 385 | cv_is_valid(kcondvar_t *cv) | |
388 | { | 386 | { | |
389 | 387 | |||
390 | return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL; | 388 | return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL; | |
391 | } | 389 | } |
--- src/sys/kern/kern_turnstile.c 2011/05/13 22:19:41 1.29
+++ src/sys/kern/kern_turnstile.c 2011/07/27 14:35:34 1.30
@@ -1,515 +1,513 @@ | @@ -1,515 +1,513 @@ | |||
1 | /* $NetBSD: kern_turnstile.c,v 1.29 2011/05/13 22:19:41 rmind Exp $ */ | 1 | /* $NetBSD: kern_turnstile.c,v 1.30 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe and Andrew Doran. | 8 | * by Jason R. Thorpe and Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Turnstiles are described in detail in: | 33 | * Turnstiles are described in detail in: | |
34 | * | 34 | * | |
35 | * Solaris Internals: Core Kernel Architecture, Jim Mauro and | 35 | * Solaris Internals: Core Kernel Architecture, Jim Mauro and | |
36 | * Richard McDougall. | 36 | * Richard McDougall. | |
37 | * | 37 | * | |
38 | * Turnstiles are kept in a hash table. There are likely to be many more | 38 | * Turnstiles are kept in a hash table. There are likely to be many more | |
39 | * synchronisation objects than there are threads. Since a thread can block | 39 | * synchronisation objects than there are threads. Since a thread can block | |
40 | * on only one lock at a time, we only need one turnstile per thread, and | 40 | * on only one lock at a time, we only need one turnstile per thread, and | |
41 | * so they are allocated at thread creation time. | 41 | * so they are allocated at thread creation time. | |
42 | * | 42 | * | |
43 | * When a thread decides it needs to block on a lock, it looks up the | 43 | * When a thread decides it needs to block on a lock, it looks up the | |
44 | * active turnstile for that lock. If no active turnstile exists, then | 44 | * active turnstile for that lock. If no active turnstile exists, then | |
45 | * the process lends its turnstile to the lock. If there is already an | 45 | * the process lends its turnstile to the lock. If there is already an | |
46 | * active turnstile for the lock, the thread places its turnstile on a | 46 | * active turnstile for the lock, the thread places its turnstile on a | |
47 | * list of free turnstiles, and references the active one instead. | 47 | * list of free turnstiles, and references the active one instead. | |
48 | * | 48 | * | |
49 | * The act of looking up the turnstile acquires an interlock on the sleep | 49 | * The act of looking up the turnstile acquires an interlock on the sleep | |
50 | * queue. If a thread decides it doesn't need to block after all, then this | 50 | * queue. If a thread decides it doesn't need to block after all, then this | |
51 | * interlock must be released by explicitly aborting the turnstile | 51 | * interlock must be released by explicitly aborting the turnstile | |
52 | * operation. | 52 | * operation. | |
53 | * | 53 | * | |
54 | * When a thread is awakened, it needs to get its turnstile back. If there | 54 | * When a thread is awakened, it needs to get its turnstile back. If there | |
55 | * are still other threads waiting in the active turnstile, the thread | 55 | * are still other threads waiting in the active turnstile, the thread | |
56 | * grabs a free turnstile off the free list. Otherwise, it can take back | 56 | * grabs a free turnstile off the free list. Otherwise, it can take back | |
57 | * the active turnstile from the lock (thus deactivating the turnstile). | 57 | * the active turnstile from the lock (thus deactivating the turnstile). | |
58 | * | 58 | * | |
59 | * Turnstiles are the place to do priority inheritence. | 59 | * Turnstiles are the place to do priority inheritence. | |
60 | */ | 60 | */ | |
61 | 61 | |||
62 | #include <sys/cdefs.h> | 62 | #include <sys/cdefs.h> | |
63 | __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.29 2011/05/13 22:19:41 rmind Exp $"); | 63 | __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.30 2011/07/27 14:35:34 uebayasi Exp $"); | |
64 | 64 | |||
65 | #include <sys/param.h> | 65 | #include <sys/param.h> | |
66 | #include <sys/lockdebug.h> | 66 | #include <sys/lockdebug.h> | |
67 | #include <sys/pool.h> | 67 | #include <sys/pool.h> | |
68 | #include <sys/proc.h> | 68 | #include <sys/proc.h> | |
69 | #include <sys/sleepq.h> | 69 | #include <sys/sleepq.h> | |
70 | #include <sys/systm.h> | 70 | #include <sys/systm.h> | |
71 | 71 | |||
72 | #include <uvm/uvm_extern.h> | |||
73 | ||||
74 | #define TS_HASH_SIZE 64 | 72 | #define TS_HASH_SIZE 64 | |
75 | #define TS_HASH_MASK (TS_HASH_SIZE - 1) | 73 | #define TS_HASH_MASK (TS_HASH_SIZE - 1) | |
76 | #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) | 74 | #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) | |
77 | 75 | |||
78 | static tschain_t turnstile_tab[TS_HASH_SIZE] __cacheline_aligned; | 76 | static tschain_t turnstile_tab[TS_HASH_SIZE] __cacheline_aligned; | |
79 | pool_cache_t turnstile_cache __read_mostly; | 77 | pool_cache_t turnstile_cache __read_mostly; | |
80 | 78 | |||
81 | static int turnstile_ctor(void *, void *, int); | 79 | static int turnstile_ctor(void *, void *, int); | |
82 | 80 | |||
83 | extern turnstile_t turnstile0; | 81 | extern turnstile_t turnstile0; | |
84 | 82 | |||
85 | /* | 83 | /* | |
86 | * turnstile_init: | 84 | * turnstile_init: | |
87 | * | 85 | * | |
88 | * Initialize the turnstile mechanism. | 86 | * Initialize the turnstile mechanism. | |
89 | */ | 87 | */ | |
90 | void | 88 | void | |
91 | turnstile_init(void) | 89 | turnstile_init(void) | |
92 | { | 90 | { | |
93 | tschain_t *tc; | 91 | tschain_t *tc; | |
94 | int i; | 92 | int i; | |
95 | 93 | |||
96 | for (i = 0; i < TS_HASH_SIZE; i++) { | 94 | for (i = 0; i < TS_HASH_SIZE; i++) { | |
97 | tc = &turnstile_tab[i]; | 95 | tc = &turnstile_tab[i]; | |
98 | LIST_INIT(&tc->tc_chain); | 96 | LIST_INIT(&tc->tc_chain); | |
99 | tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); | 97 | tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); | |
100 | } | 98 | } | |
101 | 99 | |||
102 | turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, | 100 | turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, | |
103 | "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); | 101 | "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); | |
104 | KASSERT(turnstile_cache != NULL); | 102 | KASSERT(turnstile_cache != NULL); | |
105 | 103 | |||
106 | (void)turnstile_ctor(NULL, &turnstile0, 0); | 104 | (void)turnstile_ctor(NULL, &turnstile0, 0); | |
107 | } | 105 | } | |
108 | 106 | |||
109 | /* | 107 | /* | |
110 | * turnstile_ctor: | 108 | * turnstile_ctor: | |
111 | * | 109 | * | |
112 | * Constructor for turnstiles. | 110 | * Constructor for turnstiles. | |
113 | */ | 111 | */ | |
114 | static int | 112 | static int | |
115 | turnstile_ctor(void *arg, void *obj, int flags) | 113 | turnstile_ctor(void *arg, void *obj, int flags) | |
116 | { | 114 | { | |
117 | turnstile_t *ts = obj; | 115 | turnstile_t *ts = obj; | |
118 | 116 | |||
119 | memset(ts, 0, sizeof(*ts)); | 117 | memset(ts, 0, sizeof(*ts)); | |
120 | sleepq_init(&ts->ts_sleepq[TS_READER_Q]); | 118 | sleepq_init(&ts->ts_sleepq[TS_READER_Q]); | |
121 | sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]); | 119 | sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]); | |
122 | return (0); | 120 | return (0); | |
123 | } | 121 | } | |
124 | 122 | |||
125 | /* | 123 | /* | |
126 | * turnstile_remove: | 124 | * turnstile_remove: | |
127 | * | 125 | * | |
128 | * Remove an LWP from a turnstile sleep queue and wake it. | 126 | * Remove an LWP from a turnstile sleep queue and wake it. | |
129 | */ | 127 | */ | |
130 | static inline void | 128 | static inline void | |
131 | turnstile_remove(turnstile_t *ts, lwp_t *l, int q) | 129 | turnstile_remove(turnstile_t *ts, lwp_t *l, int q) | |
132 | { | 130 | { | |
133 | turnstile_t *nts; | 131 | turnstile_t *nts; | |
134 | 132 | |||
135 | KASSERT(l->l_ts == ts); | 133 | KASSERT(l->l_ts == ts); | |
136 | 134 | |||
137 | /* | 135 | /* | |
138 | * This process is no longer using the active turnstile. | 136 | * This process is no longer using the active turnstile. | |
139 | * Find an inactive one on the free list to give to it. | 137 | * Find an inactive one on the free list to give to it. | |
140 | */ | 138 | */ | |
141 | if ((nts = ts->ts_free) != NULL) { | 139 | if ((nts = ts->ts_free) != NULL) { | |
142 | KASSERT(TS_ALL_WAITERS(ts) > 1); | 140 | KASSERT(TS_ALL_WAITERS(ts) > 1); | |
143 | l->l_ts = nts; | 141 | l->l_ts = nts; | |
144 | ts->ts_free = nts->ts_free; | 142 | ts->ts_free = nts->ts_free; | |
145 | nts->ts_free = NULL; | 143 | nts->ts_free = NULL; | |
146 | } else { | 144 | } else { | |
147 | /* | 145 | /* | |
148 | * If the free list is empty, this is the last | 146 | * If the free list is empty, this is the last | |
149 | * waiter. | 147 | * waiter. | |
150 | */ | 148 | */ | |
151 | KASSERT(TS_ALL_WAITERS(ts) == 1); | 149 | KASSERT(TS_ALL_WAITERS(ts) == 1); | |
152 | LIST_REMOVE(ts, ts_chain); | 150 | LIST_REMOVE(ts, ts_chain); | |
153 | } | 151 | } | |
154 | 152 | |||
155 | ts->ts_waiters[q]--; | 153 | ts->ts_waiters[q]--; | |
156 | sleepq_remove(&ts->ts_sleepq[q], l); | 154 | sleepq_remove(&ts->ts_sleepq[q], l); | |
157 | } | 155 | } | |
158 | 156 | |||
159 | /* | 157 | /* | |
160 | * turnstile_lookup: | 158 | * turnstile_lookup: | |
161 | * | 159 | * | |
162 | * Look up the turnstile for the specified lock. This acquires and | 160 | * Look up the turnstile for the specified lock. This acquires and | |
163 | * holds the turnstile chain lock (sleep queue interlock). | 161 | * holds the turnstile chain lock (sleep queue interlock). | |
164 | */ | 162 | */ | |
165 | turnstile_t * | 163 | turnstile_t * | |
166 | turnstile_lookup(wchan_t obj) | 164 | turnstile_lookup(wchan_t obj) | |
167 | { | 165 | { | |
168 | turnstile_t *ts; | 166 | turnstile_t *ts; | |
169 | tschain_t *tc; | 167 | tschain_t *tc; | |
170 | 168 | |||
171 | tc = &turnstile_tab[TS_HASH(obj)]; | 169 | tc = &turnstile_tab[TS_HASH(obj)]; | |
172 | mutex_spin_enter(tc->tc_mutex); | 170 | mutex_spin_enter(tc->tc_mutex); | |
173 | 171 | |||
174 | LIST_FOREACH(ts, &tc->tc_chain, ts_chain) | 172 | LIST_FOREACH(ts, &tc->tc_chain, ts_chain) | |
175 | if (ts->ts_obj == obj) | 173 | if (ts->ts_obj == obj) | |
176 | return (ts); | 174 | return (ts); | |
177 | 175 | |||
178 | /* | 176 | /* | |
179 | * No turnstile yet for this lock. No problem, turnstile_block() | 177 | * No turnstile yet for this lock. No problem, turnstile_block() | |
180 | * handles this by fetching the turnstile from the blocking thread. | 178 | * handles this by fetching the turnstile from the blocking thread. | |
181 | */ | 179 | */ | |
182 | return (NULL); | 180 | return (NULL); | |
183 | } | 181 | } | |
184 | 182 | |||
185 | /* | 183 | /* | |
186 | * turnstile_exit: | 184 | * turnstile_exit: | |
187 | * | 185 | * | |
188 | * Abort a turnstile operation. | 186 | * Abort a turnstile operation. | |
189 | */ | 187 | */ | |
190 | void | 188 | void | |
191 | turnstile_exit(wchan_t obj) | 189 | turnstile_exit(wchan_t obj) | |
192 | { | 190 | { | |
193 | tschain_t *tc; | 191 | tschain_t *tc; | |
194 | 192 | |||
195 | tc = &turnstile_tab[TS_HASH(obj)]; | 193 | tc = &turnstile_tab[TS_HASH(obj)]; | |
196 | mutex_spin_exit(tc->tc_mutex); | 194 | mutex_spin_exit(tc->tc_mutex); | |
197 | } | 195 | } | |
198 | 196 | |||
199 | /* | 197 | /* | |
200 | * turnstile_block: | 198 | * turnstile_block: | |
201 | * | 199 | * | |
202 | * Enter an object into the turnstile chain and prepare the current | 200 | * Enter an object into the turnstile chain and prepare the current | |
203 | * LWP for sleep. | 201 | * LWP for sleep. | |
204 | */ | 202 | */ | |
205 | void | 203 | void | |
206 | turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) | 204 | turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) | |
207 | { | 205 | { | |
208 | lwp_t *l; | 206 | lwp_t *l; | |
209 | lwp_t *cur; /* cached curlwp */ | 207 | lwp_t *cur; /* cached curlwp */ | |
210 | lwp_t *owner; | 208 | lwp_t *owner; | |
211 | turnstile_t *ots; | 209 | turnstile_t *ots; | |
212 | tschain_t *tc; | 210 | tschain_t *tc; | |
213 | sleepq_t *sq; | 211 | sleepq_t *sq; | |
214 | pri_t prio, obase; | 212 | pri_t prio, obase; | |
215 | 213 | |||
216 | tc = &turnstile_tab[TS_HASH(obj)]; | 214 | tc = &turnstile_tab[TS_HASH(obj)]; | |
217 | l = cur = curlwp; | 215 | l = cur = curlwp; | |
218 | 216 | |||
219 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | 217 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | |
220 | KASSERT(mutex_owned(tc->tc_mutex)); | 218 | KASSERT(mutex_owned(tc->tc_mutex)); | |
221 | KASSERT(l != NULL && l->l_ts != NULL); | 219 | KASSERT(l != NULL && l->l_ts != NULL); | |
222 | 220 | |||
223 | if (ts == NULL) { | 221 | if (ts == NULL) { | |
224 | /* | 222 | /* | |
225 | * We are the first thread to wait for this object; | 223 | * We are the first thread to wait for this object; | |
226 | * lend our turnstile to it. | 224 | * lend our turnstile to it. | |
227 | */ | 225 | */ | |
228 | ts = l->l_ts; | 226 | ts = l->l_ts; | |
229 | KASSERT(TS_ALL_WAITERS(ts) == 0); | 227 | KASSERT(TS_ALL_WAITERS(ts) == 0); | |
230 | KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && | 228 | KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && | |
231 | TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | 229 | TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | |
232 | ts->ts_obj = obj; | 230 | ts->ts_obj = obj; | |
233 | ts->ts_inheritor = NULL; | 231 | ts->ts_inheritor = NULL; | |
234 | LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); | 232 | LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); | |
235 | } else { | 233 | } else { | |
236 | /* | 234 | /* | |
237 | * Object already has a turnstile. Put our turnstile | 235 | * Object already has a turnstile. Put our turnstile | |
238 | * onto the free list, and reference the existing | 236 | * onto the free list, and reference the existing | |
239 | * turnstile instead. | 237 | * turnstile instead. | |
240 | */ | 238 | */ | |
241 | ots = l->l_ts; | 239 | ots = l->l_ts; | |
242 | KASSERT(ots->ts_free == NULL); | 240 | KASSERT(ots->ts_free == NULL); | |
243 | ots->ts_free = ts->ts_free; | 241 | ots->ts_free = ts->ts_free; | |
244 | ts->ts_free = ots; | 242 | ts->ts_free = ots; | |
245 | l->l_ts = ts; | 243 | l->l_ts = ts; | |
246 | 244 | |||
247 | KASSERT(ts->ts_obj == obj); | 245 | KASSERT(ts->ts_obj == obj); | |
248 | KASSERT(TS_ALL_WAITERS(ts) != 0); | 246 | KASSERT(TS_ALL_WAITERS(ts) != 0); | |
249 | KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || | 247 | KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || | |
250 | !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | 248 | !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); | |
251 | } | 249 | } | |
252 | 250 | |||
253 | sq = &ts->ts_sleepq[q]; | 251 | sq = &ts->ts_sleepq[q]; | |
254 | ts->ts_waiters[q]++; | 252 | ts->ts_waiters[q]++; | |
255 | sleepq_enter(sq, l, tc->tc_mutex); | 253 | sleepq_enter(sq, l, tc->tc_mutex); | |
256 | LOCKDEBUG_BARRIER(tc->tc_mutex, 1); | 254 | LOCKDEBUG_BARRIER(tc->tc_mutex, 1); | |
257 | l->l_kpriority = true; | 255 | l->l_kpriority = true; | |
258 | obase = l->l_kpribase; | 256 | obase = l->l_kpribase; | |
259 | if (obase < PRI_KTHREAD) | 257 | if (obase < PRI_KTHREAD) | |
260 | l->l_kpribase = PRI_KTHREAD; | 258 | l->l_kpribase = PRI_KTHREAD; | |
261 | sleepq_enqueue(sq, obj, "tstile", sobj); | 259 | sleepq_enqueue(sq, obj, "tstile", sobj); | |
262 | 260 | |||
263 | /* | 261 | /* | |
264 | * Disable preemption across this entire block, as we may drop | 262 | * Disable preemption across this entire block, as we may drop | |
265 | * scheduler locks (allowing preemption), and would prefer not | 263 | * scheduler locks (allowing preemption), and would prefer not | |
266 | * to be interrupted while in a state of flux. | 264 | * to be interrupted while in a state of flux. | |
267 | */ | 265 | */ | |
268 | KPREEMPT_DISABLE(l); | 266 | KPREEMPT_DISABLE(l); | |
269 | 267 | |||
270 | /* | 268 | /* | |
271 | * Lend our priority to lwps on the blocking chain. | 269 | * Lend our priority to lwps on the blocking chain. | |
272 | * | 270 | * | |
273 | * NOTE: if you get a panic in this code block, it is likely that | 271 | * NOTE: if you get a panic in this code block, it is likely that | |
274 | * a lock has been destroyed or corrupted while still in use. Try | 272 | * a lock has been destroyed or corrupted while still in use. Try | |
275 | * compiling a kernel with LOCKDEBUG to pinpoint the problem. | 273 | * compiling a kernel with LOCKDEBUG to pinpoint the problem. | |
276 | */ | 274 | */ | |
277 | prio = lwp_eprio(l); | 275 | prio = lwp_eprio(l); | |
278 | KASSERT(cur == l); | 276 | KASSERT(cur == l); | |
279 | KASSERT(tc->tc_mutex == cur->l_mutex); | 277 | KASSERT(tc->tc_mutex == cur->l_mutex); | |
280 | for (;;) { | 278 | for (;;) { | |
281 | bool dolock; | 279 | bool dolock; | |
282 | 280 | |||
283 | if (l->l_wchan == NULL) | 281 | if (l->l_wchan == NULL) | |
284 | break; | 282 | break; | |
285 | 283 | |||
286 | owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); | 284 | owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); | |
287 | if (owner == NULL) | 285 | if (owner == NULL) | |
288 | break; | 286 | break; | |
289 | 287 | |||
290 | /* The owner may have changed as we have dropped the tc lock */ | 288 | /* The owner may have changed as we have dropped the tc lock */ | |
291 | if (cur == owner) { | 289 | if (cur == owner) { | |
292 | /* | 290 | /* | |
293 | * we own the lock: stop here, sleepq_block() | 291 | * we own the lock: stop here, sleepq_block() | |
294 | * should wake up immediatly | 292 | * should wake up immediatly | |
295 | */ | 293 | */ | |
296 | break; | 294 | break; | |
297 | } | 295 | } | |
298 | if (l->l_mutex != owner->l_mutex) | 296 | if (l->l_mutex != owner->l_mutex) | |
299 | dolock = true; | 297 | dolock = true; | |
300 | else | 298 | else | |
301 | dolock = false; | 299 | dolock = false; | |
302 | if (l == owner || (dolock && !lwp_trylock(owner))) { | 300 | if (l == owner || (dolock && !lwp_trylock(owner))) { | |
303 | /* | 301 | /* | |
304 | * restart from curlwp. | 302 | * restart from curlwp. | |
305 | * Note that there may be a livelock here: | 303 | * Note that there may be a livelock here: | |
306 | * the owner may try grabing cur's lock (which is | 304 | * the owner may try grabing cur's lock (which is | |
307 | * the tc lock) while we're trying to grab | 305 | * the tc lock) while we're trying to grab | |
308 | * the owner's lock. | 306 | * the owner's lock. | |
309 | */ | 307 | */ | |
310 | lwp_unlock(l); | 308 | lwp_unlock(l); | |
311 | l = cur; | 309 | l = cur; | |
312 | lwp_lock(l); | 310 | lwp_lock(l); | |
313 | prio = lwp_eprio(l); | 311 | prio = lwp_eprio(l); | |
314 | continue; | 312 | continue; | |
315 | } | 313 | } | |
316 | if (prio <= lwp_eprio(owner)) { | 314 | if (prio <= lwp_eprio(owner)) { | |
317 | if (dolock) | 315 | if (dolock) | |
318 | lwp_unlock(owner); | 316 | lwp_unlock(owner); | |
319 | break; | 317 | break; | |
320 | } | 318 | } | |
321 | ts = l->l_ts; | 319 | ts = l->l_ts; | |
322 | KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); | 320 | KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); | |
323 | if (ts->ts_inheritor == NULL) { | 321 | if (ts->ts_inheritor == NULL) { | |
324 | ts->ts_inheritor = owner; | 322 | ts->ts_inheritor = owner; | |
325 | ts->ts_eprio = prio; | 323 | ts->ts_eprio = prio; | |
326 | SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); | 324 | SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); | |
327 | lwp_lendpri(owner, prio); | 325 | lwp_lendpri(owner, prio); | |
328 | } else if (prio > ts->ts_eprio) { | 326 | } else if (prio > ts->ts_eprio) { | |
329 | ts->ts_eprio = prio; | 327 | ts->ts_eprio = prio; | |
330 | lwp_lendpri(owner, prio); | 328 | lwp_lendpri(owner, prio); | |
331 | } | 329 | } | |
332 | if (dolock) | 330 | if (dolock) | |
333 | lwp_unlock(l); | 331 | lwp_unlock(l); | |
334 | l = owner; | 332 | l = owner; | |
335 | } | 333 | } | |
336 | LOCKDEBUG_BARRIER(l->l_mutex, 1); | 334 | LOCKDEBUG_BARRIER(l->l_mutex, 1); | |
337 | if (cur->l_mutex != l->l_mutex) { | 335 | if (cur->l_mutex != l->l_mutex) { | |
338 | lwp_unlock(l); | 336 | lwp_unlock(l); | |
339 | lwp_lock(cur); | 337 | lwp_lock(cur); | |
340 | } | 338 | } | |
341 | LOCKDEBUG_BARRIER(cur->l_mutex, 1); | 339 | LOCKDEBUG_BARRIER(cur->l_mutex, 1); | |
342 | 340 | |||
343 | sleepq_block(0, false); | 341 | sleepq_block(0, false); | |
344 | cur->l_kpribase = obase; | 342 | cur->l_kpribase = obase; | |
345 | KPREEMPT_ENABLE(cur); | 343 | KPREEMPT_ENABLE(cur); | |
346 | } | 344 | } | |
347 | 345 | |||
348 | /* | 346 | /* | |
349 | * turnstile_wakeup: | 347 | * turnstile_wakeup: | |
350 | * | 348 | * | |
351 | * Wake up the specified number of threads that are blocked | 349 | * Wake up the specified number of threads that are blocked | |
352 | * in a turnstile. | 350 | * in a turnstile. | |
353 | */ | 351 | */ | |
354 | void | 352 | void | |
355 | turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) | 353 | turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) | |
356 | { | 354 | { | |
357 | sleepq_t *sq; | 355 | sleepq_t *sq; | |
358 | tschain_t *tc; | 356 | tschain_t *tc; | |
359 | lwp_t *l; | 357 | lwp_t *l; | |
360 | 358 | |||
361 | tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; | 359 | tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; | |
362 | sq = &ts->ts_sleepq[q]; | 360 | sq = &ts->ts_sleepq[q]; | |
363 | 361 | |||
364 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | 362 | KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); | |
365 | KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); | 363 | KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); | |
366 | KASSERT(mutex_owned(tc->tc_mutex)); | 364 | KASSERT(mutex_owned(tc->tc_mutex)); | |
367 | KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); | 365 | KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); | |
368 | 366 | |||
369 | /* | 367 | /* | |
370 | * restore inherited priority if necessary. | 368 | * restore inherited priority if necessary. | |
371 | */ | 369 | */ | |
372 | 370 | |||
373 | if (ts->ts_inheritor != NULL) { | 371 | if (ts->ts_inheritor != NULL) { | |
374 | turnstile_t *iter; | 372 | turnstile_t *iter; | |
375 | turnstile_t *next; | 373 | turnstile_t *next; | |
376 | turnstile_t *prev = NULL; | 374 | turnstile_t *prev = NULL; | |
377 | pri_t prio; | 375 | pri_t prio; | |
378 | bool dolock; | 376 | bool dolock; | |
379 | 377 | |||
380 | ts->ts_inheritor = NULL; | 378 | ts->ts_inheritor = NULL; | |
381 | l = curlwp; | 379 | l = curlwp; | |
382 | 380 | |||
383 | dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; | 381 | dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; | |
384 | if (dolock) { | 382 | if (dolock) { | |
385 | lwp_lock(l); | 383 | lwp_lock(l); | |
386 | } | 384 | } | |
387 | 385 | |||
388 | /* | 386 | /* | |
389 | * the following loop does two things. | 387 | * the following loop does two things. | |
390 | * | 388 | * | |
391 | * - remove ts from the list. | 389 | * - remove ts from the list. | |
392 | * | 390 | * | |
393 | * - from the rest of the list, find the highest priority. | 391 | * - from the rest of the list, find the highest priority. | |
394 | */ | 392 | */ | |
395 | 393 | |||
396 | prio = -1; | 394 | prio = -1; | |
397 | KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); | 395 | KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); | |
398 | for (iter = SLIST_FIRST(&l->l_pi_lenders); | 396 | for (iter = SLIST_FIRST(&l->l_pi_lenders); | |
399 | iter != NULL; iter = next) { | 397 | iter != NULL; iter = next) { | |
400 | KASSERT(lwp_eprio(l) >= ts->ts_eprio); | 398 | KASSERT(lwp_eprio(l) >= ts->ts_eprio); | |
401 | next = SLIST_NEXT(iter, ts_pichain); | 399 | next = SLIST_NEXT(iter, ts_pichain); | |
402 | if (iter == ts) { | 400 | if (iter == ts) { | |
403 | if (prev == NULL) { | 401 | if (prev == NULL) { | |
404 | SLIST_REMOVE_HEAD(&l->l_pi_lenders, | 402 | SLIST_REMOVE_HEAD(&l->l_pi_lenders, | |
405 | ts_pichain); | 403 | ts_pichain); | |
406 | } else { | 404 | } else { | |
407 | SLIST_REMOVE_AFTER(prev, ts_pichain); | 405 | SLIST_REMOVE_AFTER(prev, ts_pichain); | |
408 | } | 406 | } | |
409 | } else if (prio < iter->ts_eprio) { | 407 | } else if (prio < iter->ts_eprio) { | |
410 | prio = iter->ts_eprio; | 408 | prio = iter->ts_eprio; | |
411 | } | 409 | } | |
412 | prev = iter; | 410 | prev = iter; | |
413 | } | 411 | } | |
414 | 412 | |||
415 | lwp_lendpri(l, prio); | 413 | lwp_lendpri(l, prio); | |
416 | 414 | |||
417 | if (dolock) { | 415 | if (dolock) { | |
418 | lwp_unlock(l); | 416 | lwp_unlock(l); | |
419 | } | 417 | } | |
420 | } | 418 | } | |
421 | 419 | |||
422 | if (nl != NULL) { | 420 | if (nl != NULL) { | |
423 | #if defined(DEBUG) || defined(LOCKDEBUG) | 421 | #if defined(DEBUG) || defined(LOCKDEBUG) | |
424 | TAILQ_FOREACH(l, sq, l_sleepchain) { | 422 | TAILQ_FOREACH(l, sq, l_sleepchain) { | |
425 | if (l == nl) | 423 | if (l == nl) | |
426 | break; | 424 | break; | |
427 | } | 425 | } | |
428 | if (l == NULL) | 426 | if (l == NULL) | |
429 | panic("turnstile_wakeup: nl not on sleepq"); | 427 | panic("turnstile_wakeup: nl not on sleepq"); | |
430 | #endif | 428 | #endif | |
431 | turnstile_remove(ts, nl, q); | 429 | turnstile_remove(ts, nl, q); | |
432 | } else { | 430 | } else { | |
433 | while (count-- > 0) { | 431 | while (count-- > 0) { | |
434 | l = TAILQ_FIRST(sq); | 432 | l = TAILQ_FIRST(sq); | |
435 | KASSERT(l != NULL); | 433 | KASSERT(l != NULL); | |
436 | turnstile_remove(ts, l, q); | 434 | turnstile_remove(ts, l, q); | |
437 | } | 435 | } | |
438 | } | 436 | } | |
439 | mutex_spin_exit(tc->tc_mutex); | 437 | mutex_spin_exit(tc->tc_mutex); | |
440 | } | 438 | } | |
441 | 439 | |||
442 | /* | 440 | /* | |
443 | * turnstile_unsleep: | 441 | * turnstile_unsleep: | |
444 | * | 442 | * | |
445 | * Remove an LWP from the turnstile. This is called when the LWP has | 443 | * Remove an LWP from the turnstile. This is called when the LWP has | |
446 | * not been awoken normally but instead interrupted: for example, if it | 444 | * not been awoken normally but instead interrupted: for example, if it | |
447 | * has received a signal. It's not a valid action for turnstiles, | 445 | * has received a signal. It's not a valid action for turnstiles, | |
448 | * since LWPs blocking on a turnstile are not interruptable. | 446 | * since LWPs blocking on a turnstile are not interruptable. | |
449 | */ | 447 | */ | |
450 | void | 448 | void | |
451 | turnstile_unsleep(lwp_t *l, bool cleanup) | 449 | turnstile_unsleep(lwp_t *l, bool cleanup) | |
452 | { | 450 | { | |
453 | 451 | |||
454 | lwp_unlock(l); | 452 | lwp_unlock(l); | |
455 | panic("turnstile_unsleep"); | 453 | panic("turnstile_unsleep"); | |
456 | } | 454 | } | |
457 | 455 | |||
458 | /* | 456 | /* | |
459 | * turnstile_changepri: | 457 | * turnstile_changepri: | |
460 | * | 458 | * | |
461 | * Adjust the priority of an LWP residing on a turnstile. | 459 | * Adjust the priority of an LWP residing on a turnstile. | |
462 | */ | 460 | */ | |
463 | void | 461 | void | |
464 | turnstile_changepri(lwp_t *l, pri_t pri) | 462 | turnstile_changepri(lwp_t *l, pri_t pri) | |
465 | { | 463 | { | |
466 | 464 | |||
467 | /* XXX priority inheritance */ | 465 | /* XXX priority inheritance */ | |
468 | sleepq_changepri(l, pri); | 466 | sleepq_changepri(l, pri); | |
469 | } | 467 | } | |
470 | 468 | |||
471 | #if defined(LOCKDEBUG) | 469 | #if defined(LOCKDEBUG) | |
472 | /* | 470 | /* | |
473 | * turnstile_print: | 471 | * turnstile_print: | |
474 | * | 472 | * | |
475 | * Given the address of a lock object, print the contents of a | 473 | * Given the address of a lock object, print the contents of a | |
476 | * turnstile. | 474 | * turnstile. | |
477 | */ | 475 | */ | |
478 | void | 476 | void | |
479 | turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) | 477 | turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) | |
480 | { | 478 | { | |
481 | turnstile_t *ts; | 479 | turnstile_t *ts; | |
482 | tschain_t *tc; | 480 | tschain_t *tc; | |
483 | sleepq_t *rsq, *wsq; | 481 | sleepq_t *rsq, *wsq; | |
484 | lwp_t *l; | 482 | lwp_t *l; | |
485 | 483 | |||
486 | tc = &turnstile_tab[TS_HASH(obj)]; | 484 | tc = &turnstile_tab[TS_HASH(obj)]; | |
487 | 485 | |||
488 | LIST_FOREACH(ts, &tc->tc_chain, ts_chain) | 486 | LIST_FOREACH(ts, &tc->tc_chain, ts_chain) | |
489 | if (ts->ts_obj == obj) | 487 | if (ts->ts_obj == obj) | |
490 | break; | 488 | break; | |
491 | 489 | |||
492 | (*pr)("Turnstile chain at %p.\n", tc); | 490 | (*pr)("Turnstile chain at %p.\n", tc); | |
493 | if (ts == NULL) { | 491 | if (ts == NULL) { | |
494 | (*pr)("=> No active turnstile for this lock.\n"); | 492 | (*pr)("=> No active turnstile for this lock.\n"); | |
495 | return; | 493 | return; | |
496 | } | 494 | } | |
497 | 495 | |||
498 | rsq = &ts->ts_sleepq[TS_READER_Q]; | 496 | rsq = &ts->ts_sleepq[TS_READER_Q]; | |
499 | wsq = &ts->ts_sleepq[TS_WRITER_Q]; | 497 | wsq = &ts->ts_sleepq[TS_WRITER_Q]; | |
500 | 498 | |||
501 | (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); | 499 | (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); | |
502 | 500 | |||
503 | (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q)); | 501 | (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q)); | |
504 | TAILQ_FOREACH(l, rsq, l_sleepchain) { | 502 | TAILQ_FOREACH(l, rsq, l_sleepchain) { | |
505 | (*pr)(" %p", l); | 503 | (*pr)(" %p", l); | |
506 | } | 504 | } | |
507 | (*pr)("\n"); | 505 | (*pr)("\n"); | |
508 | 506 | |||
509 | (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q)); | 507 | (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q)); | |
510 | TAILQ_FOREACH(l, wsq, l_sleepchain) { | 508 | TAILQ_FOREACH(l, wsq, l_sleepchain) { | |
511 | (*pr)(" %p", l); | 509 | (*pr)(" %p", l); | |
512 | } | 510 | } | |
513 | (*pr)("\n"); | 511 | (*pr)("\n"); | |
514 | } | 512 | } | |
515 | #endif /* LOCKDEBUG */ | 513 | #endif /* LOCKDEBUG */ |
--- src/sys/kern/kern_ksyms.c 2011/04/24 18:46:22 1.63
+++ src/sys/kern/kern_ksyms.c 2011/07/27 14:35:34 1.64
@@ -1,1094 +1,1092 @@ | @@ -1,1094 +1,1092 @@ | |||
1 | /* $NetBSD: kern_ksyms.c,v 1.63 2011/04/24 18:46:22 rmind Exp $ */ | 1 | /* $NetBSD: kern_ksyms.c,v 1.64 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software developed for The NetBSD Foundation | 7 | * This code is derived from software developed for The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 2001, 2003 Anders Magnusson (ragge@ludd.luth.se). | 33 | * Copyright (c) 2001, 2003 Anders Magnusson (ragge@ludd.luth.se). | |
34 | * All rights reserved. | 34 | * All rights reserved. | |
35 | * | 35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | 36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | 37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | 38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | 39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | 40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | 41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | 42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | 43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. The name of the author may not be used to endorse or promote products | 44 | * 3. The name of the author may not be used to endorse or promote products | |
45 | * derived from this software without specific prior written permission | 45 | * derived from this software without specific prior written permission | |
46 | * | 46 | * | |
47 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 47 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
48 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 48 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
49 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 49 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
50 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 50 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
51 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 51 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
52 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 52 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
53 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 53 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
54 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 54 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
55 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 55 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
56 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 56 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
57 | */ | 57 | */ | |
58 | 58 | |||
59 | /* | 59 | /* | |
60 | * Code to deal with in-kernel symbol table management + /dev/ksyms. | 60 | * Code to deal with in-kernel symbol table management + /dev/ksyms. | |
61 | * | 61 | * | |
62 | * For each loaded module the symbol table info is kept track of by a | 62 | * For each loaded module the symbol table info is kept track of by a | |
63 | * struct, placed in a circular list. The first entry is the kernel | 63 | * struct, placed in a circular list. The first entry is the kernel | |
64 | * symbol table. | 64 | * symbol table. | |
65 | */ | 65 | */ | |
66 | 66 | |||
67 | /* | 67 | /* | |
68 | * TODO: | 68 | * TODO: | |
69 | * | 69 | * | |
70 | * Add support for mmap, poll. | 70 | * Add support for mmap, poll. | |
71 | */ | 71 | */ | |
72 | 72 | |||
73 | #include <sys/cdefs.h> | 73 | #include <sys/cdefs.h> | |
74 | __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.63 2011/04/24 18:46:22 rmind Exp $"); | 74 | __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.64 2011/07/27 14:35:34 uebayasi Exp $"); | |
75 | 75 | |||
76 | #if defined(_KERNEL) && defined(_KERNEL_OPT) | 76 | #if defined(_KERNEL) && defined(_KERNEL_OPT) | |
77 | #include "opt_ddb.h" | 77 | #include "opt_ddb.h" | |
78 | #include "opt_ddbparam.h" /* for SYMTAB_SPACE */ | 78 | #include "opt_ddbparam.h" /* for SYMTAB_SPACE */ | |
79 | #include "opt_dtrace.h" | 79 | #include "opt_dtrace.h" | |
80 | #endif | 80 | #endif | |
81 | 81 | |||
82 | #define _KSYMS_PRIVATE | 82 | #define _KSYMS_PRIVATE | |
83 | 83 | |||
84 | #include <sys/param.h> | 84 | #include <sys/param.h> | |
85 | #include <sys/queue.h> | 85 | #include <sys/queue.h> | |
86 | #include <sys/exec.h> | 86 | #include <sys/exec.h> | |
87 | #include <sys/systm.h> | 87 | #include <sys/systm.h> | |
88 | #include <sys/conf.h> | 88 | #include <sys/conf.h> | |
89 | #include <sys/kmem.h> | 89 | #include <sys/kmem.h> | |
90 | #include <sys/proc.h> | 90 | #include <sys/proc.h> | |
91 | #include <sys/atomic.h> | 91 | #include <sys/atomic.h> | |
92 | #include <sys/ksyms.h> | 92 | #include <sys/ksyms.h> | |
93 | 93 | |||
94 | #include <uvm/uvm_extern.h> | |||
95 | ||||
96 | #ifdef DDB | 94 | #ifdef DDB | |
97 | #include <ddb/db_output.h> | 95 | #include <ddb/db_output.h> | |
98 | #endif | 96 | #endif | |
99 | 97 | |||
100 | #include "ksyms.h" | 98 | #include "ksyms.h" | |
101 | 99 | |||
102 | #define KSYMS_MAX_ID 65536 | 100 | #define KSYMS_MAX_ID 65536 | |
103 | #ifdef KDTRACE_HOOKS | 101 | #ifdef KDTRACE_HOOKS | |
104 | static uint32_t ksyms_nmap[KSYMS_MAX_ID]; /* sorted symbol table map */ | 102 | static uint32_t ksyms_nmap[KSYMS_MAX_ID]; /* sorted symbol table map */ | |
105 | #else | 103 | #else | |
106 | static uint32_t *ksyms_nmap = NULL; | 104 | static uint32_t *ksyms_nmap = NULL; | |
107 | #endif | 105 | #endif | |
108 | 106 | |||
109 | static int ksyms_maxlen; | 107 | static int ksyms_maxlen; | |
110 | static bool ksyms_isopen; | 108 | static bool ksyms_isopen; | |
111 | static bool ksyms_initted; | 109 | static bool ksyms_initted; | |
112 | static struct ksyms_hdr ksyms_hdr; | 110 | static struct ksyms_hdr ksyms_hdr; | |
113 | static kmutex_t ksyms_lock; | 111 | static kmutex_t ksyms_lock; | |
114 | 112 | |||
115 | void ksymsattach(int); | 113 | void ksymsattach(int); | |
116 | static void ksyms_hdr_init(void *); | 114 | static void ksyms_hdr_init(void *); | |
117 | static void ksyms_sizes_calc(void); | 115 | static void ksyms_sizes_calc(void); | |
118 | 116 | |||
119 | #ifdef KSYMS_DEBUG | 117 | #ifdef KSYMS_DEBUG | |
120 | #define FOLLOW_CALLS 1 | 118 | #define FOLLOW_CALLS 1 | |
121 | #define FOLLOW_MORE_CALLS 2 | 119 | #define FOLLOW_MORE_CALLS 2 | |
122 | #define FOLLOW_DEVKSYMS 4 | 120 | #define FOLLOW_DEVKSYMS 4 | |
123 | static int ksyms_debug; | 121 | static int ksyms_debug; | |
124 | #endif | 122 | #endif | |
125 | 123 | |||
126 | #ifdef SYMTAB_SPACE | 124 | #ifdef SYMTAB_SPACE | |
127 | #define SYMTAB_FILLER "|This is the symbol table!" | 125 | #define SYMTAB_FILLER "|This is the symbol table!" | |
128 | 126 | |||
129 | char db_symtab[SYMTAB_SPACE] = SYMTAB_FILLER; | 127 | char db_symtab[SYMTAB_SPACE] = SYMTAB_FILLER; | |
130 | int db_symtabsize = SYMTAB_SPACE; | 128 | int db_symtabsize = SYMTAB_SPACE; | |
131 | #endif | 129 | #endif | |
132 | 130 | |||
133 | int ksyms_symsz; | 131 | int ksyms_symsz; | |
134 | int ksyms_strsz; | 132 | int ksyms_strsz; | |
135 | int ksyms_ctfsz; | 133 | int ksyms_ctfsz; | |
136 | TAILQ_HEAD(, ksyms_symtab) ksyms_symtabs = | 134 | TAILQ_HEAD(, ksyms_symtab) ksyms_symtabs = | |
137 | TAILQ_HEAD_INITIALIZER(ksyms_symtabs); | 135 | TAILQ_HEAD_INITIALIZER(ksyms_symtabs); | |
138 | static struct ksyms_symtab kernel_symtab; | 136 | static struct ksyms_symtab kernel_symtab; | |
139 | 137 | |||
140 | static int | 138 | static int | |
141 | ksyms_verify(void *symstart, void *strstart) | 139 | ksyms_verify(void *symstart, void *strstart) | |
142 | { | 140 | { | |
143 | #if defined(DIAGNOSTIC) || defined(DEBUG) | 141 | #if defined(DIAGNOSTIC) || defined(DEBUG) | |
144 | if (symstart == NULL) | 142 | if (symstart == NULL) | |
145 | printf("ksyms: Symbol table not found\n"); | 143 | printf("ksyms: Symbol table not found\n"); | |
146 | if (strstart == NULL) | 144 | if (strstart == NULL) | |
147 | printf("ksyms: String table not found\n"); | 145 | printf("ksyms: String table not found\n"); | |
148 | if (symstart == NULL || strstart == NULL) | 146 | if (symstart == NULL || strstart == NULL) | |
149 | printf("ksyms: Perhaps the kernel is stripped?\n"); | 147 | printf("ksyms: Perhaps the kernel is stripped?\n"); | |
150 | #endif | 148 | #endif | |
151 | if (symstart == NULL || strstart == NULL) | 149 | if (symstart == NULL || strstart == NULL) | |
152 | return 0; | 150 | return 0; | |
153 | return 1; | 151 | return 1; | |
154 | } | 152 | } | |
155 | 153 | |||
156 | /* | 154 | /* | |
157 | * Finds a certain symbol name in a certain symbol table. | 155 | * Finds a certain symbol name in a certain symbol table. | |
158 | */ | 156 | */ | |
159 | static Elf_Sym * | 157 | static Elf_Sym * | |
160 | findsym(const char *name, struct ksyms_symtab *table, int type) | 158 | findsym(const char *name, struct ksyms_symtab *table, int type) | |
161 | { | 159 | { | |
162 | Elf_Sym *sym, *maxsym; | 160 | Elf_Sym *sym, *maxsym; | |
163 | int low, mid, high, nglob; | 161 | int low, mid, high, nglob; | |
164 | char *str, *cmp; | 162 | char *str, *cmp; | |
165 | 163 | |||
166 | sym = table->sd_symstart; | 164 | sym = table->sd_symstart; | |
167 | str = table->sd_strstart - table->sd_usroffset; | 165 | str = table->sd_strstart - table->sd_usroffset; | |
168 | nglob = table->sd_nglob; | 166 | nglob = table->sd_nglob; | |
169 | low = 0; | 167 | low = 0; | |
170 | high = nglob; | 168 | high = nglob; | |
171 | 169 | |||
172 | /* | 170 | /* | |
173 | * Start with a binary search of all global symbols in this table. | 171 | * Start with a binary search of all global symbols in this table. | |
174 | * Global symbols must have unique names. | 172 | * Global symbols must have unique names. | |
175 | */ | 173 | */ | |
176 | while (low < high) { | 174 | while (low < high) { | |
177 | mid = (low + high) >> 1; | 175 | mid = (low + high) >> 1; | |
178 | cmp = sym[mid].st_name + str; | 176 | cmp = sym[mid].st_name + str; | |
179 | if (cmp[0] < name[0] || strcmp(cmp, name) < 0) { | 177 | if (cmp[0] < name[0] || strcmp(cmp, name) < 0) { | |
180 | low = mid + 1; | 178 | low = mid + 1; | |
181 | } else { | 179 | } else { | |
182 | high = mid; | 180 | high = mid; | |
183 | } | 181 | } | |
184 | } | 182 | } | |
185 | KASSERT(low == high); | 183 | KASSERT(low == high); | |
186 | if (__predict_true(low < nglob && | 184 | if (__predict_true(low < nglob && | |
187 | strcmp(sym[low].st_name + str, name) == 0)) { | 185 | strcmp(sym[low].st_name + str, name) == 0)) { | |
188 | KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL); | 186 | KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL); | |
189 | return &sym[low]; | 187 | return &sym[low]; | |
190 | } | 188 | } | |
191 | 189 | |||
192 | /* | 190 | /* | |
193 | * Perform a linear search of local symbols (rare). Many local | 191 | * Perform a linear search of local symbols (rare). Many local | |
194 | * symbols with the same name can exist so are not included in | 192 | * symbols with the same name can exist so are not included in | |
195 | * the binary search. | 193 | * the binary search. | |
196 | */ | 194 | */ | |
197 | if (type != KSYMS_EXTERN) { | 195 | if (type != KSYMS_EXTERN) { | |
198 | maxsym = sym + table->sd_symsize / sizeof(Elf_Sym); | 196 | maxsym = sym + table->sd_symsize / sizeof(Elf_Sym); | |
199 | for (sym += nglob; sym < maxsym; sym++) { | 197 | for (sym += nglob; sym < maxsym; sym++) { | |
200 | if (strcmp(name, sym->st_name + str) == 0) { | 198 | if (strcmp(name, sym->st_name + str) == 0) { | |
201 | return sym; | 199 | return sym; | |
202 | } | 200 | } | |
203 | } | 201 | } | |
204 | } | 202 | } | |
205 | return NULL; | 203 | return NULL; | |
206 | } | 204 | } | |
207 | 205 | |||
208 | /* | 206 | /* | |
209 | * The "attach" is in reality done in ksyms_init(). | 207 | * The "attach" is in reality done in ksyms_init(). | |
210 | */ | 208 | */ | |
211 | void | 209 | void | |
212 | ksymsattach(int arg) | 210 | ksymsattach(int arg) | |
213 | { | 211 | { | |
214 | 212 | |||
215 | } | 213 | } | |
216 | 214 | |||
217 | void | 215 | void | |
218 | ksyms_init(void) | 216 | ksyms_init(void) | |
219 | { | 217 | { | |
220 | 218 | |||
221 | #ifdef SYMTAB_SPACE | 219 | #ifdef SYMTAB_SPACE | |
222 | if (!ksyms_initted && | 220 | if (!ksyms_initted && | |
223 | strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) { | 221 | strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) { | |
224 | ksyms_addsyms_elf(db_symtabsize, db_symtab, | 222 | ksyms_addsyms_elf(db_symtabsize, db_symtab, | |
225 | db_symtab + db_symtabsize); | 223 | db_symtab + db_symtabsize); | |
226 | } | 224 | } | |
227 | #endif | 225 | #endif | |
228 | 226 | |||
229 | mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE); | 227 | mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE); | |
230 | } | 228 | } | |
231 | 229 | |||
232 | /* | 230 | /* | |
233 | * Add a symbol table. | 231 | * Add a symbol table. | |
234 | * This is intended for use when the symbol table and its corresponding | 232 | * This is intended for use when the symbol table and its corresponding | |
235 | * string table are easily available. If they are embedded in an ELF | 233 | * string table are easily available. If they are embedded in an ELF | |
236 | * image, use addsymtab_elf() instead. | 234 | * image, use addsymtab_elf() instead. | |
237 | * | 235 | * | |
238 | * name - Symbol's table name. | 236 | * name - Symbol's table name. | |
239 | * symstart, symsize - Address and size of the symbol table. | 237 | * symstart, symsize - Address and size of the symbol table. | |
240 | * strstart, strsize - Address and size of the string table. | 238 | * strstart, strsize - Address and size of the string table. | |
241 | * tab - Symbol table to be updated with this information. | 239 | * tab - Symbol table to be updated with this information. | |
242 | * newstart - Address to which the symbol table has to be copied during | 240 | * newstart - Address to which the symbol table has to be copied during | |
243 | * shrinking. If NULL, it is not moved. | 241 | * shrinking. If NULL, it is not moved. | |
244 | */ | 242 | */ | |
245 | static const char *addsymtab_strstart; | 243 | static const char *addsymtab_strstart; | |
246 | 244 | |||
247 | static int | 245 | static int | |
248 | addsymtab_compar(const void *a, const void *b) | 246 | addsymtab_compar(const void *a, const void *b) | |
249 | { | 247 | { | |
250 | const Elf_Sym *sa, *sb; | 248 | const Elf_Sym *sa, *sb; | |
251 | 249 | |||
252 | sa = a; | 250 | sa = a; | |
253 | sb = b; | 251 | sb = b; | |
254 | 252 | |||
255 | /* | 253 | /* | |
256 | * Split the symbol table into two, with globals at the start | 254 | * Split the symbol table into two, with globals at the start | |
257 | * and locals at the end. | 255 | * and locals at the end. | |
258 | */ | 256 | */ | |
259 | if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) { | 257 | if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) { | |
260 | if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) { | 258 | if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) { | |
261 | return -1; | 259 | return -1; | |
262 | } | 260 | } | |
263 | if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) { | 261 | if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) { | |
264 | return 1; | 262 | return 1; | |
265 | } | 263 | } | |
266 | } | 264 | } | |
267 | 265 | |||
268 | /* Within each band, sort by name. */ | 266 | /* Within each band, sort by name. */ | |
269 | return strcmp(sa->st_name + addsymtab_strstart, | 267 | return strcmp(sa->st_name + addsymtab_strstart, | |
270 | sb->st_name + addsymtab_strstart); | 268 | sb->st_name + addsymtab_strstart); | |
271 | } | 269 | } | |
272 | 270 | |||
273 | static void | 271 | static void | |
274 | addsymtab(const char *name, void *symstart, size_t symsize, | 272 | addsymtab(const char *name, void *symstart, size_t symsize, | |
275 | void *strstart, size_t strsize, struct ksyms_symtab *tab, | 273 | void *strstart, size_t strsize, struct ksyms_symtab *tab, | |
276 | void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap) | 274 | void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap) | |
277 | { | 275 | { | |
278 | Elf_Sym *sym, *nsym, ts; | 276 | Elf_Sym *sym, *nsym, ts; | |
279 | int i, j, n, nglob; | 277 | int i, j, n, nglob; | |
280 | char *str; | 278 | char *str; | |
281 | int nsyms = symsize / sizeof(Elf_Sym); | 279 | int nsyms = symsize / sizeof(Elf_Sym); | |
282 | 280 | |||
283 | /* Sanity check for pre-allocated map table used during startup. */ | 281 | /* Sanity check for pre-allocated map table used during startup. */ | |
284 | if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) { | 282 | if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) { | |
285 | printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n", | 283 | printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n", | |
286 | nsyms, KSYMS_MAX_ID); | 284 | nsyms, KSYMS_MAX_ID); | |
287 | 285 | |||
288 | /* truncate for now */ | 286 | /* truncate for now */ | |
289 | nsyms = KSYMS_MAX_ID - 1; | 287 | nsyms = KSYMS_MAX_ID - 1; | |
290 | } | 288 | } | |
291 | 289 | |||
292 | tab->sd_symstart = symstart; | 290 | tab->sd_symstart = symstart; | |
293 | tab->sd_symsize = symsize; | 291 | tab->sd_symsize = symsize; | |
294 | tab->sd_strstart = strstart; | 292 | tab->sd_strstart = strstart; | |
295 | tab->sd_strsize = strsize; | 293 | tab->sd_strsize = strsize; | |
296 | tab->sd_name = name; | 294 | tab->sd_name = name; | |
297 | tab->sd_minsym = UINTPTR_MAX; | 295 | tab->sd_minsym = UINTPTR_MAX; | |
298 | tab->sd_maxsym = 0; | 296 | tab->sd_maxsym = 0; | |
299 | tab->sd_usroffset = 0; | 297 | tab->sd_usroffset = 0; | |
300 | tab->sd_gone = false; | 298 | tab->sd_gone = false; | |
301 | #ifdef KDTRACE_HOOKS | 299 | #ifdef KDTRACE_HOOKS | |
302 | tab->sd_ctfstart = ctfstart; | 300 | tab->sd_ctfstart = ctfstart; | |
303 | tab->sd_ctfsize = ctfsize; | 301 | tab->sd_ctfsize = ctfsize; | |
304 | tab->sd_nmap = nmap; | 302 | tab->sd_nmap = nmap; | |
305 | tab->sd_nmapsize = nsyms; | 303 | tab->sd_nmapsize = nsyms; | |
306 | #endif | 304 | #endif | |
307 | #ifdef KSYMS_DEBUG | 305 | #ifdef KSYMS_DEBUG | |
308 | printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n", | 306 | printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n", | |
309 | newstart, symstart, symsize, strstart, strsize, | 307 | newstart, symstart, symsize, strstart, strsize, | |
310 | tab->sd_strstart + tab->sd_strsize); | 308 | tab->sd_strstart + tab->sd_strsize); | |
311 | #endif | 309 | #endif | |
312 | 310 | |||
313 | if (nmap) { | 311 | if (nmap) { | |
314 | memset(nmap, 0, nsyms * sizeof(uint32_t)); | 312 | memset(nmap, 0, nsyms * sizeof(uint32_t)); | |
315 | } | 313 | } | |
316 | 314 | |||
317 | /* Pack symbol table by removing all file name references. */ | 315 | /* Pack symbol table by removing all file name references. */ | |
318 | sym = tab->sd_symstart; | 316 | sym = tab->sd_symstart; | |
319 | nsym = (Elf_Sym *)newstart; | 317 | nsym = (Elf_Sym *)newstart; | |
320 | str = tab->sd_strstart; | 318 | str = tab->sd_strstart; | |
321 | nglob = 0; | 319 | nglob = 0; | |
322 | for (i = n = 0; i < nsyms; i++) { | 320 | for (i = n = 0; i < nsyms; i++) { | |
323 | 321 | |||
324 | /* This breaks CTF mapping, so don't do it when | 322 | /* This breaks CTF mapping, so don't do it when | |
325 | * DTrace is enabled | 323 | * DTrace is enabled | |
326 | */ | 324 | */ | |
327 | #ifndef KDTRACE_HOOKS | 325 | #ifndef KDTRACE_HOOKS | |
328 | /* | 326 | /* | |
329 | * Remove useless symbols. | 327 | * Remove useless symbols. | |
330 | * Should actually remove all typeless symbols. | 328 | * Should actually remove all typeless symbols. | |
331 | */ | 329 | */ | |
332 | if (sym[i].st_name == 0) | 330 | if (sym[i].st_name == 0) | |
333 | continue; /* Skip nameless entries */ | 331 | continue; /* Skip nameless entries */ | |
334 | if (sym[i].st_shndx == SHN_UNDEF) | 332 | if (sym[i].st_shndx == SHN_UNDEF) | |
335 | continue; /* Skip external references */ | 333 | continue; /* Skip external references */ | |
336 | if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE) | 334 | if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE) | |
337 | continue; /* Skip filenames */ | 335 | continue; /* Skip filenames */ | |
338 | if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE && | 336 | if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE && | |
339 | sym[i].st_value == 0 && | 337 | sym[i].st_value == 0 && | |
340 | strcmp(str + sym[i].st_name, "*ABS*") == 0) | 338 | strcmp(str + sym[i].st_name, "*ABS*") == 0) | |
341 | continue; /* XXX */ | 339 | continue; /* XXX */ | |
342 | if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE && | 340 | if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE && | |
343 | strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0) | 341 | strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0) | |
344 | continue; /* XXX */ | 342 | continue; /* XXX */ | |
345 | #endif | 343 | #endif | |
346 | 344 | |||
347 | /* Save symbol. Set it as an absolute offset */ | 345 | /* Save symbol. Set it as an absolute offset */ | |
348 | nsym[n] = sym[i]; | 346 | nsym[n] = sym[i]; | |
349 | 347 | |||
350 | #ifdef KDTRACE_HOOKS | 348 | #ifdef KDTRACE_HOOKS | |
351 | if (nmap != NULL) { | 349 | if (nmap != NULL) { | |
352 | /* | 350 | /* | |
353 | * Save the size, replace it with the symbol id so | 351 | * Save the size, replace it with the symbol id so | |
354 | * the mapping can be done after the cleanup and sort. | 352 | * the mapping can be done after the cleanup and sort. | |
355 | */ | 353 | */ | |
356 | nmap[i] = nsym[n].st_size; | 354 | nmap[i] = nsym[n].st_size; | |
357 | nsym[n].st_size = i + 1; /* zero is reserved */ | 355 | nsym[n].st_size = i + 1; /* zero is reserved */ | |
358 | } | 356 | } | |
359 | #endif | 357 | #endif | |
360 | 358 | |||
361 | nsym[n].st_shndx = SHBSS; | 359 | nsym[n].st_shndx = SHBSS; | |
362 | j = strlen(nsym[n].st_name + str) + 1; | 360 | j = strlen(nsym[n].st_name + str) + 1; | |
363 | if (j > ksyms_maxlen) | 361 | if (j > ksyms_maxlen) | |
364 | ksyms_maxlen = j; | 362 | ksyms_maxlen = j; | |
365 | nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL); | 363 | nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL); | |
366 | 364 | |||
367 | /* Compute min and max symbols. */ | 365 | /* Compute min and max symbols. */ | |
368 | if (strcmp(str + sym[i].st_name, "*ABS*") != 0 | 366 | if (strcmp(str + sym[i].st_name, "*ABS*") != 0 | |
369 | && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) { | 367 | && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) { | |
370 | if (nsym[n].st_value < tab->sd_minsym) { | 368 | if (nsym[n].st_value < tab->sd_minsym) { | |
371 | tab->sd_minsym = nsym[n].st_value; | 369 | tab->sd_minsym = nsym[n].st_value; | |
372 | } | 370 | } | |
373 | if (nsym[n].st_value > tab->sd_maxsym) { | 371 | if (nsym[n].st_value > tab->sd_maxsym) { | |
374 | tab->sd_maxsym = nsym[n].st_value; | 372 | tab->sd_maxsym = nsym[n].st_value; | |
375 | } | 373 | } | |
376 | } | 374 | } | |
377 | n++; | 375 | n++; | |
378 | } | 376 | } | |
379 | 377 | |||
380 | /* Fill the rest of the record, and sort the symbols. */ | 378 | /* Fill the rest of the record, and sort the symbols. */ | |
381 | tab->sd_symstart = nsym; | 379 | tab->sd_symstart = nsym; | |
382 | tab->sd_symsize = n * sizeof(Elf_Sym); | 380 | tab->sd_symsize = n * sizeof(Elf_Sym); | |
383 | tab->sd_nglob = nglob; | 381 | tab->sd_nglob = nglob; | |
384 | addsymtab_strstart = str; | 382 | addsymtab_strstart = str; | |
385 | if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0) | 383 | if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0) | |
386 | panic("addsymtab"); | 384 | panic("addsymtab"); | |
387 | 385 | |||
388 | #ifdef KDTRACE_HOOKS | 386 | #ifdef KDTRACE_HOOKS | |
389 | /* | 387 | /* | |
390 | * Build the mapping from original symbol id to new symbol table. | 388 | * Build the mapping from original symbol id to new symbol table. | |
391 | * Deleted symbols will have a zero map, indices will be one based | 389 | * Deleted symbols will have a zero map, indices will be one based | |
392 | * instead of zero based. | 390 | * instead of zero based. | |
393 | * Resulting map is sd_nmap[original_index] = new_index + 1 | 391 | * Resulting map is sd_nmap[original_index] = new_index + 1 | |
394 | */ | 392 | */ | |
395 | if (nmap != NULL) { | 393 | if (nmap != NULL) { | |
396 | int new; | 394 | int new; | |
397 | for (new = 0; new < n; new++) { | 395 | for (new = 0; new < n; new++) { | |
398 | uint32_t orig = nsym[new].st_size - 1; | 396 | uint32_t orig = nsym[new].st_size - 1; | |
399 | uint32_t size = nmap[orig]; | 397 | uint32_t size = nmap[orig]; | |
400 | 398 | |||
401 | nmap[orig] = new + 1; | 399 | nmap[orig] = new + 1; | |
402 | 400 | |||
403 | /* restore the size */ | 401 | /* restore the size */ | |
404 | nsym[new].st_size = size; | 402 | nsym[new].st_size = size; | |
405 | } | 403 | } | |
406 | } | 404 | } | |
407 | #endif | 405 | #endif | |
408 | 406 | |||
409 | /* ksymsread() is unlocked, so membar. */ | 407 | /* ksymsread() is unlocked, so membar. */ | |
410 | membar_producer(); | 408 | membar_producer(); | |
411 | TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue); | 409 | TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue); | |
412 | ksyms_sizes_calc(); | 410 | ksyms_sizes_calc(); | |
413 | ksyms_initted = true; | 411 | ksyms_initted = true; | |
414 | } | 412 | } | |
415 | 413 | |||
416 | /* | 414 | /* | |
417 | * Setup the kernel symbol table stuff. | 415 | * Setup the kernel symbol table stuff. | |
418 | */ | 416 | */ | |
419 | void | 417 | void | |
420 | ksyms_addsyms_elf(int symsize, void *start, void *end) | 418 | ksyms_addsyms_elf(int symsize, void *start, void *end) | |
421 | { | 419 | { | |
422 | int i, j; | 420 | int i, j; | |
423 | Elf_Shdr *shdr; | 421 | Elf_Shdr *shdr; | |
424 | char *symstart = NULL, *strstart = NULL; | 422 | char *symstart = NULL, *strstart = NULL; | |
425 | size_t strsize = 0; | 423 | size_t strsize = 0; | |
426 | Elf_Ehdr *ehdr; | 424 | Elf_Ehdr *ehdr; | |
427 | char *ctfstart = NULL; | 425 | char *ctfstart = NULL; | |
428 | size_t ctfsize = 0; | 426 | size_t ctfsize = 0; | |
429 | 427 | |||
430 | if (symsize <= 0) { | 428 | if (symsize <= 0) { | |
431 | printf("[ Kernel symbol table missing! ]\n"); | 429 | printf("[ Kernel symbol table missing! ]\n"); | |
432 | return; | 430 | return; | |
433 | } | 431 | } | |
434 | 432 | |||
435 | /* Sanity check */ | 433 | /* Sanity check */ | |
436 | if (ALIGNED_POINTER(start, long) == 0) { | 434 | if (ALIGNED_POINTER(start, long) == 0) { | |
437 | printf("[ Kernel symbol table has bad start address %p ]\n", | 435 | printf("[ Kernel symbol table has bad start address %p ]\n", | |
438 | start); | 436 | start); | |
439 | return; | 437 | return; | |
440 | } | 438 | } | |
441 | 439 | |||
442 | ehdr = (Elf_Ehdr *)start; | 440 | ehdr = (Elf_Ehdr *)start; | |
443 | 441 | |||
444 | /* check if this is a valid ELF header */ | 442 | /* check if this is a valid ELF header */ | |
445 | /* No reason to verify arch type, the kernel is actually running! */ | 443 | /* No reason to verify arch type, the kernel is actually running! */ | |
446 | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | 444 | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | |
447 | ehdr->e_ident[EI_CLASS] != ELFCLASS || | 445 | ehdr->e_ident[EI_CLASS] != ELFCLASS || | |
448 | ehdr->e_version > 1) { | 446 | ehdr->e_version > 1) { | |
449 | printf("[ Kernel symbol table invalid! ]\n"); | 447 | printf("[ Kernel symbol table invalid! ]\n"); | |
450 | return; /* nothing to do */ | 448 | return; /* nothing to do */ | |
451 | } | 449 | } | |
452 | 450 | |||
453 | /* Loaded header will be scratched in addsymtab */ | 451 | /* Loaded header will be scratched in addsymtab */ | |
454 | ksyms_hdr_init(start); | 452 | ksyms_hdr_init(start); | |
455 | 453 | |||
456 | /* Find the symbol table and the corresponding string table. */ | 454 | /* Find the symbol table and the corresponding string table. */ | |
457 | shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff); | 455 | shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff); | |
458 | for (i = 1; i < ehdr->e_shnum; i++) { | 456 | for (i = 1; i < ehdr->e_shnum; i++) { | |
459 | if (shdr[i].sh_type != SHT_SYMTAB) | 457 | if (shdr[i].sh_type != SHT_SYMTAB) | |
460 | continue; | 458 | continue; | |
461 | if (shdr[i].sh_offset == 0) | 459 | if (shdr[i].sh_offset == 0) | |
462 | continue; | 460 | continue; | |
463 | symstart = (uint8_t *)start + shdr[i].sh_offset; | 461 | symstart = (uint8_t *)start + shdr[i].sh_offset; | |
464 | symsize = shdr[i].sh_size; | 462 | symsize = shdr[i].sh_size; | |
465 | j = shdr[i].sh_link; | 463 | j = shdr[i].sh_link; | |
466 | if (shdr[j].sh_offset == 0) | 464 | if (shdr[j].sh_offset == 0) | |
467 | continue; /* Can this happen? */ | 465 | continue; /* Can this happen? */ | |
468 | strstart = (uint8_t *)start + shdr[j].sh_offset; | 466 | strstart = (uint8_t *)start + shdr[j].sh_offset; | |
469 | strsize = shdr[j].sh_size; | 467 | strsize = shdr[j].sh_size; | |
470 | break; | 468 | break; | |
471 | } | 469 | } | |
472 | 470 | |||
473 | #ifdef KDTRACE_HOOKS | 471 | #ifdef KDTRACE_HOOKS | |
474 | /* Find the CTF section */ | 472 | /* Find the CTF section */ | |
475 | shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff); | 473 | shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff); | |
476 | if (ehdr->e_shstrndx != 0) { | 474 | if (ehdr->e_shstrndx != 0) { | |
477 | char *shstr = (uint8_t *)start + | 475 | char *shstr = (uint8_t *)start + | |
478 | shdr[ehdr->e_shstrndx].sh_offset; | 476 | shdr[ehdr->e_shstrndx].sh_offset; | |
479 | for (i = 1; i < ehdr->e_shnum; i++) { | 477 | for (i = 1; i < ehdr->e_shnum; i++) { | |
480 | #ifdef DEBUG | 478 | #ifdef DEBUG | |
481 | printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]); | 479 | printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]); | |
482 | #endif | 480 | #endif | |
483 | if (shdr[i].sh_type != SHT_PROGBITS) | 481 | if (shdr[i].sh_type != SHT_PROGBITS) | |
484 | continue; | 482 | continue; | |
485 | if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10) | 483 | if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10) | |
486 | != 0) | 484 | != 0) | |
487 | continue; | 485 | continue; | |
488 | ctfstart = (uint8_t *)start + shdr[i].sh_offset; | 486 | ctfstart = (uint8_t *)start + shdr[i].sh_offset; | |
489 | ctfsize = shdr[i].sh_size; | 487 | ctfsize = shdr[i].sh_size; | |
490 | ksyms_ctfsz = ctfsize; | 488 | ksyms_ctfsz = ctfsize; | |
491 | #ifdef DEBUG | 489 | #ifdef DEBUG | |
492 | aprint_normal("Found CTF at %p, size 0x%zx\n", | 490 | aprint_normal("Found CTF at %p, size 0x%zx\n", | |
493 | ctfstart, ctfsize); | 491 | ctfstart, ctfsize); | |
494 | #endif | 492 | #endif | |
495 | break; | 493 | break; | |
496 | } | 494 | } | |
497 | #ifdef DEBUG | 495 | #ifdef DEBUG | |
498 | } else { | 496 | } else { | |
499 | printf("ksyms: e_shstrndx == 0\n"); | 497 | printf("ksyms: e_shstrndx == 0\n"); | |
500 | #endif | 498 | #endif | |
501 | } | 499 | } | |
502 | #endif | 500 | #endif | |
503 | 501 | |||
504 | if (!ksyms_verify(symstart, strstart)) | 502 | if (!ksyms_verify(symstart, strstart)) | |
505 | return; | 503 | return; | |
506 | 504 | |||
507 | addsymtab("netbsd", symstart, symsize, strstart, strsize, | 505 | addsymtab("netbsd", symstart, symsize, strstart, strsize, | |
508 | &kernel_symtab, start, ctfstart, ctfsize, ksyms_nmap); | 506 | &kernel_symtab, start, ctfstart, ctfsize, ksyms_nmap); | |
509 | 507 | |||
510 | #ifdef DEBUG | 508 | #ifdef DEBUG | |
511 | aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n", | 509 | aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n", | |
512 | kernel_symtab.sd_symstart, kernel_symtab.sd_strstart, | 510 | kernel_symtab.sd_symstart, kernel_symtab.sd_strstart, | |
513 | (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym)); | 511 | (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym)); | |
514 | #endif | 512 | #endif | |
515 | } | 513 | } | |
516 | 514 | |||
517 | /* | 515 | /* | |
518 | * Setup the kernel symbol table stuff. | 516 | * Setup the kernel symbol table stuff. | |
519 | * Use this when the address of the symbol and string tables are known; | 517 | * Use this when the address of the symbol and string tables are known; | |
520 | * otherwise use ksyms_init with an ELF image. | 518 | * otherwise use ksyms_init with an ELF image. | |
521 | * We need to pass a minimal ELF header which will later be completed by | 519 | * We need to pass a minimal ELF header which will later be completed by | |
522 | * ksyms_hdr_init and handed off to userland through /dev/ksyms. We use | 520 | * ksyms_hdr_init and handed off to userland through /dev/ksyms. We use | |
523 | * a void *rather than a pointer to avoid exposing the Elf_Ehdr type. | 521 | * a void *rather than a pointer to avoid exposing the Elf_Ehdr type. | |
524 | */ | 522 | */ | |
525 | void | 523 | void | |
526 | ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize, | 524 | ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize, | |
527 | void *strstart, size_t strsize) | 525 | void *strstart, size_t strsize) | |
528 | { | 526 | { | |
529 | 527 | |||
530 | if (!ksyms_verify(symstart, strstart)) | 528 | if (!ksyms_verify(symstart, strstart)) | |
531 | return; | 529 | return; | |
532 | 530 | |||
533 | ksyms_hdr_init(ehdr); | 531 | ksyms_hdr_init(ehdr); | |
534 | addsymtab("netbsd", symstart, symsize, strstart, strsize, | 532 | addsymtab("netbsd", symstart, symsize, strstart, strsize, | |
535 | &kernel_symtab, symstart, NULL, 0, ksyms_nmap); | 533 | &kernel_symtab, symstart, NULL, 0, ksyms_nmap); | |
536 | } | 534 | } | |
537 | 535 | |||
538 | /* | 536 | /* | |
539 | * Get the value associated with a symbol. | 537 | * Get the value associated with a symbol. | |
540 | * "mod" is the module name, or null if any module. | 538 | * "mod" is the module name, or null if any module. | |
541 | * "sym" is the symbol name. | 539 | * "sym" is the symbol name. | |
542 | * "val" is a pointer to the corresponding value, if call succeeded. | 540 | * "val" is a pointer to the corresponding value, if call succeeded. | |
543 | * Returns 0 if success or ENOENT if no such entry. | 541 | * Returns 0 if success or ENOENT if no such entry. | |
544 | * | 542 | * | |
545 | * Call with ksyms_lock, unless known that the symbol table can't change. | 543 | * Call with ksyms_lock, unless known that the symbol table can't change. | |
546 | */ | 544 | */ | |
547 | int | 545 | int | |
548 | ksyms_getval_unlocked(const char *mod, const char *sym, unsigned long *val, | 546 | ksyms_getval_unlocked(const char *mod, const char *sym, unsigned long *val, | |
549 | int type) | 547 | int type) | |
550 | { | 548 | { | |
551 | struct ksyms_symtab *st; | 549 | struct ksyms_symtab *st; | |
552 | Elf_Sym *es; | 550 | Elf_Sym *es; | |
553 | 551 | |||
554 | #ifdef KSYMS_DEBUG | 552 | #ifdef KSYMS_DEBUG | |
555 | if (ksyms_debug & FOLLOW_CALLS) | 553 | if (ksyms_debug & FOLLOW_CALLS) | |
556 | printf("ksyms_getval_unlocked: mod %s sym %s valp %p\n", | 554 | printf("ksyms_getval_unlocked: mod %s sym %s valp %p\n", | |
557 | mod, sym, val); | 555 | mod, sym, val); | |
558 | #endif | 556 | #endif | |
559 | 557 | |||
560 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | 558 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | |
561 | if (__predict_false(st->sd_gone)) | 559 | if (__predict_false(st->sd_gone)) | |
562 | continue; | 560 | continue; | |
563 | if (mod != NULL && strcmp(st->sd_name, mod)) | 561 | if (mod != NULL && strcmp(st->sd_name, mod)) | |
564 | continue; | 562 | continue; | |
565 | if ((es = findsym(sym, st, type)) != NULL) { | 563 | if ((es = findsym(sym, st, type)) != NULL) { | |
566 | *val = es->st_value; | 564 | *val = es->st_value; | |
567 | return 0; | 565 | return 0; | |
568 | } | 566 | } | |
569 | } | 567 | } | |
570 | return ENOENT; | 568 | return ENOENT; | |
571 | } | 569 | } | |
572 | 570 | |||
573 | int | 571 | int | |
574 | ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type) | 572 | ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type) | |
575 | { | 573 | { | |
576 | int rc; | 574 | int rc; | |
577 | 575 | |||
578 | if (!ksyms_initted) | 576 | if (!ksyms_initted) | |
579 | return ENOENT; | 577 | return ENOENT; | |
580 | 578 | |||
581 | mutex_enter(&ksyms_lock); | 579 | mutex_enter(&ksyms_lock); | |
582 | rc = ksyms_getval_unlocked(mod, sym, val, type); | 580 | rc = ksyms_getval_unlocked(mod, sym, val, type); | |
583 | mutex_exit(&ksyms_lock); | 581 | mutex_exit(&ksyms_lock); | |
584 | return rc; | 582 | return rc; | |
585 | } | 583 | } | |
586 | 584 | |||
587 | struct ksyms_symtab * | 585 | struct ksyms_symtab * | |
588 | ksyms_get_mod(const char *mod) | 586 | ksyms_get_mod(const char *mod) | |
589 | { | 587 | { | |
590 | struct ksyms_symtab *st; | 588 | struct ksyms_symtab *st; | |
591 | 589 | |||
592 | mutex_enter(&ksyms_lock); | 590 | mutex_enter(&ksyms_lock); | |
593 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | 591 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | |
594 | if (__predict_false(st->sd_gone)) | 592 | if (__predict_false(st->sd_gone)) | |
595 | continue; | 593 | continue; | |
596 | if (mod != NULL && strcmp(st->sd_name, mod)) | 594 | if (mod != NULL && strcmp(st->sd_name, mod)) | |
597 | continue; | 595 | continue; | |
598 | break; | 596 | break; | |
599 | } | 597 | } | |
600 | mutex_exit(&ksyms_lock); | 598 | mutex_exit(&ksyms_lock); | |
601 | 599 | |||
602 | return st; | 600 | return st; | |
603 | } | 601 | } | |
604 | 602 | |||
605 | 603 | |||
606 | /* | 604 | /* | |
607 | * ksyms_mod_foreach() | 605 | * ksyms_mod_foreach() | |
608 | * | 606 | * | |
609 | * Iterate over the symbol table of the specified module, calling the callback | 607 | * Iterate over the symbol table of the specified module, calling the callback | |
610 | * handler for each symbol. Stop iterating if the handler return is non-zero. | 608 | * handler for each symbol. Stop iterating if the handler return is non-zero. | |
611 | * | 609 | * | |
612 | */ | 610 | */ | |
613 | 611 | |||
int
ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
{
	struct ksyms_symtab *st;
	Elf_Sym *sym, *maxsym;
	char *str;
	int symindx;

	if (!ksyms_initted)
		return ENOENT;

	/* ksyms_lock is held across every callback invocation, so the
	 * callback must not sleep or re-enter the ksyms code. */
	mutex_enter(&ksyms_lock);

	/* find the module; a NULL "mod" visits every live table */
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (__predict_false(st->sd_gone))
			continue;
		if (mod != NULL && strcmp(st->sd_name, mod))
			continue;

		sym = st->sd_symstart;
		/* Undo the /dev/ksyms st_name rebasing (sd_usroffset) so
		 * "str + st_name" points at the real in-kernel string. */
		str = st->sd_strstart - st->sd_usroffset;

		/* now iterate through the symbols */
		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
		for (symindx = 0; sym < maxsym; sym++, symindx++) {
			/* A non-zero callback return stops the scan of THIS
			 * table only; the outer table loop keeps going. */
			if (callback(str + sym->st_name, symindx,
			    (void *)sym->st_value,
			    sym->st_size,
			    sym->st_info,
			    opaque) != 0) {
				break;
			}
		}
	}
	mutex_exit(&ksyms_lock);

	/* NOTE(review): returns 0 even when no table matched "mod" —
	 * callers apparently cannot distinguish that case; confirm. */
	return 0;
}
653 | 651 | |||
654 | /* | 652 | /* | |
655 | * Get "mod" and "symbol" associated with an address. | 653 | * Get "mod" and "symbol" associated with an address. | |
656 | * Returns 0 if success or ENOENT if no such entry. | 654 | * Returns 0 if success or ENOENT if no such entry. | |
657 | * | 655 | * | |
658 | * Call with ksyms_lock, unless known that the symbol table can't change. | 656 | * Call with ksyms_lock, unless known that the symbol table can't change. | |
659 | */ | 657 | */ | |
int
ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
{
	struct ksyms_symtab *st;
	Elf_Sym *les, *es = NULL;	/* es: best (closest preceding) symbol */
	vaddr_t laddr = 0;		/* value of the best symbol so far */
	const char *lmod = NULL;	/* table name owning the best symbol */
	char *stable = NULL;		/* string table base for the best symbol */
	int type, i, sz;

	if (!ksyms_initted)
		return ENOENT;

	/* Linear scan of every live table whose address range covers v,
	 * keeping the symbol with the largest st_value that is <= v. */
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (st->sd_gone)
			continue;
		if (v < st->sd_minsym || v > st->sd_maxsym)
			continue;
		sz = st->sd_symsize/sizeof(Elf_Sym);
		for (i = 0; i < sz; i++) {
			les = st->sd_symstart + i;
			type = ELF_ST_TYPE(les->st_info);

			/* KSYMS_PROC: caller only wants functions */
			if ((f & KSYMS_PROC) && (type != STT_FUNC))
				continue;

			if (type == STT_NOTYPE)
				continue;

			/* Without KSYMS_ANY, restrict to functions/objects */
			if (((f & KSYMS_ANY) == 0) &&
			    (type != STT_FUNC) && (type != STT_OBJECT))
				continue;

			if ((les->st_value <= v) && (les->st_value > laddr)) {
				laddr = les->st_value;
				es = les;
				lmod = st->sd_name;
				/* undo /dev/ksyms st_name rebasing */
				stable = st->sd_strstart - st->sd_usroffset;
			}
		}
	}
	if (es == NULL)
		return ENOENT;
	/* KSYMS_EXACT: only accept a symbol whose value equals v exactly */
	if ((f & KSYMS_EXACT) && (v != es->st_value))
		return ENOENT;
	if (mod)
		*mod = lmod;
	if (sym)
		*sym = stable + es->st_name;
	return 0;
}
711 | 709 | |||
712 | /* | 710 | /* | |
713 | * Add a symbol table from a loadable module. | 711 | * Add a symbol table from a loadable module. | |
714 | */ | 712 | */ | |
void
ksyms_modload(const char *name, void *symstart, vsize_t symsize,
    char *strstart, vsize_t strsize)
{
	struct ksyms_symtab *st;

	/* Allocate before taking the lock: KM_SLEEP may block. */
	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
	mutex_enter(&ksyms_lock);
	/* No CTF data and no nlist cache for module tables. */
	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
	    NULL, 0, NULL);
	mutex_exit(&ksyms_lock);
}
727 | 725 | |||
728 | /* | 726 | /* | |
729 | * Remove a symbol table from a loadable module. | 727 | * Remove a symbol table from a loadable module. | |
730 | */ | 728 | */ | |
void
ksyms_modunload(const char *name)
{
	struct ksyms_symtab *st;

	mutex_enter(&ksyms_lock);
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (st->sd_gone)
			continue;
		if (strcmp(name, st->sd_name) != 0)
			continue;
		/* Mark dead first; actual removal is deferred while
		 * /dev/ksyms is open so readers keep a stable snapshot
		 * (ksymsclose() reaps sd_gone entries). */
		st->sd_gone = true;
		if (!ksyms_isopen) {
			TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
			ksyms_sizes_calc();
			kmem_free(st, sizeof(*st));
		}
		break;
	}
	mutex_exit(&ksyms_lock);
	/* Fires if "name" was never loaded: st is NULL only when the
	 * TAILQ_FOREACH ran to completion without a match.  NOTE(review):
	 * after kmem_free() above, st is dangling but is only compared
	 * against NULL here, never dereferenced. */
	KASSERT(st != NULL);
}
753 | 751 | |||
754 | #ifdef DDB | 752 | #ifdef DDB | |
755 | /* | 753 | /* | |
756 | * Keep sifting stuff here, to avoid export of ksyms internals. | 754 | * Keep sifting stuff here, to avoid export of ksyms internals. | |
757 | * | 755 | * | |
758 | * Systems is expected to be quiescent, so no locking done. | 756 | * Systems is expected to be quiescent, so no locking done. | |
759 | */ | 757 | */ | |
int
ksyms_sift(char *mod, char *sym, int mode)
{
	struct ksyms_symtab *st;
	char *sb;
	int i, sz;

	if (!ksyms_initted)
		return ENOENT;

	/* Print every symbol whose name contains "sym" as a substring,
	 * optionally restricted to table "mod".  No locking: the DDB
	 * caller expects the system to be quiescent. */
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (st->sd_gone)
			continue;
		if (mod && strcmp(mod, st->sd_name))
			continue;
		/* undo /dev/ksyms st_name rebasing */
		sb = st->sd_strstart - st->sd_usroffset;

		sz = st->sd_symsize/sizeof(Elf_Sym);
		for (i = 0; i < sz; i++) {
			Elf_Sym *les = st->sd_symstart + i;
			char c;

			if (strstr(sb + les->st_name, sym) == NULL)
				continue;

			/* 'F' mode appends a type marker to each name. */
			if (mode == 'F') {
				switch (ELF_ST_TYPE(les->st_info)) {
				case STT_OBJECT:
					c = '+';
					break;
				case STT_FUNC:
					c = '*';
					break;
				case STT_SECTION:
					c = '&';
					break;
				case STT_FILE:
					c = '/';
					break;
				default:
					c = ' ';
					break;
				}
				db_printf("%s%c ", sb + les->st_name, c);
			} else
				db_printf("%s ", sb + les->st_name);
		}
	}
	/* NOTE(review): ENOENT is returned unconditionally, even when
	 * matches were printed — presumably the DDB caller ignores it;
	 * confirm before relying on the return value. */
	return ENOENT;
}
810 | #endif /* DDB */ | 808 | #endif /* DDB */ | |
811 | 809 | |||
812 | /* | 810 | /* | |
813 | * In case we exposing the symbol table to the userland using the pseudo- | 811 | * In case we exposing the symbol table to the userland using the pseudo- | |
814 | * device /dev/ksyms, it is easier to provide all the tables as one. | 812 | * device /dev/ksyms, it is easier to provide all the tables as one. | |
815 | * However, it means we have to change all the st_name fields for the | 813 | * However, it means we have to change all the st_name fields for the | |
816 | * symbols so they match the ELF image that the userland will read | 814 | * symbols so they match the ELF image that the userland will read | |
817 | * through the device. | 815 | * through the device. | |
818 | * | 816 | * | |
819 | * The actual (correct) value of st_name is preserved through a global | 817 | * The actual (correct) value of st_name is preserved through a global | |
820 | * offset stored in the symbol table structure. | 818 | * offset stored in the symbol table structure. | |
821 | * | 819 | * | |
822 | * Call with ksyms_lock held. | 820 | * Call with ksyms_lock held. | |
823 | */ | 821 | */ | |
static void
ksyms_sizes_calc(void)
{
	struct ksyms_symtab *st;
	int i, delta;

	/* Recompute the concatenated symbol/string table sizes and rebase
	 * every table's st_name fields so they index the single merged
	 * string table that /dev/ksyms presents to userland. */
	ksyms_symsz = ksyms_strsz = 0;
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		/* delta is how far this table's strings moved relative to
		 * its previously recorded userland offset. */
		delta = ksyms_strsz - st->sd_usroffset;
		if (delta != 0) {
			for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
				st->sd_symstart[i].st_name += delta;
			st->sd_usroffset = ksyms_strsz;
		}
		ksyms_symsz += st->sd_symsize;
		ksyms_strsz += st->sd_strsize;
	}
}
842 | 840 | |||
/*
 * Build the template ELF header + program/section headers served through
 * /dev/ksyms.  The numeric sh_name values (1, 9, 17, 27, 32) are offsets
 * into kh_strtab and MUST stay in sync with the strlcpy() calls at the
 * bottom of this function.
 */
static void
ksyms_hdr_init(void *hdraddr)
{

	/* Copy the loaded elf exec header */
	memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr));

	/* Set correct program/section header sizes, offsets and numbers */
	ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]);
	ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
	ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR;
	ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]);
	ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
	ksyms_hdr.kh_ehdr.e_shnum = NSECHDR;
	ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB;

	/* Text/data - fake: a single all-covering loadable segment */
	ksyms_hdr.kh_phdr[0].p_type = PT_LOAD;
	ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L;
	ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W;

	/* First section is null */

	/* Second section header; ".symtab" */
	ksyms_hdr.kh_shdr[SYMTAB].sh_name = 1; /* Section 3 offset */
	ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB;
	ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr);
	/* ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */
	ksyms_hdr.kh_shdr[SYMTAB].sh_link = 2; /* Corresponding strtab */
	ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long);
	ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym);

	/* Third section header; ".strtab" */
	ksyms_hdr.kh_shdr[STRTAB].sh_name = 9; /* Section 3 offset */
	ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB;
	/* ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */
	/* ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */
	ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char);

	/* Fourth section, ".shstrtab" */
	ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = 17; /* This section name offset */
	ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB;
	ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset =
	    offsetof(struct ksyms_hdr, kh_strtab);
	ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ;
	ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char);

	/* Fifth section, ".bss". All symbols reside here. */
	ksyms_hdr.kh_shdr[SHBSS].sh_name = 27; /* This section name offset */
	ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS;
	ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0;
	ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L;
	ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE;
	ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR;

#ifdef KDTRACE_HOOKS
	/* Sixth section header; ".SUNW_ctf" (dtrace CTF type data) */
	ksyms_hdr.kh_shdr[SHCTF].sh_name = 32; /* Section 6 offset */
	ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS;
	/* ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */
	/* ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */
	ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */
	ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char);
#endif

	/* Set section names; offsets must match the sh_name values above */
	strlcpy(&ksyms_hdr.kh_strtab[1], ".symtab",
	    sizeof(ksyms_hdr.kh_strtab) - 1);
	strlcpy(&ksyms_hdr.kh_strtab[9], ".strtab",
	    sizeof(ksyms_hdr.kh_strtab) - 9);
	strlcpy(&ksyms_hdr.kh_strtab[17], ".shstrtab",
	    sizeof(ksyms_hdr.kh_strtab) - 17);
	strlcpy(&ksyms_hdr.kh_strtab[27], ".bss",
	    sizeof(ksyms_hdr.kh_strtab) - 27);
#ifdef KDTRACE_HOOKS
	strlcpy(&ksyms_hdr.kh_strtab[32], ".SUNW_ctf",
	    sizeof(ksyms_hdr.kh_strtab) - 32);
#endif
}
922 | 920 | |||
static int
ksymsopen(dev_t dev, int oflags, int devtype, struct lwp *l)
{

	if (minor(dev) != 0 || !ksyms_initted)
		return ENXIO;

	/*
	 * Create a "snapshot" of the kernel symbol table.  Setting
	 * ksyms_isopen will prevent symbol tables from being freed.
	 */
	mutex_enter(&ksyms_lock);
	/* Lay the sections out back to back: symtab, strtab, then CTF. */
	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
#ifdef KDTRACE_HOOKS
	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
#endif
	ksyms_isopen = true;
	mutex_exit(&ksyms_lock);

	return 0;
}
950 | 948 | |||
951 | static int | 949 | static int | |
952 | ksymsclose(dev_t dev, int oflags, int devtype, struct lwp *l) | 950 | ksymsclose(dev_t dev, int oflags, int devtype, struct lwp *l) | |
953 | { | 951 | { | |
954 | struct ksyms_symtab *st, *next; | 952 | struct ksyms_symtab *st, *next; | |
955 | bool resize; | 953 | bool resize; | |
956 | 954 | |||
957 | /* Discard refernces to symbol tables. */ | 955 | /* Discard refernces to symbol tables. */ | |
958 | mutex_enter(&ksyms_lock); | 956 | mutex_enter(&ksyms_lock); | |
959 | ksyms_isopen = false; | 957 | ksyms_isopen = false; | |
960 | resize = false; | 958 | resize = false; | |
961 | for (st = TAILQ_FIRST(&ksyms_symtabs); st != NULL; st = next) { | 959 | for (st = TAILQ_FIRST(&ksyms_symtabs); st != NULL; st = next) { | |
962 | next = TAILQ_NEXT(st, sd_queue); | 960 | next = TAILQ_NEXT(st, sd_queue); | |
963 | if (st->sd_gone) { | 961 | if (st->sd_gone) { | |
964 | TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); | 962 | TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); | |
965 | kmem_free(st, sizeof(*st)); | 963 | kmem_free(st, sizeof(*st)); | |
966 | resize = true; | 964 | resize = true; | |
967 | } | 965 | } | |
968 | } | 966 | } | |
969 | if (resize) | 967 | if (resize) | |
970 | ksyms_sizes_calc(); | 968 | ksyms_sizes_calc(); | |
971 | mutex_exit(&ksyms_lock); | 969 | mutex_exit(&ksyms_lock); | |
972 | 970 | |||
973 | return 0; | 971 | return 0; | |
974 | } | 972 | } | |
975 | 973 | |||
976 | static int | 974 | static int | |
977 | ksymsread(dev_t dev, struct uio *uio, int ioflag) | 975 | ksymsread(dev_t dev, struct uio *uio, int ioflag) | |
978 | { | 976 | { | |
979 | struct ksyms_symtab *st; | 977 | struct ksyms_symtab *st; | |
980 | size_t filepos, inpos, off; | 978 | size_t filepos, inpos, off; | |
981 | int error; | 979 | int error; | |
982 | #ifdef KDTRACE_HOOKS | 980 | #ifdef KDTRACE_HOOKS | |
983 | struct ksyms_symtab *cst; | 981 | struct ksyms_symtab *cst; | |
984 | #endif | 982 | #endif | |
985 | 983 | |||
986 | /* | 984 | /* | |
987 | * First: Copy out the ELF header. XXX Lose if ksymsopen() | 985 | * First: Copy out the ELF header. XXX Lose if ksymsopen() | |
988 | * occurs during read of the header. | 986 | * occurs during read of the header. | |
989 | */ | 987 | */ | |
990 | off = uio->uio_offset; | 988 | off = uio->uio_offset; | |
991 | if (off < sizeof(struct ksyms_hdr)) { | 989 | if (off < sizeof(struct ksyms_hdr)) { | |
992 | error = uiomove((char *)&ksyms_hdr + off, | 990 | error = uiomove((char *)&ksyms_hdr + off, | |
993 | sizeof(struct ksyms_hdr) - off, uio); | 991 | sizeof(struct ksyms_hdr) - off, uio); | |
994 | if (error != 0) | 992 | if (error != 0) | |
995 | return error; | 993 | return error; | |
996 | } | 994 | } | |
997 | 995 | |||
998 | /* | 996 | /* | |
999 | * Copy out the symbol table. | 997 | * Copy out the symbol table. | |
1000 | */ | 998 | */ | |
1001 | filepos = sizeof(struct ksyms_hdr); | 999 | filepos = sizeof(struct ksyms_hdr); | |
1002 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | 1000 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | |
1003 | if (uio->uio_resid == 0) | 1001 | if (uio->uio_resid == 0) | |
1004 | return 0; | 1002 | return 0; | |
1005 | if (uio->uio_offset <= st->sd_symsize + filepos) { | 1003 | if (uio->uio_offset <= st->sd_symsize + filepos) { | |
1006 | inpos = uio->uio_offset - filepos; | 1004 | inpos = uio->uio_offset - filepos; | |
1007 | error = uiomove((char *)st->sd_symstart + inpos, | 1005 | error = uiomove((char *)st->sd_symstart + inpos, | |
1008 | st->sd_symsize - inpos, uio); | 1006 | st->sd_symsize - inpos, uio); | |
1009 | if (error != 0) | 1007 | if (error != 0) | |
1010 | return error; | 1008 | return error; | |
1011 | } | 1009 | } | |
1012 | filepos += st->sd_symsize; | 1010 | filepos += st->sd_symsize; | |
1013 | } | 1011 | } | |
1014 | 1012 | |||
1015 | /* | 1013 | /* | |
1016 | * Copy out the string table | 1014 | * Copy out the string table | |
1017 | */ | 1015 | */ | |
1018 | KASSERT(filepos == sizeof(struct ksyms_hdr) + | 1016 | KASSERT(filepos == sizeof(struct ksyms_hdr) + | |
1019 | ksyms_hdr.kh_shdr[SYMTAB].sh_size); | 1017 | ksyms_hdr.kh_shdr[SYMTAB].sh_size); | |
1020 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | 1018 | TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { | |
1021 | if (uio->uio_resid == 0) | 1019 | if (uio->uio_resid == 0) | |
1022 | return 0; | 1020 | return 0; | |
1023 | if (uio->uio_offset <= st->sd_strsize + filepos) { | 1021 | if (uio->uio_offset <= st->sd_strsize + filepos) { | |
1024 | inpos = uio->uio_offset - filepos; | 1022 | inpos = uio->uio_offset - filepos; | |
1025 | error = uiomove((char *)st->sd_strstart + inpos, | 1023 | error = uiomove((char *)st->sd_strstart + inpos, | |
1026 | st->sd_strsize - inpos, uio); | 1024 | st->sd_strsize - inpos, uio); | |
1027 | if (error != 0) | 1025 | if (error != 0) | |
1028 | return error; | 1026 | return error; | |
1029 | } | 1027 | } | |
1030 | filepos += st->sd_strsize; | 1028 | filepos += st->sd_strsize; | |
1031 | } | 1029 | } | |
1032 | 1030 | |||
1033 | #ifdef KDTRACE_HOOKS | 1031 | #ifdef KDTRACE_HOOKS | |
1034 | /* | 1032 | /* | |
1035 | * Copy out the CTF table. | 1033 | * Copy out the CTF table. | |
1036 | */ | 1034 | */ | |
1037 | cst = TAILQ_FIRST(&ksyms_symtabs); | 1035 | cst = TAILQ_FIRST(&ksyms_symtabs); | |
1038 | if (cst->sd_ctfstart != NULL) { | 1036 | if (cst->sd_ctfstart != NULL) { | |
1039 | if (uio->uio_resid == 0) | 1037 | if (uio->uio_resid == 0) | |
1040 | return 0; | 1038 | return 0; | |
1041 | if (uio->uio_offset <= cst->sd_ctfsize + filepos) { | 1039 | if (uio->uio_offset <= cst->sd_ctfsize + filepos) { | |
1042 | inpos = uio->uio_offset - filepos; | 1040 | inpos = uio->uio_offset - filepos; | |
1043 | error = uiomove((char *)cst->sd_ctfstart + inpos, | 1041 | error = uiomove((char *)cst->sd_ctfstart + inpos, | |
1044 | cst->sd_ctfsize - inpos, uio); | 1042 | cst->sd_ctfsize - inpos, uio); | |
1045 | if (error != 0) | 1043 | if (error != 0) | |
1046 | return error; | 1044 | return error; | |
1047 | } | 1045 | } | |
1048 | filepos += cst->sd_ctfsize; | 1046 | filepos += cst->sd_ctfsize; | |
1049 | } | 1047 | } | |
1050 | #endif | 1048 | #endif | |
1051 | 1049 | |||
1052 | return 0; | 1050 | return 0; | |
1053 | } | 1051 | } | |
1054 | 1052 | |||
1055 | static int | 1053 | static int | |
1056 | ksymswrite(dev_t dev, struct uio *uio, int ioflag) | 1054 | ksymswrite(dev_t dev, struct uio *uio, int ioflag) | |
1057 | { | 1055 | { | |
1058 | 1056 | |||
1059 | return EROFS; | 1057 | return EROFS; | |
1060 | } | 1058 | } | |
1061 | 1059 | |||
1062 | static int | 1060 | static int | |
1063 | ksymsioctl(dev_t dev, u_long cmd, void *data, int fflag, struct lwp *l) | 1061 | ksymsioctl(dev_t dev, u_long cmd, void *data, int fflag, struct lwp *l) | |
1064 | { | 1062 | { | |
1065 | struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data; | 1063 | struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data; | |
1066 | struct ksyms_symtab *st; | 1064 | struct ksyms_symtab *st; | |
1067 | Elf_Sym *sym = NULL, copy; | 1065 | Elf_Sym *sym = NULL, copy; | |
1068 | unsigned long val; | 1066 | unsigned long val; | |
1069 | int error = 0; | 1067 | int error = 0; | |
1070 | char *str = NULL; | 1068 | char *str = NULL; | |
1071 | int len; | 1069 | int len; | |
1072 | 1070 | |||
1073 | /* Read ksyms_maxlen only once while not holding the lock. */ | 1071 | /* Read ksyms_maxlen only once while not holding the lock. */ | |
1074 | len = ksyms_maxlen; | 1072 | len = ksyms_maxlen; | |
1075 | 1073 | |||
1076 | if (cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) { | 1074 | if (cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) { | |
1077 | str = kmem_alloc(len, KM_SLEEP); | 1075 | str = kmem_alloc(len, KM_SLEEP); | |
1078 | if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) { | 1076 | if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) { | |
1079 | kmem_free(str, len); | 1077 | kmem_free(str, len); | |
1080 | return error; | 1078 | return error; | |
1081 | } | 1079 | } | |
1082 | } | 1080 | } | |
1083 | 1081 | |||
1084 | switch (cmd) { | 1082 | switch (cmd) { | |
1085 | case KIOCGVALUE: | 1083 | case KIOCGVALUE: | |
1086 | /* | 1084 | /* | |
1087 | * Use the in-kernel symbol lookup code for fast | 1085 | * Use the in-kernel symbol lookup code for fast | |
1088 | * retreival of a value. | 1086 | * retreival of a value. | |
1089 | */ | 1087 | */ | |
1090 | error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN); | 1088 | error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN); | |
1091 | if (error == 0) | 1089 | if (error == 0) | |
1092 | error = copyout(&val, kg->kg_value, sizeof(long)); | 1090 | error = copyout(&val, kg->kg_value, sizeof(long)); | |
1093 | kmem_free(str, len); | 1091 | kmem_free(str, len); | |
1094 | break; | 1092 | break; |
--- src/sys/kern/kern_sleepq.c 2011/07/26 13:04:51 1.40
+++ src/sys/kern/kern_sleepq.c 2011/07/27 14:35:34 1.41
@@ -1,491 +1,489 @@ | @@ -1,491 +1,489 @@ | |||
1 | /* $NetBSD: kern_sleepq.c,v 1.40 2011/07/26 13:04:51 yamt Exp $ */ | 1 | /* $NetBSD: kern_sleepq.c,v 1.41 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Sleep queue implementation, used by turnstiles and general sleep/wakeup | 33 | * Sleep queue implementation, used by turnstiles and general sleep/wakeup | |
34 | * interfaces. | 34 | * interfaces. | |
35 | */ | 35 | */ | |
36 | 36 | |||
37 | #include <sys/cdefs.h> | 37 | #include <sys/cdefs.h> | |
38 | __KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.40 2011/07/26 13:04:51 yamt Exp $"); | 38 | __KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.41 2011/07/27 14:35:34 uebayasi Exp $"); | |
39 | 39 | |||
40 | #include <sys/param.h> | 40 | #include <sys/param.h> | |
41 | #include <sys/kernel.h> | 41 | #include <sys/kernel.h> | |
42 | #include <sys/cpu.h> | 42 | #include <sys/cpu.h> | |
43 | #include <sys/pool.h> | 43 | #include <sys/pool.h> | |
44 | #include <sys/proc.h> | 44 | #include <sys/proc.h> | |
45 | #include <sys/resourcevar.h> | 45 | #include <sys/resourcevar.h> | |
46 | #include <sys/sa.h> | 46 | #include <sys/sa.h> | |
47 | #include <sys/savar.h> | 47 | #include <sys/savar.h> | |
48 | #include <sys/sched.h> | 48 | #include <sys/sched.h> | |
49 | #include <sys/systm.h> | 49 | #include <sys/systm.h> | |
50 | #include <sys/sleepq.h> | 50 | #include <sys/sleepq.h> | |
51 | #include <sys/ktrace.h> | 51 | #include <sys/ktrace.h> | |
52 | 52 | |||
53 | #include <uvm/uvm_extern.h> | |||
54 | ||||
55 | #include "opt_sa.h" | 53 | #include "opt_sa.h" | |
56 | 54 | |||
57 | static int sleepq_sigtoerror(lwp_t *, int); | 55 | static int sleepq_sigtoerror(lwp_t *, int); | |
58 | 56 | |||
59 | /* General purpose sleep table, used by ltsleep() and condition variables. */ | 57 | /* General purpose sleep table, used by ltsleep() and condition variables. */ | |
60 | sleeptab_t sleeptab __cacheline_aligned; | 58 | sleeptab_t sleeptab __cacheline_aligned; | |
61 | 59 | |||
62 | /* | 60 | /* | |
63 | * sleeptab_init: | 61 | * sleeptab_init: | |
64 | * | 62 | * | |
65 | * Initialize a sleep table. | 63 | * Initialize a sleep table. | |
66 | */ | 64 | */ | |
67 | void | 65 | void | |
68 | sleeptab_init(sleeptab_t *st) | 66 | sleeptab_init(sleeptab_t *st) | |
69 | { | 67 | { | |
70 | sleepq_t *sq; | 68 | sleepq_t *sq; | |
71 | int i; | 69 | int i; | |
72 | 70 | |||
73 | for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) { | 71 | for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) { | |
74 | sq = &st->st_queues[i].st_queue; | 72 | sq = &st->st_queues[i].st_queue; | |
75 | st->st_queues[i].st_mutex = | 73 | st->st_queues[i].st_mutex = | |
76 | mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); | 74 | mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); | |
77 | sleepq_init(sq); | 75 | sleepq_init(sq); | |
78 | } | 76 | } | |
79 | } | 77 | } | |
80 | 78 | |||
81 | /* | 79 | /* | |
82 | * sleepq_init: | 80 | * sleepq_init: | |
83 | * | 81 | * | |
84 | * Prepare a sleep queue for use. | 82 | * Prepare a sleep queue for use. | |
85 | */ | 83 | */ | |
86 | void | 84 | void | |
87 | sleepq_init(sleepq_t *sq) | 85 | sleepq_init(sleepq_t *sq) | |
88 | { | 86 | { | |
89 | 87 | |||
90 | TAILQ_INIT(sq); | 88 | TAILQ_INIT(sq); | |
91 | } | 89 | } | |
92 | 90 | |||
93 | /* | 91 | /* | |
94 | * sleepq_remove: | 92 | * sleepq_remove: | |
95 | * | 93 | * | |
96 | * Remove an LWP from a sleep queue and wake it up. | 94 | * Remove an LWP from a sleep queue and wake it up. | |
97 | */ | 95 | */ | |
98 | void | 96 | void | |
99 | sleepq_remove(sleepq_t *sq, lwp_t *l) | 97 | sleepq_remove(sleepq_t *sq, lwp_t *l) | |
100 | { | 98 | { | |
101 | struct schedstate_percpu *spc; | 99 | struct schedstate_percpu *spc; | |
102 | struct cpu_info *ci; | 100 | struct cpu_info *ci; | |
103 | 101 | |||
104 | KASSERT(lwp_locked(l, NULL)); | 102 | KASSERT(lwp_locked(l, NULL)); | |
105 | 103 | |||
106 | TAILQ_REMOVE(sq, l, l_sleepchain); | 104 | TAILQ_REMOVE(sq, l, l_sleepchain); | |
107 | l->l_syncobj = &sched_syncobj; | 105 | l->l_syncobj = &sched_syncobj; | |
108 | l->l_wchan = NULL; | 106 | l->l_wchan = NULL; | |
109 | l->l_sleepq = NULL; | 107 | l->l_sleepq = NULL; | |
110 | l->l_flag &= ~LW_SINTR; | 108 | l->l_flag &= ~LW_SINTR; | |
111 | 109 | |||
112 | ci = l->l_cpu; | 110 | ci = l->l_cpu; | |
113 | spc = &ci->ci_schedstate; | 111 | spc = &ci->ci_schedstate; | |
114 | 112 | |||
115 | /* | 113 | /* | |
116 | * If not sleeping, the LWP must have been suspended. Let whoever | 114 | * If not sleeping, the LWP must have been suspended. Let whoever | |
117 | * holds it stopped set it running again. | 115 | * holds it stopped set it running again. | |
118 | */ | 116 | */ | |
119 | if (l->l_stat != LSSLEEP) { | 117 | if (l->l_stat != LSSLEEP) { | |
120 | KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED); | 118 | KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED); | |
121 | lwp_setlock(l, spc->spc_lwplock); | 119 | lwp_setlock(l, spc->spc_lwplock); | |
122 | return; | 120 | return; | |
123 | } | 121 | } | |
124 | 122 | |||
125 | /* | 123 | /* | |
126 | * If the LWP is still on the CPU, mark it as LSONPROC. It may be | 124 | * If the LWP is still on the CPU, mark it as LSONPROC. It may be | |
127 | * about to call mi_switch(), in which case it will yield. | 125 | * about to call mi_switch(), in which case it will yield. | |
128 | */ | 126 | */ | |
129 | if ((l->l_pflag & LP_RUNNING) != 0) { | 127 | if ((l->l_pflag & LP_RUNNING) != 0) { | |
130 | l->l_stat = LSONPROC; | 128 | l->l_stat = LSONPROC; | |
131 | l->l_slptime = 0; | 129 | l->l_slptime = 0; | |
132 | lwp_setlock(l, spc->spc_lwplock); | 130 | lwp_setlock(l, spc->spc_lwplock); | |
133 | return; | 131 | return; | |
134 | } | 132 | } | |
135 | 133 | |||
136 | /* Update sleep time delta, call the wake-up handler of scheduler */ | 134 | /* Update sleep time delta, call the wake-up handler of scheduler */ | |
137 | l->l_slpticksum += (hardclock_ticks - l->l_slpticks); | 135 | l->l_slpticksum += (hardclock_ticks - l->l_slpticks); | |
138 | sched_wakeup(l); | 136 | sched_wakeup(l); | |
139 | 137 | |||
140 | /* Look for a CPU to wake up */ | 138 | /* Look for a CPU to wake up */ | |
141 | l->l_cpu = sched_takecpu(l); | 139 | l->l_cpu = sched_takecpu(l); | |
142 | ci = l->l_cpu; | 140 | ci = l->l_cpu; | |
143 | spc = &ci->ci_schedstate; | 141 | spc = &ci->ci_schedstate; | |
144 | 142 | |||
145 | /* | 143 | /* | |
146 | * Set it running. | 144 | * Set it running. | |
147 | */ | 145 | */ | |
148 | spc_lock(ci); | 146 | spc_lock(ci); | |
149 | lwp_setlock(l, spc->spc_mutex); | 147 | lwp_setlock(l, spc->spc_mutex); | |
150 | #ifdef KERN_SA | 148 | #ifdef KERN_SA | |
151 | if (l->l_proc->p_sa != NULL) | 149 | if (l->l_proc->p_sa != NULL) | |
152 | sa_awaken(l); | 150 | sa_awaken(l); | |
153 | #endif /* KERN_SA */ | 151 | #endif /* KERN_SA */ | |
154 | sched_setrunnable(l); | 152 | sched_setrunnable(l); | |
155 | l->l_stat = LSRUN; | 153 | l->l_stat = LSRUN; | |
156 | l->l_slptime = 0; | 154 | l->l_slptime = 0; | |
157 | sched_enqueue(l, false); | 155 | sched_enqueue(l, false); | |
158 | spc_unlock(ci); | 156 | spc_unlock(ci); | |
159 | } | 157 | } | |
160 | 158 | |||
161 | /* | 159 | /* | |
162 | * sleepq_insert: | 160 | * sleepq_insert: | |
163 | * | 161 | * | |
164 | * Insert an LWP into the sleep queue, optionally sorting by priority. | 162 | * Insert an LWP into the sleep queue, optionally sorting by priority. | |
165 | */ | 163 | */ | |
166 | void | 164 | void | |
167 | sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj) | 165 | sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj) | |
168 | { | 166 | { | |
169 | 167 | |||
170 | if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) { | 168 | if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) { | |
171 | lwp_t *l2; | 169 | lwp_t *l2; | |
172 | const int pri = lwp_eprio(l); | 170 | const int pri = lwp_eprio(l); | |
173 | 171 | |||
174 | TAILQ_FOREACH(l2, sq, l_sleepchain) { | 172 | TAILQ_FOREACH(l2, sq, l_sleepchain) { | |
175 | if (lwp_eprio(l2) < pri) { | 173 | if (lwp_eprio(l2) < pri) { | |
176 | TAILQ_INSERT_BEFORE(l2, l, l_sleepchain); | 174 | TAILQ_INSERT_BEFORE(l2, l, l_sleepchain); | |
177 | return; | 175 | return; | |
178 | } | 176 | } | |
179 | } | 177 | } | |
180 | } | 178 | } | |
181 | 179 | |||
182 | if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0) | 180 | if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0) | |
183 | TAILQ_INSERT_HEAD(sq, l, l_sleepchain); | 181 | TAILQ_INSERT_HEAD(sq, l, l_sleepchain); | |
184 | else | 182 | else | |
185 | TAILQ_INSERT_TAIL(sq, l, l_sleepchain); | 183 | TAILQ_INSERT_TAIL(sq, l, l_sleepchain); | |
186 | } | 184 | } | |
187 | 185 | |||
188 | /* | 186 | /* | |
189 | * sleepq_enqueue: | 187 | * sleepq_enqueue: | |
190 | * | 188 | * | |
191 | * Enter an LWP into the sleep queue and prepare for sleep. The sleep | 189 | * Enter an LWP into the sleep queue and prepare for sleep. The sleep | |
192 | * queue must already be locked, and any interlock (such as the kernel | 190 | * queue must already be locked, and any interlock (such as the kernel | |
193 | * lock) must have be released (see sleeptab_lookup(), sleepq_enter()). | 191 | * lock) must have be released (see sleeptab_lookup(), sleepq_enter()). | |
194 | */ | 192 | */ | |
195 | void | 193 | void | |
196 | sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj) | 194 | sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj) | |
197 | { | 195 | { | |
198 | lwp_t *l = curlwp; | 196 | lwp_t *l = curlwp; | |
199 | 197 | |||
200 | KASSERT(lwp_locked(l, NULL)); | 198 | KASSERT(lwp_locked(l, NULL)); | |
201 | KASSERT(l->l_stat == LSONPROC); | 199 | KASSERT(l->l_stat == LSONPROC); | |
202 | KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL); | 200 | KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL); | |
203 | 201 | |||
204 | l->l_syncobj = sobj; | 202 | l->l_syncobj = sobj; | |
205 | l->l_wchan = wchan; | 203 | l->l_wchan = wchan; | |
206 | l->l_sleepq = sq; | 204 | l->l_sleepq = sq; | |
207 | l->l_wmesg = wmesg; | 205 | l->l_wmesg = wmesg; | |
208 | l->l_slptime = 0; | 206 | l->l_slptime = 0; | |
209 | l->l_stat = LSSLEEP; | 207 | l->l_stat = LSSLEEP; | |
210 | l->l_sleeperr = 0; | 208 | l->l_sleeperr = 0; | |
211 | 209 | |||
212 | sleepq_insert(sq, l, sobj); | 210 | sleepq_insert(sq, l, sobj); | |
213 | 211 | |||
214 | /* Save the time when thread has slept */ | 212 | /* Save the time when thread has slept */ | |
215 | l->l_slpticks = hardclock_ticks; | 213 | l->l_slpticks = hardclock_ticks; | |
216 | sched_slept(l); | 214 | sched_slept(l); | |
217 | } | 215 | } | |
218 | 216 | |||
219 | /* | 217 | /* | |
220 | * sleepq_block: | 218 | * sleepq_block: | |
221 | * | 219 | * | |
222 | * After any intermediate step such as releasing an interlock, switch. | 220 | * After any intermediate step such as releasing an interlock, switch. | |
223 | * sleepq_block() may return early under exceptional conditions, for | 221 | * sleepq_block() may return early under exceptional conditions, for | |
224 | * example if the LWP's containing process is exiting. | 222 | * example if the LWP's containing process is exiting. | |
225 | */ | 223 | */ | |
226 | int | 224 | int | |
227 | sleepq_block(int timo, bool catch) | 225 | sleepq_block(int timo, bool catch) | |
228 | { | 226 | { | |
229 | int error = 0, sig; | 227 | int error = 0, sig; | |
230 | struct proc *p; | 228 | struct proc *p; | |
231 | lwp_t *l = curlwp; | 229 | lwp_t *l = curlwp; | |
232 | bool early = false; | 230 | bool early = false; | |
233 | int biglocks = l->l_biglocks; | 231 | int biglocks = l->l_biglocks; | |
234 | 232 | |||
235 | ktrcsw(1, 0); | 233 | ktrcsw(1, 0); | |
236 | 234 | |||
237 | /* | 235 | /* | |
238 | * If sleeping interruptably, check for pending signals, exits or | 236 | * If sleeping interruptably, check for pending signals, exits or | |
239 | * core dump events. | 237 | * core dump events. | |
240 | */ | 238 | */ | |
241 | if (catch) { | 239 | if (catch) { | |
242 | l->l_flag |= LW_SINTR; | 240 | l->l_flag |= LW_SINTR; | |
243 | if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) { | 241 | if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) { | |
244 | l->l_flag &= ~LW_CANCELLED; | 242 | l->l_flag &= ~LW_CANCELLED; | |
245 | error = EINTR; | 243 | error = EINTR; | |
246 | early = true; | 244 | early = true; | |
247 | } else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0)) | 245 | } else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0)) | |
248 | early = true; | 246 | early = true; | |
249 | } | 247 | } | |
250 | 248 | |||
251 | if (early) { | 249 | if (early) { | |
252 | /* lwp_unsleep() will release the lock */ | 250 | /* lwp_unsleep() will release the lock */ | |
253 | lwp_unsleep(l, true); | 251 | lwp_unsleep(l, true); | |
254 | } else { | 252 | } else { | |
255 | if (timo) | 253 | if (timo) | |
256 | callout_schedule(&l->l_timeout_ch, timo); | 254 | callout_schedule(&l->l_timeout_ch, timo); | |
257 | 255 | |||
258 | #ifdef KERN_SA | 256 | #ifdef KERN_SA | |
259 | if (((l->l_flag & LW_SA) != 0) && (~l->l_pflag & LP_SA_NOBLOCK)) | 257 | if (((l->l_flag & LW_SA) != 0) && (~l->l_pflag & LP_SA_NOBLOCK)) | |
260 | sa_switch(l); | 258 | sa_switch(l); | |
261 | else | 259 | else | |
262 | #endif | 260 | #endif | |
263 | mi_switch(l); | 261 | mi_switch(l); | |
264 | 262 | |||
265 | /* The LWP and sleep queue are now unlocked. */ | 263 | /* The LWP and sleep queue are now unlocked. */ | |
266 | if (timo) { | 264 | if (timo) { | |
267 | /* | 265 | /* | |
268 | * Even if the callout appears to have fired, we need to | 266 | * Even if the callout appears to have fired, we need to | |
269 | * stop it in order to synchronise with other CPUs. | 267 | * stop it in order to synchronise with other CPUs. | |
270 | */ | 268 | */ | |
271 | if (callout_halt(&l->l_timeout_ch, NULL)) | 269 | if (callout_halt(&l->l_timeout_ch, NULL)) | |
272 | error = EWOULDBLOCK; | 270 | error = EWOULDBLOCK; | |
273 | } | 271 | } | |
274 | } | 272 | } | |
275 | 273 | |||
276 | if (catch && error == 0) { | 274 | if (catch && error == 0) { | |
277 | p = l->l_proc; | 275 | p = l->l_proc; | |
278 | if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0) | 276 | if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0) | |
279 | error = EINTR; | 277 | error = EINTR; | |
280 | else if ((l->l_flag & LW_PENDSIG) != 0) { | 278 | else if ((l->l_flag & LW_PENDSIG) != 0) { | |
281 | /* | 279 | /* | |
282 | * Acquiring p_lock may cause us to recurse | 280 | * Acquiring p_lock may cause us to recurse | |
283 | * through the sleep path and back into this | 281 | * through the sleep path and back into this | |
284 | * routine, but is safe because LWPs sleeping | 282 | * routine, but is safe because LWPs sleeping | |
285 | * on locks are non-interruptable. We will | 283 | * on locks are non-interruptable. We will | |
286 | * not recurse again. | 284 | * not recurse again. | |
287 | */ | 285 | */ | |
288 | mutex_enter(p->p_lock); | 286 | mutex_enter(p->p_lock); | |
289 | if ((sig = issignal(l)) != 0) | 287 | if ((sig = issignal(l)) != 0) | |
290 | error = sleepq_sigtoerror(l, sig); | 288 | error = sleepq_sigtoerror(l, sig); | |
291 | mutex_exit(p->p_lock); | 289 | mutex_exit(p->p_lock); | |
292 | } | 290 | } | |
293 | } | 291 | } | |
294 | 292 | |||
295 | ktrcsw(0, 0); | 293 | ktrcsw(0, 0); | |
296 | if (__predict_false(biglocks != 0)) { | 294 | if (__predict_false(biglocks != 0)) { | |
297 | KERNEL_LOCK(biglocks, NULL); | 295 | KERNEL_LOCK(biglocks, NULL); | |
298 | } | 296 | } | |
299 | return error; | 297 | return error; | |
300 | } | 298 | } | |
301 | 299 | |||
302 | /* | 300 | /* | |
303 | * sleepq_wake: | 301 | * sleepq_wake: | |
304 | * | 302 | * | |
305 | * Wake zero or more LWPs blocked on a single wait channel. | 303 | * Wake zero or more LWPs blocked on a single wait channel. | |
306 | */ | 304 | */ | |
307 | lwp_t * | 305 | lwp_t * | |
308 | sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp) | 306 | sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp) | |
309 | { | 307 | { | |
310 | lwp_t *l, *next; | 308 | lwp_t *l, *next; | |
311 | 309 | |||
312 | KASSERT(mutex_owned(mp)); | 310 | KASSERT(mutex_owned(mp)); | |
313 | 311 | |||
314 | for (l = TAILQ_FIRST(sq); l != NULL; l = next) { | 312 | for (l = TAILQ_FIRST(sq); l != NULL; l = next) { | |
315 | KASSERT(l->l_sleepq == sq); | 313 | KASSERT(l->l_sleepq == sq); | |
316 | KASSERT(l->l_mutex == mp); | 314 | KASSERT(l->l_mutex == mp); | |
317 | next = TAILQ_NEXT(l, l_sleepchain); | 315 | next = TAILQ_NEXT(l, l_sleepchain); | |
318 | if (l->l_wchan != wchan) | 316 | if (l->l_wchan != wchan) | |
319 | continue; | 317 | continue; | |
320 | sleepq_remove(sq, l); | 318 | sleepq_remove(sq, l); | |
321 | if (--expected == 0) | 319 | if (--expected == 0) | |
322 | break; | 320 | break; | |
323 | } | 321 | } | |
324 | 322 | |||
325 | mutex_spin_exit(mp); | 323 | mutex_spin_exit(mp); | |
326 | return l; | 324 | return l; | |
327 | } | 325 | } | |
328 | 326 | |||
329 | /* | 327 | /* | |
330 | * sleepq_unsleep: | 328 | * sleepq_unsleep: | |
331 | * | 329 | * | |
332 | * Remove an LWP from its sleep queue and set it runnable again. | 330 | * Remove an LWP from its sleep queue and set it runnable again. | |
333 | * sleepq_unsleep() is called with the LWP's mutex held, and will | 331 | * sleepq_unsleep() is called with the LWP's mutex held, and will | |
334 | * always release it. | 332 | * always release it. | |
335 | */ | 333 | */ | |
336 | void | 334 | void | |
337 | sleepq_unsleep(lwp_t *l, bool cleanup) | 335 | sleepq_unsleep(lwp_t *l, bool cleanup) | |
338 | { | 336 | { | |
339 | sleepq_t *sq = l->l_sleepq; | 337 | sleepq_t *sq = l->l_sleepq; | |
340 | kmutex_t *mp = l->l_mutex; | 338 | kmutex_t *mp = l->l_mutex; | |
341 | 339 | |||
342 | KASSERT(lwp_locked(l, mp)); | 340 | KASSERT(lwp_locked(l, mp)); | |
343 | KASSERT(l->l_wchan != NULL); | 341 | KASSERT(l->l_wchan != NULL); | |
344 | 342 | |||
345 | sleepq_remove(sq, l); | 343 | sleepq_remove(sq, l); | |
346 | if (cleanup) { | 344 | if (cleanup) { | |
347 | mutex_spin_exit(mp); | 345 | mutex_spin_exit(mp); | |
348 | } | 346 | } | |
349 | } | 347 | } | |
350 | 348 | |||
351 | /* | 349 | /* | |
352 | * sleepq_timeout: | 350 | * sleepq_timeout: | |
353 | * | 351 | * | |
354 | * Entered via the callout(9) subsystem to time out an LWP that is on a | 352 | * Entered via the callout(9) subsystem to time out an LWP that is on a | |
355 | * sleep queue. | 353 | * sleep queue. | |
356 | */ | 354 | */ | |
357 | void | 355 | void | |
358 | sleepq_timeout(void *arg) | 356 | sleepq_timeout(void *arg) | |
359 | { | 357 | { | |
360 | lwp_t *l = arg; | 358 | lwp_t *l = arg; | |
361 | 359 | |||
362 | /* | 360 | /* | |
363 | * Lock the LWP. Assuming it's still on the sleep queue, its | 361 | * Lock the LWP. Assuming it's still on the sleep queue, its | |
364 | * current mutex will also be the sleep queue mutex. | 362 | * current mutex will also be the sleep queue mutex. | |
365 | */ | 363 | */ | |
366 | lwp_lock(l); | 364 | lwp_lock(l); | |
367 | 365 | |||
368 | if (l->l_wchan == NULL) { | 366 | if (l->l_wchan == NULL) { | |
369 | /* Somebody beat us to it. */ | 367 | /* Somebody beat us to it. */ | |
370 | lwp_unlock(l); | 368 | lwp_unlock(l); | |
371 | return; | 369 | return; | |
372 | } | 370 | } | |
373 | 371 | |||
374 | lwp_unsleep(l, true); | 372 | lwp_unsleep(l, true); | |
375 | } | 373 | } | |
376 | 374 | |||
377 | /* | 375 | /* | |
378 | * sleepq_sigtoerror: | 376 | * sleepq_sigtoerror: | |
379 | * | 377 | * | |
380 | * Given a signal number, interpret and return an error code. | 378 | * Given a signal number, interpret and return an error code. | |
381 | */ | 379 | */ | |
382 | static int | 380 | static int | |
383 | sleepq_sigtoerror(lwp_t *l, int sig) | 381 | sleepq_sigtoerror(lwp_t *l, int sig) | |
384 | { | 382 | { | |
385 | struct proc *p = l->l_proc; | 383 | struct proc *p = l->l_proc; | |
386 | int error; | 384 | int error; | |
387 | 385 | |||
388 | KASSERT(mutex_owned(p->p_lock)); | 386 | KASSERT(mutex_owned(p->p_lock)); | |
389 | 387 | |||
390 | /* | 388 | /* | |
391 | * If this sleep was canceled, don't let the syscall restart. | 389 | * If this sleep was canceled, don't let the syscall restart. | |
392 | */ | 390 | */ | |
393 | if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0) | 391 | if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0) | |
394 | error = EINTR; | 392 | error = EINTR; | |
395 | else | 393 | else | |
396 | error = ERESTART; | 394 | error = ERESTART; | |
397 | 395 | |||
398 | return error; | 396 | return error; | |
399 | } | 397 | } | |
400 | 398 | |||
401 | /* | 399 | /* | |
402 | * sleepq_abort: | 400 | * sleepq_abort: | |
403 | * | 401 | * | |
404 | * After a panic or during autoconfiguration, lower the interrupt | 402 | * After a panic or during autoconfiguration, lower the interrupt | |
405 | * priority level to give pending interrupts a chance to run, and | 403 | * priority level to give pending interrupts a chance to run, and | |
406 | * then return. Called if sleepq_dontsleep() returns non-zero, and | 404 | * then return. Called if sleepq_dontsleep() returns non-zero, and | |
407 | * always returns zero. | 405 | * always returns zero. | |
408 | */ | 406 | */ | |
409 | int | 407 | int | |
410 | sleepq_abort(kmutex_t *mtx, int unlock) | 408 | sleepq_abort(kmutex_t *mtx, int unlock) | |
411 | { | 409 | { | |
412 | extern int safepri; | 410 | extern int safepri; | |
413 | int s; | 411 | int s; | |
414 | 412 | |||
415 | s = splhigh(); | 413 | s = splhigh(); | |
416 | splx(safepri); | 414 | splx(safepri); | |
417 | splx(s); | 415 | splx(s); | |
418 | if (mtx != NULL && unlock != 0) | 416 | if (mtx != NULL && unlock != 0) | |
419 | mutex_exit(mtx); | 417 | mutex_exit(mtx); | |
420 | 418 | |||
421 | return 0; | 419 | return 0; | |
422 | } | 420 | } | |
423 | 421 | |||
424 | /* | 422 | /* | |
425 | * sleepq_changepri: | 423 | * sleepq_changepri: | |
426 | * | 424 | * | |
427 | * Adjust the priority of an LWP residing on a sleepq. This method | 425 | * Adjust the priority of an LWP residing on a sleepq. This method | |
428 | * will only alter the user priority; the effective priority is | 426 | * will only alter the user priority; the effective priority is | |
429 | * assumed to have been fixed at the time of insertion into the queue. | 427 | * assumed to have been fixed at the time of insertion into the queue. | |
430 | */ | 428 | */ | |
431 | void | 429 | void | |
432 | sleepq_changepri(lwp_t *l, pri_t pri) | 430 | sleepq_changepri(lwp_t *l, pri_t pri) | |
433 | { | 431 | { | |
434 | sleepq_t *sq = l->l_sleepq; | 432 | sleepq_t *sq = l->l_sleepq; | |
435 | pri_t opri; | 433 | pri_t opri; | |
436 | 434 | |||
437 | KASSERT(lwp_locked(l, NULL)); | 435 | KASSERT(lwp_locked(l, NULL)); | |
438 | 436 | |||
439 | opri = lwp_eprio(l); | 437 | opri = lwp_eprio(l); | |
440 | l->l_priority = pri; | 438 | l->l_priority = pri; | |
441 | 439 | |||
442 | if (lwp_eprio(l) == opri) { | 440 | if (lwp_eprio(l) == opri) { | |
443 | return; | 441 | return; | |
444 | } | 442 | } | |
445 | if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) { | 443 | if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) { | |
446 | return; | 444 | return; | |
447 | } | 445 | } | |
448 | 446 | |||
449 | /* | 447 | /* | |
450 | * Don't let the sleep queue become empty, even briefly. | 448 | * Don't let the sleep queue become empty, even briefly. | |
451 | * cv_signal() and cv_broadcast() inspect it without the | 449 | * cv_signal() and cv_broadcast() inspect it without the | |
452 | * sleep queue lock held and need to see a non-empty queue | 450 | * sleep queue lock held and need to see a non-empty queue | |
453 | * head if there are waiters. | 451 | * head if there are waiters. | |
454 | */ | 452 | */ | |
455 | if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) { | 453 | if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) { | |
456 | return; | 454 | return; | |
457 | } | 455 | } | |
458 | TAILQ_REMOVE(sq, l, l_sleepchain); | 456 | TAILQ_REMOVE(sq, l, l_sleepchain); | |
459 | sleepq_insert(sq, l, l->l_syncobj); | 457 | sleepq_insert(sq, l, l->l_syncobj); | |
460 | } | 458 | } | |
461 | 459 | |||
462 | void | 460 | void | |
463 | sleepq_lendpri(lwp_t *l, pri_t pri) | 461 | sleepq_lendpri(lwp_t *l, pri_t pri) | |
464 | { | 462 | { | |
465 | sleepq_t *sq = l->l_sleepq; | 463 | sleepq_t *sq = l->l_sleepq; | |
466 | pri_t opri; | 464 | pri_t opri; | |
467 | 465 | |||
468 | KASSERT(lwp_locked(l, NULL)); | 466 | KASSERT(lwp_locked(l, NULL)); | |
469 | 467 | |||
470 | opri = lwp_eprio(l); | 468 | opri = lwp_eprio(l); | |
471 | l->l_inheritedprio = pri; | 469 | l->l_inheritedprio = pri; | |
472 | 470 | |||
473 | if (lwp_eprio(l) == opri) { | 471 | if (lwp_eprio(l) == opri) { | |
474 | return; | 472 | return; | |
475 | } | 473 | } | |
476 | if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) { | 474 | if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) { | |
477 | return; | 475 | return; | |
478 | } | 476 | } | |
479 | 477 | |||
480 | /* | 478 | /* | |
481 | * Don't let the sleep queue become empty, even briefly. | 479 | * Don't let the sleep queue become empty, even briefly. | |
482 | * cv_signal() and cv_broadcast() inspect it without the | 480 | * cv_signal() and cv_broadcast() inspect it without the | |
483 | * sleep queue lock held and need to see a non-empty queue | 481 | * sleep queue lock held and need to see a non-empty queue | |
484 | * head if there are waiters. | 482 | * head if there are waiters. | |
485 | */ | 483 | */ | |
486 | if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) { | 484 | if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) { | |
487 | return; | 485 | return; | |
488 | } | 486 | } | |
489 | TAILQ_REMOVE(sq, l, l_sleepchain); | 487 | TAILQ_REMOVE(sq, l, l_sleepchain); | |
490 | sleepq_insert(sq, l, l->l_syncobj); | 488 | sleepq_insert(sq, l, l->l_syncobj); | |
491 | } | 489 | } |
--- src/sys/kern/kern_subr.c 2010/11/11 11:07:07 1.208
+++ src/sys/kern/kern_subr.c 2011/07/27 14:35:34 1.209
@@ -1,726 +1,724 @@ | @@ -1,726 +1,724 @@ | |||
1 | /* $NetBSD: kern_subr.c,v 1.208 2010/11/11 11:07:07 hannken Exp $ */ | 1 | /* $NetBSD: kern_subr.c,v 1.209 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | |
9 | * NASA Ames Research Center, and by Luke Mewburn. | 9 | * NASA Ames Research Center, and by Luke Mewburn. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | 15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | 16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | 17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | 18 | * documentation and/or other materials provided with the distribution. | |
19 | * | 19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
30 | * POSSIBILITY OF SUCH DAMAGE. | 30 | * POSSIBILITY OF SUCH DAMAGE. | |
31 | */ | 31 | */ | |
32 | 32 | |||
33 | /* | 33 | /* | |
34 | * Copyright (c) 1982, 1986, 1991, 1993 | 34 | * Copyright (c) 1982, 1986, 1991, 1993 | |
35 | * The Regents of the University of California. All rights reserved. | 35 | * The Regents of the University of California. All rights reserved. | |
36 | * (c) UNIX System Laboratories, Inc. | 36 | * (c) UNIX System Laboratories, Inc. | |
37 | * All or some portions of this file are derived from material licensed | 37 | * All or some portions of this file are derived from material licensed | |
38 | * to the University of California by American Telephone and Telegraph | 38 | * to the University of California by American Telephone and Telegraph | |
39 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 39 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
40 | * the permission of UNIX System Laboratories, Inc. | 40 | * the permission of UNIX System Laboratories, Inc. | |
41 | * | 41 | * | |
42 | * Copyright (c) 1992, 1993 | 42 | * Copyright (c) 1992, 1993 | |
43 | * The Regents of the University of California. All rights reserved. | 43 | * The Regents of the University of California. All rights reserved. | |
44 | * | 44 | * | |
45 | * This software was developed by the Computer Systems Engineering group | 45 | * This software was developed by the Computer Systems Engineering group | |
46 | * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and | 46 | * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and | |
47 | * contributed to Berkeley. | 47 | * contributed to Berkeley. | |
48 | * | 48 | * | |
49 | * All advertising materials mentioning features or use of this software | 49 | * All advertising materials mentioning features or use of this software | |
50 | * must display the following acknowledgement: | 50 | * must display the following acknowledgement: | |
51 | * This product includes software developed by the University of | 51 | * This product includes software developed by the University of | |
52 | * California, Lawrence Berkeley Laboratory. | 52 | * California, Lawrence Berkeley Laboratory. | |
53 | * | 53 | * | |
54 | * Redistribution and use in source and binary forms, with or without | 54 | * Redistribution and use in source and binary forms, with or without | |
55 | * modification, are permitted provided that the following conditions | 55 | * modification, are permitted provided that the following conditions | |
56 | * are met: | 56 | * are met: | |
57 | * 1. Redistributions of source code must retain the above copyright | 57 | * 1. Redistributions of source code must retain the above copyright | |
58 | * notice, this list of conditions and the following disclaimer. | 58 | * notice, this list of conditions and the following disclaimer. | |
59 | * 2. Redistributions in binary form must reproduce the above copyright | 59 | * 2. Redistributions in binary form must reproduce the above copyright | |
60 | * notice, this list of conditions and the following disclaimer in the | 60 | * notice, this list of conditions and the following disclaimer in the | |
61 | * documentation and/or other materials provided with the distribution. | 61 | * documentation and/or other materials provided with the distribution. | |
62 | * 3. Neither the name of the University nor the names of its contributors | 62 | * 3. Neither the name of the University nor the names of its contributors | |
63 | * may be used to endorse or promote products derived from this software | 63 | * may be used to endorse or promote products derived from this software | |
64 | * without specific prior written permission. | 64 | * without specific prior written permission. | |
65 | * | 65 | * | |
66 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 66 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
67 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 67 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
68 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 68 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
69 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 69 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
70 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 70 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
71 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 71 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
72 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 72 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
73 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 73 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
74 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 74 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
75 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 75 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
76 | * SUCH DAMAGE. | 76 | * SUCH DAMAGE. | |
77 | * | 77 | * | |
78 | * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 | 78 | * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 | |
79 | */ | 79 | */ | |
80 | 80 | |||
81 | #include <sys/cdefs.h> | 81 | #include <sys/cdefs.h> | |
82 | __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.208 2010/11/11 11:07:07 hannken Exp $"); | 82 | __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.209 2011/07/27 14:35:34 uebayasi Exp $"); | |
83 | 83 | |||
84 | #include "opt_ddb.h" | 84 | #include "opt_ddb.h" | |
85 | #include "opt_md.h" | 85 | #include "opt_md.h" | |
86 | #include "opt_syscall_debug.h" | 86 | #include "opt_syscall_debug.h" | |
87 | #include "opt_ktrace.h" | 87 | #include "opt_ktrace.h" | |
88 | #include "opt_ptrace.h" | 88 | #include "opt_ptrace.h" | |
89 | #include "opt_tftproot.h" | 89 | #include "opt_tftproot.h" | |
90 | 90 | |||
91 | #include <sys/param.h> | 91 | #include <sys/param.h> | |
92 | #include <sys/systm.h> | 92 | #include <sys/systm.h> | |
93 | #include <sys/proc.h> | 93 | #include <sys/proc.h> | |
94 | #include <sys/mount.h> | 94 | #include <sys/mount.h> | |
95 | #include <sys/device.h> | 95 | #include <sys/device.h> | |
96 | #include <sys/reboot.h> | 96 | #include <sys/reboot.h> | |
97 | #include <sys/conf.h> | 97 | #include <sys/conf.h> | |
98 | #include <sys/disk.h> | 98 | #include <sys/disk.h> | |
99 | #include <sys/disklabel.h> | 99 | #include <sys/disklabel.h> | |
100 | #include <sys/queue.h> | 100 | #include <sys/queue.h> | |
101 | #include <sys/ktrace.h> | 101 | #include <sys/ktrace.h> | |
102 | #include <sys/ptrace.h> | 102 | #include <sys/ptrace.h> | |
103 | #include <sys/fcntl.h> | 103 | #include <sys/fcntl.h> | |
104 | #include <sys/kauth.h> | 104 | #include <sys/kauth.h> | |
105 | #include <sys/stat.h> | 105 | #include <sys/stat.h> | |
106 | #include <sys/vnode.h> | 106 | #include <sys/vnode.h> | |
107 | #include <sys/module.h> | 107 | #include <sys/module.h> | |
108 | 108 | |||
109 | #include <uvm/uvm_extern.h> | |||
110 | ||||
111 | #include <dev/cons.h> | 109 | #include <dev/cons.h> | |
112 | 110 | |||
113 | #include <net/if.h> | 111 | #include <net/if.h> | |
114 | 112 | |||
115 | /* XXX these should eventually move to subr_autoconf.c */ | 113 | /* XXX these should eventually move to subr_autoconf.c */ | |
116 | static device_t finddevice(const char *); | 114 | static device_t finddevice(const char *); | |
117 | static device_t getdisk(char *, int, int, dev_t *, int); | 115 | static device_t getdisk(char *, int, int, dev_t *, int); | |
118 | static device_t parsedisk(char *, int, int, dev_t *); | 116 | static device_t parsedisk(char *, int, int, dev_t *); | |
119 | static const char *getwedgename(const char *, int); | 117 | static const char *getwedgename(const char *, int); | |
120 | 118 | |||
121 | #ifdef TFTPROOT | 119 | #ifdef TFTPROOT | |
122 | int tftproot_dhcpboot(device_t); | 120 | int tftproot_dhcpboot(device_t); | |
123 | #endif | 121 | #endif | |
124 | 122 | |||
125 | dev_t dumpcdev; /* for savecore */ | 123 | dev_t dumpcdev; /* for savecore */ | |
126 | 124 | |||
127 | static int | 125 | static int | |
128 | isswap(device_t dv) | 126 | isswap(device_t dv) | |
129 | { | 127 | { | |
130 | struct dkwedge_info wi; | 128 | struct dkwedge_info wi; | |
131 | struct vnode *vn; | 129 | struct vnode *vn; | |
132 | int error; | 130 | int error; | |
133 | 131 | |||
134 | if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) | 132 | if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) | |
135 | return 0; | 133 | return 0; | |
136 | 134 | |||
137 | if ((vn = opendisk(dv)) == NULL) | 135 | if ((vn = opendisk(dv)) == NULL) | |
138 | return 0; | 136 | return 0; | |
139 | 137 | |||
140 | error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED); | 138 | error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED); | |
141 | VOP_CLOSE(vn, FREAD, NOCRED); | 139 | VOP_CLOSE(vn, FREAD, NOCRED); | |
142 | vput(vn); | 140 | vput(vn); | |
143 | if (error) { | 141 | if (error) { | |
144 | #ifdef DEBUG_WEDGE | 142 | #ifdef DEBUG_WEDGE | |
145 | printf("%s: Get wedge info returned %d\n", device_xname(dv), error); | 143 | printf("%s: Get wedge info returned %d\n", device_xname(dv), error); | |
146 | #endif | 144 | #endif | |
147 | return 0; | 145 | return 0; | |
148 | } | 146 | } | |
149 | return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; | 147 | return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; | |
150 | } | 148 | } | |
151 | 149 | |||
152 | /* | 150 | /* | |
153 | * Determine the root device and, if instructed to, the root file system. | 151 | * Determine the root device and, if instructed to, the root file system. | |
154 | */ | 152 | */ | |
155 | 153 | |||
156 | #ifdef MEMORY_DISK_IS_ROOT | 154 | #ifdef MEMORY_DISK_IS_ROOT | |
157 | int md_is_root = 1; | 155 | int md_is_root = 1; | |
158 | #else | 156 | #else | |
159 | int md_is_root = 0; | 157 | int md_is_root = 0; | |
160 | #endif | 158 | #endif | |
161 | 159 | |||
162 | /* | 160 | /* | |
163 | * The device and wedge that we booted from. If booted_wedge is NULL, | 161 | * The device and wedge that we booted from. If booted_wedge is NULL, | |
164 | * the we might consult booted_partition. | 162 | * the we might consult booted_partition. | |
165 | */ | 163 | */ | |
166 | device_t booted_device; | 164 | device_t booted_device; | |
167 | device_t booted_wedge; | 165 | device_t booted_wedge; | |
168 | int booted_partition; | 166 | int booted_partition; | |
169 | 167 | |||
170 | /* | 168 | /* | |
171 | * Use partition letters if it's a disk class but not a wedge. | 169 | * Use partition letters if it's a disk class but not a wedge. | |
172 | * XXX Check for wedge is kinda gross. | 170 | * XXX Check for wedge is kinda gross. | |
173 | */ | 171 | */ | |
174 | #define DEV_USES_PARTITIONS(dv) \ | 172 | #define DEV_USES_PARTITIONS(dv) \ | |
175 | (device_class((dv)) == DV_DISK && \ | 173 | (device_class((dv)) == DV_DISK && \ | |
176 | !device_is_a((dv), "dk")) | 174 | !device_is_a((dv), "dk")) | |
177 | 175 | |||
178 | void | 176 | void | |
179 | setroot(device_t bootdv, int bootpartition) | 177 | setroot(device_t bootdv, int bootpartition) | |
180 | { | 178 | { | |
181 | device_t dv; | 179 | device_t dv; | |
182 | deviter_t di; | 180 | deviter_t di; | |
183 | int len, majdev; | 181 | int len, majdev; | |
184 | dev_t nrootdev; | 182 | dev_t nrootdev; | |
185 | dev_t ndumpdev = NODEV; | 183 | dev_t ndumpdev = NODEV; | |
186 | char buf[128]; | 184 | char buf[128]; | |
187 | const char *rootdevname; | 185 | const char *rootdevname; | |
188 | const char *dumpdevname; | 186 | const char *dumpdevname; | |
189 | device_t rootdv = NULL; /* XXX gcc -Wuninitialized */ | 187 | device_t rootdv = NULL; /* XXX gcc -Wuninitialized */ | |
190 | device_t dumpdv = NULL; | 188 | device_t dumpdv = NULL; | |
191 | struct ifnet *ifp; | 189 | struct ifnet *ifp; | |
192 | const char *deffsname; | 190 | const char *deffsname; | |
193 | struct vfsops *vops; | 191 | struct vfsops *vops; | |
194 | 192 | |||
195 | #ifdef TFTPROOT | 193 | #ifdef TFTPROOT | |
196 | if (tftproot_dhcpboot(bootdv) != 0) | 194 | if (tftproot_dhcpboot(bootdv) != 0) | |
197 | boothowto |= RB_ASKNAME; | 195 | boothowto |= RB_ASKNAME; | |
198 | #endif | 196 | #endif | |
199 | 197 | |||
200 | /* | 198 | /* | |
201 | * For root on md0 we have to force the attachment of md0. | 199 | * For root on md0 we have to force the attachment of md0. | |
202 | */ | 200 | */ | |
203 | if (md_is_root) { | 201 | if (md_is_root) { | |
204 | int md_major; | 202 | int md_major; | |
205 | dev_t md_dev; | 203 | dev_t md_dev; | |
206 | 204 | |||
207 | bootdv = NULL; | 205 | bootdv = NULL; | |
208 | md_major = devsw_name2blk("md", NULL, 0); | 206 | md_major = devsw_name2blk("md", NULL, 0); | |
209 | if (md_major >= 0) { | 207 | if (md_major >= 0) { | |
210 | md_dev = MAKEDISKDEV(md_major, 0, RAW_PART); | 208 | md_dev = MAKEDISKDEV(md_major, 0, RAW_PART); | |
211 | if (bdev_open(md_dev, FREAD, S_IFBLK, curlwp) == 0) | 209 | if (bdev_open(md_dev, FREAD, S_IFBLK, curlwp) == 0) | |
212 | bootdv = device_find_by_xname("md0"); | 210 | bootdv = device_find_by_xname("md0"); | |
213 | } | 211 | } | |
214 | if (bootdv == NULL) | 212 | if (bootdv == NULL) | |
215 | panic("Cannot open \"md0\" (root)"); | 213 | panic("Cannot open \"md0\" (root)"); | |
216 | } | 214 | } | |
217 | 215 | |||
218 | /* | 216 | /* | |
219 | * If NFS is specified as the file system, and we found | 217 | * If NFS is specified as the file system, and we found | |
220 | * a DV_DISK boot device (or no boot device at all), then | 218 | * a DV_DISK boot device (or no boot device at all), then | |
221 | * find a reasonable network interface for "rootspec". | 219 | * find a reasonable network interface for "rootspec". | |
222 | */ | 220 | */ | |
223 | vops = vfs_getopsbyname(MOUNT_NFS); | 221 | vops = vfs_getopsbyname(MOUNT_NFS); | |
224 | if (vops != NULL && strcmp(rootfstype, MOUNT_NFS) == 0 && | 222 | if (vops != NULL && strcmp(rootfstype, MOUNT_NFS) == 0 && | |
225 | rootspec == NULL && | 223 | rootspec == NULL && | |
226 | (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { | 224 | (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { | |
227 | IFNET_FOREACH(ifp) { | 225 | IFNET_FOREACH(ifp) { | |
228 | if ((ifp->if_flags & | 226 | if ((ifp->if_flags & | |
229 | (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) | 227 | (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) | |
230 | break; | 228 | break; | |
231 | } | 229 | } | |
232 | if (ifp == NULL) { | 230 | if (ifp == NULL) { | |
233 | /* | 231 | /* | |
234 | * Can't find a suitable interface; ask the | 232 | * Can't find a suitable interface; ask the | |
235 | * user. | 233 | * user. | |
236 | */ | 234 | */ | |
237 | boothowto |= RB_ASKNAME; | 235 | boothowto |= RB_ASKNAME; | |
238 | } else { | 236 | } else { | |
239 | /* | 237 | /* | |
240 | * Have a suitable interface; behave as if | 238 | * Have a suitable interface; behave as if | |
241 | * the user specified this interface. | 239 | * the user specified this interface. | |
242 | */ | 240 | */ | |
243 | rootspec = (const char *)ifp->if_xname; | 241 | rootspec = (const char *)ifp->if_xname; | |
244 | } | 242 | } | |
245 | } | 243 | } | |
246 | if (vops != NULL) | 244 | if (vops != NULL) | |
247 | vfs_delref(vops); | 245 | vfs_delref(vops); | |
248 | 246 | |||
249 | /* | 247 | /* | |
250 | * If wildcarded root and we the boot device wasn't determined, | 248 | * If wildcarded root and we the boot device wasn't determined, | |
251 | * ask the user. | 249 | * ask the user. | |
252 | */ | 250 | */ | |
253 | if (rootspec == NULL && bootdv == NULL) | 251 | if (rootspec == NULL && bootdv == NULL) | |
254 | boothowto |= RB_ASKNAME; | 252 | boothowto |= RB_ASKNAME; | |
255 | 253 | |||
256 | top: | 254 | top: | |
257 | if (boothowto & RB_ASKNAME) { | 255 | if (boothowto & RB_ASKNAME) { | |
258 | device_t defdumpdv; | 256 | device_t defdumpdv; | |
259 | 257 | |||
260 | for (;;) { | 258 | for (;;) { | |
261 | printf("root device"); | 259 | printf("root device"); | |
262 | if (bootdv != NULL) { | 260 | if (bootdv != NULL) { | |
263 | printf(" (default %s", device_xname(bootdv)); | 261 | printf(" (default %s", device_xname(bootdv)); | |
264 | if (DEV_USES_PARTITIONS(bootdv)) | 262 | if (DEV_USES_PARTITIONS(bootdv)) | |
265 | printf("%c", bootpartition + 'a'); | 263 | printf("%c", bootpartition + 'a'); | |
266 | printf(")"); | 264 | printf(")"); | |
267 | } | 265 | } | |
268 | printf(": "); | 266 | printf(": "); | |
269 | len = cngetsn(buf, sizeof(buf)); | 267 | len = cngetsn(buf, sizeof(buf)); | |
270 | if (len == 0 && bootdv != NULL) { | 268 | if (len == 0 && bootdv != NULL) { | |
271 | strlcpy(buf, device_xname(bootdv), sizeof(buf)); | 269 | strlcpy(buf, device_xname(bootdv), sizeof(buf)); | |
272 | len = strlen(buf); | 270 | len = strlen(buf); | |
273 | } | 271 | } | |
274 | if (len > 0 && buf[len - 1] == '*') { | 272 | if (len > 0 && buf[len - 1] == '*') { | |
275 | buf[--len] = '\0'; | 273 | buf[--len] = '\0'; | |
276 | dv = getdisk(buf, len, 1, &nrootdev, 0); | 274 | dv = getdisk(buf, len, 1, &nrootdev, 0); | |
277 | if (dv != NULL) { | 275 | if (dv != NULL) { | |
278 | rootdv = dv; | 276 | rootdv = dv; | |
279 | break; | 277 | break; | |
280 | } | 278 | } | |
281 | } | 279 | } | |
282 | dv = getdisk(buf, len, bootpartition, &nrootdev, 0); | 280 | dv = getdisk(buf, len, bootpartition, &nrootdev, 0); | |
283 | if (dv != NULL) { | 281 | if (dv != NULL) { | |
284 | rootdv = dv; | 282 | rootdv = dv; | |
285 | break; | 283 | break; | |
286 | } | 284 | } | |
287 | } | 285 | } | |
288 | 286 | |||
289 | /* | 287 | /* | |
290 | * Set up the default dump device. If root is on | 288 | * Set up the default dump device. If root is on | |
291 | * a network device, there is no default dump | 289 | * a network device, there is no default dump | |
292 | * device, since we don't support dumps to the | 290 | * device, since we don't support dumps to the | |
293 | * network. | 291 | * network. | |
294 | */ | 292 | */ | |
295 | if (DEV_USES_PARTITIONS(rootdv) == 0) | 293 | if (DEV_USES_PARTITIONS(rootdv) == 0) | |
296 | defdumpdv = NULL; | 294 | defdumpdv = NULL; | |
297 | else | 295 | else | |
298 | defdumpdv = rootdv; | 296 | defdumpdv = rootdv; | |
299 | 297 | |||
300 | for (;;) { | 298 | for (;;) { | |
301 | printf("dump device"); | 299 | printf("dump device"); | |
302 | if (defdumpdv != NULL) { | 300 | if (defdumpdv != NULL) { | |
303 | /* | 301 | /* | |
304 | * Note, we know it's a disk if we get here. | 302 | * Note, we know it's a disk if we get here. | |
305 | */ | 303 | */ | |
306 | printf(" (default %sb)", device_xname(defdumpdv)); | 304 | printf(" (default %sb)", device_xname(defdumpdv)); | |
307 | } | 305 | } | |
308 | printf(": "); | 306 | printf(": "); | |
309 | len = cngetsn(buf, sizeof(buf)); | 307 | len = cngetsn(buf, sizeof(buf)); | |
310 | if (len == 0) { | 308 | if (len == 0) { | |
311 | if (defdumpdv != NULL) { | 309 | if (defdumpdv != NULL) { | |
312 | ndumpdev = MAKEDISKDEV(major(nrootdev), | 310 | ndumpdev = MAKEDISKDEV(major(nrootdev), | |
313 | DISKUNIT(nrootdev), 1); | 311 | DISKUNIT(nrootdev), 1); | |
314 | } | 312 | } | |
315 | dumpdv = defdumpdv; | 313 | dumpdv = defdumpdv; | |
316 | break; | 314 | break; | |
317 | } | 315 | } | |
318 | if (len == 4 && strcmp(buf, "none") == 0) { | 316 | if (len == 4 && strcmp(buf, "none") == 0) { | |
319 | dumpdv = NULL; | 317 | dumpdv = NULL; | |
320 | break; | 318 | break; | |
321 | } | 319 | } | |
322 | dv = getdisk(buf, len, 1, &ndumpdev, 1); | 320 | dv = getdisk(buf, len, 1, &ndumpdev, 1); | |
323 | if (dv != NULL) { | 321 | if (dv != NULL) { | |
324 | dumpdv = dv; | 322 | dumpdv = dv; | |
325 | break; | 323 | break; | |
326 | } | 324 | } | |
327 | } | 325 | } | |
328 | 326 | |||
329 | rootdev = nrootdev; | 327 | rootdev = nrootdev; | |
330 | dumpdev = ndumpdev; | 328 | dumpdev = ndumpdev; | |
331 | 329 | |||
332 | for (vops = LIST_FIRST(&vfs_list); vops != NULL; | 330 | for (vops = LIST_FIRST(&vfs_list); vops != NULL; | |
333 | vops = LIST_NEXT(vops, vfs_list)) { | 331 | vops = LIST_NEXT(vops, vfs_list)) { | |
334 | if (vops->vfs_mountroot != NULL && | 332 | if (vops->vfs_mountroot != NULL && | |
335 | strcmp(rootfstype, vops->vfs_name) == 0) | 333 | strcmp(rootfstype, vops->vfs_name) == 0) | |
336 | break; | 334 | break; | |
337 | } | 335 | } | |
338 | 336 | |||
339 | if (vops == NULL) { | 337 | if (vops == NULL) { | |
340 | deffsname = "generic"; | 338 | deffsname = "generic"; | |
341 | } else | 339 | } else | |
342 | deffsname = vops->vfs_name; | 340 | deffsname = vops->vfs_name; | |
343 | 341 | |||
344 | for (;;) { | 342 | for (;;) { | |
345 | printf("file system (default %s): ", deffsname); | 343 | printf("file system (default %s): ", deffsname); | |
346 | len = cngetsn(buf, sizeof(buf)); | 344 | len = cngetsn(buf, sizeof(buf)); | |
347 | if (len == 0) { | 345 | if (len == 0) { | |
348 | if (strcmp(deffsname, "generic") == 0) | 346 | if (strcmp(deffsname, "generic") == 0) | |
349 | rootfstype = ROOT_FSTYPE_ANY; | 347 | rootfstype = ROOT_FSTYPE_ANY; | |
350 | break; | 348 | break; | |
351 | } | 349 | } | |
352 | if (len == 4 && strcmp(buf, "halt") == 0) | 350 | if (len == 4 && strcmp(buf, "halt") == 0) | |
353 | cpu_reboot(RB_HALT, NULL); | 351 | cpu_reboot(RB_HALT, NULL); | |
354 | else if (len == 6 && strcmp(buf, "reboot") == 0) | 352 | else if (len == 6 && strcmp(buf, "reboot") == 0) | |
355 | cpu_reboot(0, NULL); | 353 | cpu_reboot(0, NULL); | |
356 | #if defined(DDB) | 354 | #if defined(DDB) | |
357 | else if (len == 3 && strcmp(buf, "ddb") == 0) { | 355 | else if (len == 3 && strcmp(buf, "ddb") == 0) { | |
358 | console_debugger(); | 356 | console_debugger(); | |
359 | } | 357 | } | |
360 | #endif | 358 | #endif | |
361 | else if (len == 7 && strcmp(buf, "generic") == 0) { | 359 | else if (len == 7 && strcmp(buf, "generic") == 0) { | |
362 | rootfstype = ROOT_FSTYPE_ANY; | 360 | rootfstype = ROOT_FSTYPE_ANY; | |
363 | break; | 361 | break; | |
364 | } | 362 | } | |
365 | vops = vfs_getopsbyname(buf); | 363 | vops = vfs_getopsbyname(buf); | |
366 | if (vops == NULL || vops->vfs_mountroot == NULL) { | 364 | if (vops == NULL || vops->vfs_mountroot == NULL) { | |
367 | printf("use one of: generic"); | 365 | printf("use one of: generic"); | |
368 | for (vops = LIST_FIRST(&vfs_list); | 366 | for (vops = LIST_FIRST(&vfs_list); | |
369 | vops != NULL; | 367 | vops != NULL; | |
370 | vops = LIST_NEXT(vops, vfs_list)) { | 368 | vops = LIST_NEXT(vops, vfs_list)) { | |
371 | if (vops->vfs_mountroot != NULL) | 369 | if (vops->vfs_mountroot != NULL) | |
372 | printf(" %s", vops->vfs_name); | 370 | printf(" %s", vops->vfs_name); | |
373 | } | 371 | } | |
374 | if (vops != NULL) | 372 | if (vops != NULL) | |
375 | vfs_delref(vops); | 373 | vfs_delref(vops); | |
376 | #if defined(DDB) | 374 | #if defined(DDB) | |
377 | printf(" ddb"); | 375 | printf(" ddb"); | |
378 | #endif | 376 | #endif | |
379 | printf(" halt reboot\n"); | 377 | printf(" halt reboot\n"); | |
380 | } else { | 378 | } else { | |
381 | /* | 379 | /* | |
382 | * XXX If *vops gets freed between here and | 380 | * XXX If *vops gets freed between here and | |
383 | * the call to mountroot(), rootfstype will | 381 | * the call to mountroot(), rootfstype will | |
384 | * point to something unexpected. But in | 382 | * point to something unexpected. But in | |
385 | * this case the system will fail anyway. | 383 | * this case the system will fail anyway. | |
386 | */ | 384 | */ | |
387 | rootfstype = vops->vfs_name; | 385 | rootfstype = vops->vfs_name; | |
388 | vfs_delref(vops); | 386 | vfs_delref(vops); | |
389 | break; | 387 | break; | |
390 | } | 388 | } | |
391 | } | 389 | } | |
392 | 390 | |||
393 | } else if (rootspec == NULL) { | 391 | } else if (rootspec == NULL) { | |
394 | /* | 392 | /* | |
395 | * Wildcarded root; use the boot device. | 393 | * Wildcarded root; use the boot device. | |
396 | */ | 394 | */ | |
397 | rootdv = bootdv; | 395 | rootdv = bootdv; | |
398 | 396 | |||
399 | if (bootdv) | 397 | if (bootdv) | |
400 | majdev = devsw_name2blk(device_xname(bootdv), NULL, 0); | 398 | majdev = devsw_name2blk(device_xname(bootdv), NULL, 0); | |
401 | else | 399 | else | |
402 | majdev = -1; | 400 | majdev = -1; | |
403 | if (majdev >= 0) { | 401 | if (majdev >= 0) { | |
404 | /* | 402 | /* | |
405 | * Root is on a disk. `bootpartition' is root, | 403 | * Root is on a disk. `bootpartition' is root, | |
406 | * unless the device does not use partitions. | 404 | * unless the device does not use partitions. | |
407 | */ | 405 | */ | |
408 | if (DEV_USES_PARTITIONS(bootdv)) | 406 | if (DEV_USES_PARTITIONS(bootdv)) | |
409 | rootdev = MAKEDISKDEV(majdev, | 407 | rootdev = MAKEDISKDEV(majdev, | |
410 | device_unit(bootdv), | 408 | device_unit(bootdv), | |
411 | bootpartition); | 409 | bootpartition); | |
412 | else | 410 | else | |
413 | rootdev = makedev(majdev, device_unit(bootdv)); | 411 | rootdev = makedev(majdev, device_unit(bootdv)); | |
414 | } | 412 | } | |
415 | } else { | 413 | } else { | |
416 | 414 | |||
417 | /* | 415 | /* | |
418 | * `root on <dev> ...' | 416 | * `root on <dev> ...' | |
419 | */ | 417 | */ | |
420 | 418 | |||
421 | /* | 419 | /* | |
422 | * If it's a network interface, we can bail out | 420 | * If it's a network interface, we can bail out | |
423 | * early. | 421 | * early. | |
424 | */ | 422 | */ | |
425 | dv = finddevice(rootspec); | 423 | dv = finddevice(rootspec); | |
426 | if (dv != NULL && device_class(dv) == DV_IFNET) { | 424 | if (dv != NULL && device_class(dv) == DV_IFNET) { | |
427 | rootdv = dv; | 425 | rootdv = dv; | |
428 | goto haveroot; | 426 | goto haveroot; | |
429 | } | 427 | } | |
430 | 428 | |||
431 | if (rootdev == NODEV && | 429 | if (rootdev == NODEV && | |
432 | device_class(dv) == DV_DISK && device_is_a(dv, "dk") && | 430 | device_class(dv) == DV_DISK && device_is_a(dv, "dk") && | |
433 | (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0) | 431 | (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0) | |
434 | rootdev = makedev(majdev, device_unit(dv)); | 432 | rootdev = makedev(majdev, device_unit(dv)); | |
435 | 433 | |||
436 | rootdevname = devsw_blk2name(major(rootdev)); | 434 | rootdevname = devsw_blk2name(major(rootdev)); | |
437 | if (rootdevname == NULL) { | 435 | if (rootdevname == NULL) { | |
438 | printf("unknown device major 0x%llx\n", | 436 | printf("unknown device major 0x%llx\n", | |
439 | (unsigned long long)rootdev); | 437 | (unsigned long long)rootdev); | |
440 | boothowto |= RB_ASKNAME; | 438 | boothowto |= RB_ASKNAME; | |
441 | goto top; | 439 | goto top; | |
442 | } | 440 | } | |
443 | memset(buf, 0, sizeof(buf)); | 441 | memset(buf, 0, sizeof(buf)); | |
444 | snprintf(buf, sizeof(buf), "%s%llu", rootdevname, | 442 | snprintf(buf, sizeof(buf), "%s%llu", rootdevname, | |
445 | (unsigned long long)DISKUNIT(rootdev)); | 443 | (unsigned long long)DISKUNIT(rootdev)); | |
446 | 444 | |||
447 | rootdv = finddevice(buf); | 445 | rootdv = finddevice(buf); | |
448 | if (rootdv == NULL) { | 446 | if (rootdv == NULL) { | |
449 | printf("device %s (0x%llx) not configured\n", | 447 | printf("device %s (0x%llx) not configured\n", | |
450 | buf, (unsigned long long)rootdev); | 448 | buf, (unsigned long long)rootdev); | |
451 | boothowto |= RB_ASKNAME; | 449 | boothowto |= RB_ASKNAME; | |
452 | goto top; | 450 | goto top; | |
453 | } | 451 | } | |
454 | } | 452 | } | |
455 | 453 | |||
456 | haveroot: | 454 | haveroot: | |
457 | 455 | |||
458 | root_device = rootdv; | 456 | root_device = rootdv; | |
459 | 457 | |||
460 | switch (device_class(rootdv)) { | 458 | switch (device_class(rootdv)) { | |
461 | case DV_IFNET: | 459 | case DV_IFNET: | |
462 | case DV_DISK: | 460 | case DV_DISK: | |
463 | aprint_normal("root on %s", device_xname(rootdv)); | 461 | aprint_normal("root on %s", device_xname(rootdv)); | |
464 | if (DEV_USES_PARTITIONS(rootdv)) | 462 | if (DEV_USES_PARTITIONS(rootdv)) | |
465 | aprint_normal("%c", (int)DISKPART(rootdev) + 'a'); | 463 | aprint_normal("%c", (int)DISKPART(rootdev) + 'a'); | |
466 | break; | 464 | break; | |
467 | 465 | |||
468 | default: | 466 | default: | |
469 | printf("can't determine root device\n"); | 467 | printf("can't determine root device\n"); | |
470 | boothowto |= RB_ASKNAME; | 468 | boothowto |= RB_ASKNAME; | |
471 | goto top; | 469 | goto top; | |
472 | } | 470 | } | |
473 | 471 | |||
474 | /* | 472 | /* | |
475 | * Now configure the dump device. | 473 | * Now configure the dump device. | |
476 | * | 474 | * | |
477 | * If we haven't figured out the dump device, do so, with | 475 | * If we haven't figured out the dump device, do so, with | |
478 | * the following rules: | 476 | * the following rules: | |
479 | * | 477 | * | |
480 | * (a) We already know dumpdv in the RB_ASKNAME case. | 478 | * (a) We already know dumpdv in the RB_ASKNAME case. | |
481 | * | 479 | * | |
482 | * (b) If dumpspec is set, try to use it. If the device | 480 | * (b) If dumpspec is set, try to use it. If the device | |
483 | * is not available, punt. | 481 | * is not available, punt. | |
484 | * | 482 | * | |
485 | * (c) If dumpspec is not set, the dump device is | 483 | * (c) If dumpspec is not set, the dump device is | |
486 | * wildcarded or unspecified. If the root device | 484 | * wildcarded or unspecified. If the root device | |
487 | * is DV_IFNET, punt. Otherwise, use partition b | 485 | * is DV_IFNET, punt. Otherwise, use partition b | |
488 | * of the root device. | 486 | * of the root device. | |
489 | */ | 487 | */ | |
490 | 488 | |||
491 | if (boothowto & RB_ASKNAME) { /* (a) */ | 489 | if (boothowto & RB_ASKNAME) { /* (a) */ | |
492 | if (dumpdv == NULL) | 490 | if (dumpdv == NULL) | |
493 | goto nodumpdev; | 491 | goto nodumpdev; | |
494 | } else if (dumpspec != NULL) { /* (b) */ | 492 | } else if (dumpspec != NULL) { /* (b) */ | |
495 | if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { | 493 | if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { | |
496 | /* | 494 | /* | |
497 | * Operator doesn't want a dump device. | 495 | * Operator doesn't want a dump device. | |
498 | * Or looks like they tried to pick a network | 496 | * Or looks like they tried to pick a network | |
499 | * device. Oops. | 497 | * device. Oops. | |
500 | */ | 498 | */ | |
501 | goto nodumpdev; | 499 | goto nodumpdev; | |
502 | } | 500 | } | |
503 | 501 | |||
504 | dumpdevname = devsw_blk2name(major(dumpdev)); | 502 | dumpdevname = devsw_blk2name(major(dumpdev)); | |
505 | if (dumpdevname == NULL) | 503 | if (dumpdevname == NULL) | |
506 | goto nodumpdev; | 504 | goto nodumpdev; | |
507 | memset(buf, 0, sizeof(buf)); | 505 | memset(buf, 0, sizeof(buf)); | |
508 | snprintf(buf, sizeof(buf), "%s%llu", dumpdevname, | 506 | snprintf(buf, sizeof(buf), "%s%llu", dumpdevname, | |
509 | (unsigned long long)DISKUNIT(dumpdev)); | 507 | (unsigned long long)DISKUNIT(dumpdev)); | |
510 | 508 | |||
511 | dumpdv = finddevice(buf); | 509 | dumpdv = finddevice(buf); | |
512 | if (dumpdv == NULL) { | 510 | if (dumpdv == NULL) { | |
513 | /* | 511 | /* | |
514 | * Device not configured. | 512 | * Device not configured. | |
515 | */ | 513 | */ | |
516 | goto nodumpdev; | 514 | goto nodumpdev; | |
517 | } | 515 | } | |
518 | } else { /* (c) */ | 516 | } else { /* (c) */ | |
519 | if (DEV_USES_PARTITIONS(rootdv) == 0) { | 517 | if (DEV_USES_PARTITIONS(rootdv) == 0) { | |
520 | for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); | 518 | for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); | |
521 | dv != NULL; | 519 | dv != NULL; | |
522 | dv = deviter_next(&di)) | 520 | dv = deviter_next(&di)) | |
523 | if (isswap(dv)) | 521 | if (isswap(dv)) | |
524 | break; | 522 | break; | |
525 | deviter_release(&di); | 523 | deviter_release(&di); | |
526 | if (dv == NULL) | 524 | if (dv == NULL) | |
527 | goto nodumpdev; | 525 | goto nodumpdev; | |
528 | 526 | |||
529 | majdev = devsw_name2blk(device_xname(dv), NULL, 0); | 527 | majdev = devsw_name2blk(device_xname(dv), NULL, 0); | |
530 | if (majdev < 0) | 528 | if (majdev < 0) | |
531 | goto nodumpdev; | 529 | goto nodumpdev; | |
532 | dumpdv = dv; | 530 | dumpdv = dv; | |
533 | dumpdev = makedev(majdev, device_unit(dumpdv)); | 531 | dumpdev = makedev(majdev, device_unit(dumpdv)); | |
534 | } else { | 532 | } else { | |
535 | dumpdv = rootdv; | 533 | dumpdv = rootdv; | |
536 | dumpdev = MAKEDISKDEV(major(rootdev), | 534 | dumpdev = MAKEDISKDEV(major(rootdev), | |
537 | device_unit(dumpdv), 1); | 535 | device_unit(dumpdv), 1); | |
538 | } | 536 | } | |
539 | } | 537 | } | |
540 | 538 | |||
541 | dumpcdev = devsw_blk2chr(dumpdev); | 539 | dumpcdev = devsw_blk2chr(dumpdev); | |
542 | aprint_normal(" dumps on %s", device_xname(dumpdv)); | 540 | aprint_normal(" dumps on %s", device_xname(dumpdv)); | |
543 | if (DEV_USES_PARTITIONS(dumpdv)) | 541 | if (DEV_USES_PARTITIONS(dumpdv)) | |
544 | aprint_normal("%c", (int)DISKPART(dumpdev) + 'a'); | 542 | aprint_normal("%c", (int)DISKPART(dumpdev) + 'a'); | |
545 | aprint_normal("\n"); | 543 | aprint_normal("\n"); | |
546 | return; | 544 | return; | |
547 | 545 | |||
548 | nodumpdev: | 546 | nodumpdev: | |
549 | dumpdev = NODEV; | 547 | dumpdev = NODEV; | |
550 | dumpcdev = NODEV; | 548 | dumpcdev = NODEV; | |
551 | aprint_normal("\n"); | 549 | aprint_normal("\n"); | |
552 | } | 550 | } | |
553 | 551 | |||
554 | static device_t | 552 | static device_t | |
555 | finddevice(const char *name) | 553 | finddevice(const char *name) | |
556 | { | 554 | { | |
557 | const char *wname; | 555 | const char *wname; | |
558 | 556 | |||
559 | if ((wname = getwedgename(name, strlen(name))) != NULL) | 557 | if ((wname = getwedgename(name, strlen(name))) != NULL) | |
560 | return dkwedge_find_by_wname(wname); | 558 | return dkwedge_find_by_wname(wname); | |
561 | 559 | |||
562 | return device_find_by_xname(name); | 560 | return device_find_by_xname(name); | |
563 | } | 561 | } | |
564 | 562 | |||
565 | static device_t | 563 | static device_t | |
566 | getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) | 564 | getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) | |
567 | { | 565 | { | |
568 | device_t dv; | 566 | device_t dv; | |
569 | deviter_t di; | 567 | deviter_t di; | |
570 | 568 | |||
571 | if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { | 569 | if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { | |
572 | printf("use one of:"); | 570 | printf("use one of:"); | |
573 | for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; | 571 | for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; | |
574 | dv = deviter_next(&di)) { | 572 | dv = deviter_next(&di)) { | |
575 | if (DEV_USES_PARTITIONS(dv)) | 573 | if (DEV_USES_PARTITIONS(dv)) | |
576 | printf(" %s[a-%c]", device_xname(dv), | 574 | printf(" %s[a-%c]", device_xname(dv), | |
577 | 'a' + MAXPARTITIONS - 1); | 575 | 'a' + MAXPARTITIONS - 1); | |
578 | else if (device_class(dv) == DV_DISK) | 576 | else if (device_class(dv) == DV_DISK) | |
579 | printf(" %s", device_xname(dv)); | 577 | printf(" %s", device_xname(dv)); | |
580 | if (isdump == 0 && device_class(dv) == DV_IFNET) | 578 | if (isdump == 0 && device_class(dv) == DV_IFNET) | |
581 | printf(" %s", device_xname(dv)); | 579 | printf(" %s", device_xname(dv)); | |
582 | } | 580 | } | |
583 | deviter_release(&di); | 581 | deviter_release(&di); | |
584 | dkwedge_print_wnames(); | 582 | dkwedge_print_wnames(); | |
585 | if (isdump) | 583 | if (isdump) | |
586 | printf(" none"); | 584 | printf(" none"); | |
587 | #if defined(DDB) | 585 | #if defined(DDB) | |
588 | printf(" ddb"); | 586 | printf(" ddb"); | |
589 | #endif | 587 | #endif | |
590 | printf(" halt reboot\n"); | 588 | printf(" halt reboot\n"); | |
591 | } | 589 | } | |
592 | return dv; | 590 | return dv; | |
593 | } | 591 | } | |
594 | 592 | |||
595 | static const char * | 593 | static const char * | |
596 | getwedgename(const char *name, int namelen) | 594 | getwedgename(const char *name, int namelen) | |
597 | { | 595 | { | |
598 | const char *wpfx = "wedge:"; | 596 | const char *wpfx = "wedge:"; | |
599 | const int wpfxlen = strlen(wpfx); | 597 | const int wpfxlen = strlen(wpfx); | |
600 | 598 | |||
601 | if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) | 599 | if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) | |
602 | return NULL; | 600 | return NULL; | |
603 | 601 | |||
604 | return name + wpfxlen; | 602 | return name + wpfxlen; | |
605 | } | 603 | } | |
606 | 604 | |||
607 | static device_t | 605 | static device_t | |
608 | parsedisk(char *str, int len, int defpart, dev_t *devp) | 606 | parsedisk(char *str, int len, int defpart, dev_t *devp) | |
609 | { | 607 | { | |
610 | device_t dv; | 608 | device_t dv; | |
611 | const char *wname; | 609 | const char *wname; | |
612 | char *cp, c; | 610 | char *cp, c; | |
613 | int majdev, part; | 611 | int majdev, part; | |
614 | if (len == 0) | 612 | if (len == 0) | |
615 | return (NULL); | 613 | return (NULL); | |
616 | 614 | |||
617 | if (len == 4 && strcmp(str, "halt") == 0) | 615 | if (len == 4 && strcmp(str, "halt") == 0) | |
618 | cpu_reboot(RB_HALT, NULL); | 616 | cpu_reboot(RB_HALT, NULL); | |
619 | else if (len == 6 && strcmp(str, "reboot") == 0) | 617 | else if (len == 6 && strcmp(str, "reboot") == 0) | |
620 | cpu_reboot(0, NULL); | 618 | cpu_reboot(0, NULL); | |
621 | #if defined(DDB) | 619 | #if defined(DDB) | |
622 | else if (len == 3 && strcmp(str, "ddb") == 0) | 620 | else if (len == 3 && strcmp(str, "ddb") == 0) | |
623 | console_debugger(); | 621 | console_debugger(); | |
624 | #endif | 622 | #endif | |
625 | 623 | |||
626 | cp = str + len - 1; | 624 | cp = str + len - 1; | |
627 | c = *cp; | 625 | c = *cp; | |
628 | 626 | |||
629 | if ((wname = getwedgename(str, len)) != NULL) { | 627 | if ((wname = getwedgename(str, len)) != NULL) { | |
630 | if ((dv = dkwedge_find_by_wname(wname)) == NULL) | 628 | if ((dv = dkwedge_find_by_wname(wname)) == NULL) | |
631 | return NULL; | 629 | return NULL; | |
632 | part = defpart; | 630 | part = defpart; | |
633 | goto gotdisk; | 631 | goto gotdisk; | |
634 | } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { | 632 | } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { | |
635 | part = c - 'a'; | 633 | part = c - 'a'; | |
636 | *cp = '\0'; | 634 | *cp = '\0'; | |
637 | } else | 635 | } else | |
638 | part = defpart; | 636 | part = defpart; | |
639 | 637 | |||
640 | dv = finddevice(str); | 638 | dv = finddevice(str); | |
641 | if (dv != NULL) { | 639 | if (dv != NULL) { | |
642 | if (device_class(dv) == DV_DISK) { | 640 | if (device_class(dv) == DV_DISK) { | |
643 | gotdisk: | 641 | gotdisk: | |
644 | majdev = devsw_name2blk(device_xname(dv), NULL, 0); | 642 | majdev = devsw_name2blk(device_xname(dv), NULL, 0); | |
645 | if (majdev < 0) | 643 | if (majdev < 0) | |
646 | panic("parsedisk"); | 644 | panic("parsedisk"); | |
647 | if (DEV_USES_PARTITIONS(dv)) | 645 | if (DEV_USES_PARTITIONS(dv)) | |
648 | *devp = MAKEDISKDEV(majdev, device_unit(dv), | 646 | *devp = MAKEDISKDEV(majdev, device_unit(dv), | |
649 | part); | 647 | part); | |
650 | else | 648 | else | |
651 | *devp = makedev(majdev, device_unit(dv)); | 649 | *devp = makedev(majdev, device_unit(dv)); | |
652 | } | 650 | } | |
653 | 651 | |||
654 | if (device_class(dv) == DV_IFNET) | 652 | if (device_class(dv) == DV_IFNET) | |
655 | *devp = NODEV; | 653 | *devp = NODEV; | |
656 | } | 654 | } | |
657 | 655 | |||
658 | *cp = c; | 656 | *cp = c; | |
659 | return (dv); | 657 | return (dv); | |
660 | } | 658 | } | |
661 | 659 | |||
662 | /* | 660 | /* | |
663 | * Return true if system call tracing is enabled for the specified process. | 661 | * Return true if system call tracing is enabled for the specified process. | |
664 | */ | 662 | */ | |
665 | bool | 663 | bool | |
666 | trace_is_enabled(struct proc *p) | 664 | trace_is_enabled(struct proc *p) | |
667 | { | 665 | { | |
668 | #ifdef SYSCALL_DEBUG | 666 | #ifdef SYSCALL_DEBUG | |
669 | return (true); | 667 | return (true); | |
670 | #endif | 668 | #endif | |
671 | #ifdef KTRACE | 669 | #ifdef KTRACE | |
672 | if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) | 670 | if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) | |
673 | return (true); | 671 | return (true); | |
674 | #endif | 672 | #endif | |
675 | #ifdef PTRACE | 673 | #ifdef PTRACE | |
676 | if (ISSET(p->p_slflag, PSL_SYSCALL)) | 674 | if (ISSET(p->p_slflag, PSL_SYSCALL)) | |
677 | return (true); | 675 | return (true); | |
678 | #endif | 676 | #endif | |
679 | 677 | |||
680 | return (false); | 678 | return (false); | |
681 | } | 679 | } | |
682 | 680 | |||
683 | /* | 681 | /* | |
684 | * Start trace of particular system call. If process is being traced, | 682 | * Start trace of particular system call. If process is being traced, | |
685 | * this routine is called by MD syscall dispatch code just before | 683 | * this routine is called by MD syscall dispatch code just before | |
686 | * a system call is actually executed. | 684 | * a system call is actually executed. | |
687 | */ | 685 | */ | |
688 | int | 686 | int | |
689 | trace_enter(register_t code, const register_t *args, int narg) | 687 | trace_enter(register_t code, const register_t *args, int narg) | |
690 | { | 688 | { | |
691 | #ifdef SYSCALL_DEBUG | 689 | #ifdef SYSCALL_DEBUG | |
692 | scdebug_call(code, args); | 690 | scdebug_call(code, args); | |
693 | #endif /* SYSCALL_DEBUG */ | 691 | #endif /* SYSCALL_DEBUG */ | |
694 | 692 | |||
695 | ktrsyscall(code, args, narg); | 693 | ktrsyscall(code, args, narg); | |
696 | 694 | |||
697 | #ifdef PTRACE | 695 | #ifdef PTRACE | |
698 | if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == | 696 | if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == | |
699 | (PSL_SYSCALL|PSL_TRACED)) | 697 | (PSL_SYSCALL|PSL_TRACED)) | |
700 | process_stoptrace(); | 698 | process_stoptrace(); | |
701 | #endif | 699 | #endif | |
702 | return 0; | 700 | return 0; | |
703 | } | 701 | } | |
704 | 702 | |||
705 | /* | 703 | /* | |
706 | * End trace of particular system call. If process is being traced, | 704 | * End trace of particular system call. If process is being traced, | |
707 | * this routine is called by MD syscall dispatch code just after | 705 | * this routine is called by MD syscall dispatch code just after | |
708 | * a system call finishes. | 706 | * a system call finishes. | |
709 | * MD caller guarantees the passed 'code' is within the supported | 707 | * MD caller guarantees the passed 'code' is within the supported | |
710 | * system call number range for emulation the process runs under. | 708 | * system call number range for emulation the process runs under. | |
711 | */ | 709 | */ | |
712 | void | 710 | void | |
713 | trace_exit(register_t code, register_t rval[], int error) | 711 | trace_exit(register_t code, register_t rval[], int error) | |
714 | { | 712 | { | |
715 | #ifdef SYSCALL_DEBUG | 713 | #ifdef SYSCALL_DEBUG | |
716 | scdebug_ret(code, error, rval); | 714 | scdebug_ret(code, error, rval); | |
717 | #endif /* SYSCALL_DEBUG */ | 715 | #endif /* SYSCALL_DEBUG */ | |
718 | 716 | |||
719 | ktrsysret(code, error, rval); | 717 | ktrsysret(code, error, rval); | |
720 | 718 | |||
721 | #ifdef PTRACE | 719 | #ifdef PTRACE | |
722 | if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == | 720 | if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == | |
723 | (PSL_SYSCALL|PSL_TRACED)) | 721 | (PSL_SYSCALL|PSL_TRACED)) | |
724 | process_stoptrace(); | 722 | process_stoptrace(); | |
725 | #endif | 723 | #endif | |
726 | } | 724 | } |
--- src/sys/kern/kern_time.c 2011/04/08 10:35:37 1.168
+++ src/sys/kern/kern_time.c 2011/07/27 14:35:34 1.169
@@ -1,1083 +1,1081 @@ | @@ -1,1083 +1,1081 @@ | |||
1 | /* $NetBSD: kern_time.c,v 1.168 2011/04/08 10:35:37 yamt Exp $ */ | 1 | /* $NetBSD: kern_time.c,v 1.169 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Christopher G. Demetriou, and by Andrew Doran. | 8 | * by Christopher G. Demetriou, and by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 1982, 1986, 1989, 1993 | 33 | * Copyright (c) 1982, 1986, 1989, 1993 | |
34 | * The Regents of the University of California. All rights reserved. | 34 | * The Regents of the University of California. All rights reserved. | |
35 | * | 35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | 36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | 37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | 38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | 39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | 40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | 41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | 42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | 43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. Neither the name of the University nor the names of its contributors | 44 | * 3. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | 45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | 46 | * without specific prior written permission. | |
47 | * | 47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | 58 | * SUCH DAMAGE. | |
59 | * | 59 | * | |
60 | * @(#)kern_time.c 8.4 (Berkeley) 5/26/95 | 60 | * @(#)kern_time.c 8.4 (Berkeley) 5/26/95 | |
61 | */ | 61 | */ | |
62 | 62 | |||
63 | #include <sys/cdefs.h> | 63 | #include <sys/cdefs.h> | |
64 | __KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.168 2011/04/08 10:35:37 yamt Exp $"); | 64 | __KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.169 2011/07/27 14:35:34 uebayasi Exp $"); | |
65 | 65 | |||
66 | #include <sys/param.h> | 66 | #include <sys/param.h> | |
67 | #include <sys/resourcevar.h> | 67 | #include <sys/resourcevar.h> | |
68 | #include <sys/kernel.h> | 68 | #include <sys/kernel.h> | |
69 | #include <sys/systm.h> | 69 | #include <sys/systm.h> | |
70 | #include <sys/proc.h> | 70 | #include <sys/proc.h> | |
71 | #include <sys/vnode.h> | 71 | #include <sys/vnode.h> | |
72 | #include <sys/signalvar.h> | 72 | #include <sys/signalvar.h> | |
73 | #include <sys/syslog.h> | 73 | #include <sys/syslog.h> | |
74 | #include <sys/timetc.h> | 74 | #include <sys/timetc.h> | |
75 | #include <sys/timex.h> | 75 | #include <sys/timex.h> | |
76 | #include <sys/kauth.h> | 76 | #include <sys/kauth.h> | |
77 | #include <sys/mount.h> | 77 | #include <sys/mount.h> | |
78 | #include <sys/sa.h> | 78 | #include <sys/sa.h> | |
79 | #include <sys/savar.h> | 79 | #include <sys/savar.h> | |
80 | #include <sys/syscallargs.h> | 80 | #include <sys/syscallargs.h> | |
81 | #include <sys/cpu.h> | 81 | #include <sys/cpu.h> | |
82 | 82 | |||
83 | #include <uvm/uvm_extern.h> | |||
84 | ||||
85 | #include "opt_sa.h" | 83 | #include "opt_sa.h" | |
86 | 84 | |||
87 | static void timer_intr(void *); | 85 | static void timer_intr(void *); | |
88 | static void itimerfire(struct ptimer *); | 86 | static void itimerfire(struct ptimer *); | |
89 | static void itimerfree(struct ptimers *, int); | 87 | static void itimerfree(struct ptimers *, int); | |
90 | 88 | |||
91 | kmutex_t timer_lock; | 89 | kmutex_t timer_lock; | |
92 | 90 | |||
93 | static void *timer_sih; | 91 | static void *timer_sih; | |
94 | static TAILQ_HEAD(, ptimer) timer_queue; | 92 | static TAILQ_HEAD(, ptimer) timer_queue; | |
95 | 93 | |||
96 | struct pool ptimer_pool, ptimers_pool; | 94 | struct pool ptimer_pool, ptimers_pool; | |
97 | 95 | |||
98 | #define CLOCK_VIRTUAL_P(clockid) \ | 96 | #define CLOCK_VIRTUAL_P(clockid) \ | |
99 | ((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF) | 97 | ((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF) | |
100 | 98 | |||
101 | CTASSERT(ITIMER_REAL == CLOCK_REALTIME); | 99 | CTASSERT(ITIMER_REAL == CLOCK_REALTIME); | |
102 | CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL); | 100 | CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL); | |
103 | CTASSERT(ITIMER_PROF == CLOCK_PROF); | 101 | CTASSERT(ITIMER_PROF == CLOCK_PROF); | |
104 | 102 | |||
105 | /* | 103 | /* | |
106 | * Initialize timekeeping. | 104 | * Initialize timekeeping. | |
107 | */ | 105 | */ | |
108 | void | 106 | void | |
109 | time_init(void) | 107 | time_init(void) | |
110 | { | 108 | { | |
111 | 109 | |||
112 | pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl", | 110 | pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl", | |
113 | &pool_allocator_nointr, IPL_NONE); | 111 | &pool_allocator_nointr, IPL_NONE); | |
114 | pool_init(&ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl", | 112 | pool_init(&ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl", | |
115 | &pool_allocator_nointr, IPL_NONE); | 113 | &pool_allocator_nointr, IPL_NONE); | |
116 | } | 114 | } | |
117 | 115 | |||
118 | void | 116 | void | |
119 | time_init2(void) | 117 | time_init2(void) | |
120 | { | 118 | { | |
121 | 119 | |||
122 | TAILQ_INIT(&timer_queue); | 120 | TAILQ_INIT(&timer_queue); | |
123 | mutex_init(&timer_lock, MUTEX_DEFAULT, IPL_SCHED); | 121 | mutex_init(&timer_lock, MUTEX_DEFAULT, IPL_SCHED); | |
124 | timer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, | 122 | timer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, | |
125 | timer_intr, NULL); | 123 | timer_intr, NULL); | |
126 | } | 124 | } | |
127 | 125 | |||
128 | /* Time of day and interval timer support. | 126 | /* Time of day and interval timer support. | |
129 | * | 127 | * | |
130 | * These routines provide the kernel entry points to get and set | 128 | * These routines provide the kernel entry points to get and set | |
131 | * the time-of-day and per-process interval timers. Subroutines | 129 | * the time-of-day and per-process interval timers. Subroutines | |
132 | * here provide support for adding and subtracting timeval structures | 130 | * here provide support for adding and subtracting timeval structures | |
133 | * and decrementing interval timers, optionally reloading the interval | 131 | * and decrementing interval timers, optionally reloading the interval | |
134 | * timers when they expire. | 132 | * timers when they expire. | |
135 | */ | 133 | */ | |
136 | 134 | |||
137 | /* This function is used by clock_settime and settimeofday */ | 135 | /* This function is used by clock_settime and settimeofday */ | |
138 | static int | 136 | static int | |
139 | settime1(struct proc *p, const struct timespec *ts, bool check_kauth) | 137 | settime1(struct proc *p, const struct timespec *ts, bool check_kauth) | |
140 | { | 138 | { | |
141 | struct timespec delta, now; | 139 | struct timespec delta, now; | |
142 | int s; | 140 | int s; | |
143 | 141 | |||
144 | /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */ | 142 | /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */ | |
145 | s = splclock(); | 143 | s = splclock(); | |
146 | nanotime(&now); | 144 | nanotime(&now); | |
147 | timespecsub(ts, &now, &delta); | 145 | timespecsub(ts, &now, &delta); | |
148 | 146 | |||
149 | if (check_kauth && kauth_authorize_system(kauth_cred_get(), | 147 | if (check_kauth && kauth_authorize_system(kauth_cred_get(), | |
150 | KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts), | 148 | KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts), | |
151 | &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) { | 149 | &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) { | |
152 | splx(s); | 150 | splx(s); | |
153 | return (EPERM); | 151 | return (EPERM); | |
154 | } | 152 | } | |
155 | 153 | |||
156 | #ifdef notyet | 154 | #ifdef notyet | |
157 | if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */ | 155 | if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */ | |
158 | splx(s); | 156 | splx(s); | |
159 | return (EPERM); | 157 | return (EPERM); | |
160 | } | 158 | } | |
161 | #endif | 159 | #endif | |
162 | 160 | |||
163 | tc_setclock(ts); | 161 | tc_setclock(ts); | |
164 | 162 | |||
165 | timespecadd(&boottime, &delta, &boottime); | 163 | timespecadd(&boottime, &delta, &boottime); | |
166 | 164 | |||
167 | resettodr(); | 165 | resettodr(); | |
168 | splx(s); | 166 | splx(s); | |
169 | 167 | |||
170 | return (0); | 168 | return (0); | |
171 | } | 169 | } | |
172 | 170 | |||
173 | int | 171 | int | |
174 | settime(struct proc *p, struct timespec *ts) | 172 | settime(struct proc *p, struct timespec *ts) | |
175 | { | 173 | { | |
176 | return (settime1(p, ts, true)); | 174 | return (settime1(p, ts, true)); | |
177 | } | 175 | } | |
178 | 176 | |||
179 | /* ARGSUSED */ | 177 | /* ARGSUSED */ | |
180 | int | 178 | int | |
181 | sys___clock_gettime50(struct lwp *l, | 179 | sys___clock_gettime50(struct lwp *l, | |
182 | const struct sys___clock_gettime50_args *uap, register_t *retval) | 180 | const struct sys___clock_gettime50_args *uap, register_t *retval) | |
183 | { | 181 | { | |
184 | /* { | 182 | /* { | |
185 | syscallarg(clockid_t) clock_id; | 183 | syscallarg(clockid_t) clock_id; | |
186 | syscallarg(struct timespec *) tp; | 184 | syscallarg(struct timespec *) tp; | |
187 | } */ | 185 | } */ | |
188 | int error; | 186 | int error; | |
189 | struct timespec ats; | 187 | struct timespec ats; | |
190 | 188 | |||
191 | error = clock_gettime1(SCARG(uap, clock_id), &ats); | 189 | error = clock_gettime1(SCARG(uap, clock_id), &ats); | |
192 | if (error != 0) | 190 | if (error != 0) | |
193 | return error; | 191 | return error; | |
194 | 192 | |||
195 | return copyout(&ats, SCARG(uap, tp), sizeof(ats)); | 193 | return copyout(&ats, SCARG(uap, tp), sizeof(ats)); | |
196 | } | 194 | } | |
197 | 195 | |||
198 | int | 196 | int | |
199 | clock_gettime1(clockid_t clock_id, struct timespec *ts) | 197 | clock_gettime1(clockid_t clock_id, struct timespec *ts) | |
200 | { | 198 | { | |
201 | 199 | |||
202 | switch (clock_id) { | 200 | switch (clock_id) { | |
203 | case CLOCK_REALTIME: | 201 | case CLOCK_REALTIME: | |
204 | nanotime(ts); | 202 | nanotime(ts); | |
205 | break; | 203 | break; | |
206 | case CLOCK_MONOTONIC: | 204 | case CLOCK_MONOTONIC: | |
207 | nanouptime(ts); | 205 | nanouptime(ts); | |
208 | break; | 206 | break; | |
209 | default: | 207 | default: | |
210 | return EINVAL; | 208 | return EINVAL; | |
211 | } | 209 | } | |
212 | 210 | |||
213 | return 0; | 211 | return 0; | |
214 | } | 212 | } | |
215 | 213 | |||
216 | /* ARGSUSED */ | 214 | /* ARGSUSED */ | |
217 | int | 215 | int | |
218 | sys___clock_settime50(struct lwp *l, | 216 | sys___clock_settime50(struct lwp *l, | |
219 | const struct sys___clock_settime50_args *uap, register_t *retval) | 217 | const struct sys___clock_settime50_args *uap, register_t *retval) | |
220 | { | 218 | { | |
221 | /* { | 219 | /* { | |
222 | syscallarg(clockid_t) clock_id; | 220 | syscallarg(clockid_t) clock_id; | |
223 | syscallarg(const struct timespec *) tp; | 221 | syscallarg(const struct timespec *) tp; | |
224 | } */ | 222 | } */ | |
225 | int error; | 223 | int error; | |
226 | struct timespec ats; | 224 | struct timespec ats; | |
227 | 225 | |||
228 | if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0) | 226 | if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0) | |
229 | return error; | 227 | return error; | |
230 | 228 | |||
231 | return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true); | 229 | return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true); | |
232 | } | 230 | } | |
233 | 231 | |||
234 | 232 | |||
235 | int | 233 | int | |
236 | clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp, | 234 | clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp, | |
237 | bool check_kauth) | 235 | bool check_kauth) | |
238 | { | 236 | { | |
239 | int error; | 237 | int error; | |
240 | 238 | |||
241 | switch (clock_id) { | 239 | switch (clock_id) { | |
242 | case CLOCK_REALTIME: | 240 | case CLOCK_REALTIME: | |
243 | if ((error = settime1(p, tp, check_kauth)) != 0) | 241 | if ((error = settime1(p, tp, check_kauth)) != 0) | |
244 | return (error); | 242 | return (error); | |
245 | break; | 243 | break; | |
246 | case CLOCK_MONOTONIC: | 244 | case CLOCK_MONOTONIC: | |
247 | return (EINVAL); /* read-only clock */ | 245 | return (EINVAL); /* read-only clock */ | |
248 | default: | 246 | default: | |
249 | return (EINVAL); | 247 | return (EINVAL); | |
250 | } | 248 | } | |
251 | 249 | |||
252 | return 0; | 250 | return 0; | |
253 | } | 251 | } | |
254 | 252 | |||
255 | int | 253 | int | |
256 | sys___clock_getres50(struct lwp *l, const struct sys___clock_getres50_args *uap, | 254 | sys___clock_getres50(struct lwp *l, const struct sys___clock_getres50_args *uap, | |
257 | register_t *retval) | 255 | register_t *retval) | |
258 | { | 256 | { | |
259 | /* { | 257 | /* { | |
260 | syscallarg(clockid_t) clock_id; | 258 | syscallarg(clockid_t) clock_id; | |
261 | syscallarg(struct timespec *) tp; | 259 | syscallarg(struct timespec *) tp; | |
262 | } */ | 260 | } */ | |
263 | struct timespec ts; | 261 | struct timespec ts; | |
264 | int error = 0; | 262 | int error = 0; | |
265 | 263 | |||
266 | if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0) | 264 | if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0) | |
267 | return error; | 265 | return error; | |
268 | 266 | |||
269 | if (SCARG(uap, tp)) | 267 | if (SCARG(uap, tp)) | |
270 | error = copyout(&ts, SCARG(uap, tp), sizeof(ts)); | 268 | error = copyout(&ts, SCARG(uap, tp), sizeof(ts)); | |
271 | 269 | |||
272 | return error; | 270 | return error; | |
273 | } | 271 | } | |
274 | 272 | |||
275 | int | 273 | int | |
276 | clock_getres1(clockid_t clock_id, struct timespec *ts) | 274 | clock_getres1(clockid_t clock_id, struct timespec *ts) | |
277 | { | 275 | { | |
278 | 276 | |||
279 | switch (clock_id) { | 277 | switch (clock_id) { | |
280 | case CLOCK_REALTIME: | 278 | case CLOCK_REALTIME: | |
281 | case CLOCK_MONOTONIC: | 279 | case CLOCK_MONOTONIC: | |
282 | ts->tv_sec = 0; | 280 | ts->tv_sec = 0; | |
283 | if (tc_getfrequency() > 1000000000) | 281 | if (tc_getfrequency() > 1000000000) | |
284 | ts->tv_nsec = 1; | 282 | ts->tv_nsec = 1; | |
285 | else | 283 | else | |
286 | ts->tv_nsec = 1000000000 / tc_getfrequency(); | 284 | ts->tv_nsec = 1000000000 / tc_getfrequency(); | |
287 | break; | 285 | break; | |
288 | default: | 286 | default: | |
289 | return EINVAL; | 287 | return EINVAL; | |
290 | } | 288 | } | |
291 | 289 | |||
292 | return 0; | 290 | return 0; | |
293 | } | 291 | } | |
294 | 292 | |||
295 | /* ARGSUSED */ | 293 | /* ARGSUSED */ | |
296 | int | 294 | int | |
297 | sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap, | 295 | sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap, | |
298 | register_t *retval) | 296 | register_t *retval) | |
299 | { | 297 | { | |
300 | /* { | 298 | /* { | |
301 | syscallarg(struct timespec *) rqtp; | 299 | syscallarg(struct timespec *) rqtp; | |
302 | syscallarg(struct timespec *) rmtp; | 300 | syscallarg(struct timespec *) rmtp; | |
303 | } */ | 301 | } */ | |
304 | struct timespec rmt, rqt; | 302 | struct timespec rmt, rqt; | |
305 | int error, error1; | 303 | int error, error1; | |
306 | 304 | |||
307 | error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec)); | 305 | error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec)); | |
308 | if (error) | 306 | if (error) | |
309 | return (error); | 307 | return (error); | |
310 | 308 | |||
311 | error = nanosleep1(l, &rqt, SCARG(uap, rmtp) ? &rmt : NULL); | 309 | error = nanosleep1(l, &rqt, SCARG(uap, rmtp) ? &rmt : NULL); | |
312 | if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR)) | 310 | if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR)) | |
313 | return error; | 311 | return error; | |
314 | 312 | |||
315 | error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt)); | 313 | error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt)); | |
316 | return error1 ? error1 : error; | 314 | return error1 ? error1 : error; | |
317 | } | 315 | } | |
318 | 316 | |||
319 | int | 317 | int | |
320 | nanosleep1(struct lwp *l, struct timespec *rqt, struct timespec *rmt) | 318 | nanosleep1(struct lwp *l, struct timespec *rqt, struct timespec *rmt) | |
321 | { | 319 | { | |
322 | struct timespec rmtstart; | 320 | struct timespec rmtstart; | |
323 | int error, timo; | 321 | int error, timo; | |
324 | 322 | |||
325 | if ((error = itimespecfix(rqt)) != 0) | 323 | if ((error = itimespecfix(rqt)) != 0) | |
326 | return error; | 324 | return error; | |
327 | 325 | |||
328 | timo = tstohz(rqt); | 326 | timo = tstohz(rqt); | |
329 | /* | 327 | /* | |
330 | * Avoid inadvertantly sleeping forever | 328 | * Avoid inadvertantly sleeping forever | |
331 | */ | 329 | */ | |
332 | if (timo == 0) | 330 | if (timo == 0) | |
333 | timo = 1; | 331 | timo = 1; | |
334 | getnanouptime(&rmtstart); | 332 | getnanouptime(&rmtstart); | |
335 | again: | 333 | again: | |
336 | error = kpause("nanoslp", true, timo, NULL); | 334 | error = kpause("nanoslp", true, timo, NULL); | |
337 | if (rmt != NULL || error == 0) { | 335 | if (rmt != NULL || error == 0) { | |
338 | struct timespec rmtend; | 336 | struct timespec rmtend; | |
339 | struct timespec t0; | 337 | struct timespec t0; | |
340 | struct timespec *t; | 338 | struct timespec *t; | |
341 | 339 | |||
342 | getnanouptime(&rmtend); | 340 | getnanouptime(&rmtend); | |
343 | t = (rmt != NULL) ? rmt : &t0; | 341 | t = (rmt != NULL) ? rmt : &t0; | |
344 | timespecsub(&rmtend, &rmtstart, t); | 342 | timespecsub(&rmtend, &rmtstart, t); | |
345 | timespecsub(rqt, t, t); | 343 | timespecsub(rqt, t, t); | |
346 | if (t->tv_sec < 0) | 344 | if (t->tv_sec < 0) | |
347 | timespecclear(t); | 345 | timespecclear(t); | |
348 | if (error == 0) { | 346 | if (error == 0) { | |
349 | timo = tstohz(t); | 347 | timo = tstohz(t); | |
350 | if (timo > 0) | 348 | if (timo > 0) | |
351 | goto again; | 349 | goto again; | |
352 | } | 350 | } | |
353 | } | 351 | } | |
354 | 352 | |||
355 | if (error == ERESTART) | 353 | if (error == ERESTART) | |
356 | error = EINTR; | 354 | error = EINTR; | |
357 | if (error == EWOULDBLOCK) | 355 | if (error == EWOULDBLOCK) | |
358 | error = 0; | 356 | error = 0; | |
359 | 357 | |||
360 | return error; | 358 | return error; | |
361 | } | 359 | } | |
362 | 360 | |||
363 | /* ARGSUSED */ | 361 | /* ARGSUSED */ | |
364 | int | 362 | int | |
365 | sys___gettimeofday50(struct lwp *l, const struct sys___gettimeofday50_args *uap, | 363 | sys___gettimeofday50(struct lwp *l, const struct sys___gettimeofday50_args *uap, | |
366 | register_t *retval) | 364 | register_t *retval) | |
367 | { | 365 | { | |
368 | /* { | 366 | /* { | |
369 | syscallarg(struct timeval *) tp; | 367 | syscallarg(struct timeval *) tp; | |
370 | syscallarg(void *) tzp; really "struct timezone *"; | 368 | syscallarg(void *) tzp; really "struct timezone *"; | |
371 | } */ | 369 | } */ | |
372 | struct timeval atv; | 370 | struct timeval atv; | |
373 | int error = 0; | 371 | int error = 0; | |
374 | struct timezone tzfake; | 372 | struct timezone tzfake; | |
375 | 373 | |||
376 | if (SCARG(uap, tp)) { | 374 | if (SCARG(uap, tp)) { | |
377 | microtime(&atv); | 375 | microtime(&atv); | |
378 | error = copyout(&atv, SCARG(uap, tp), sizeof(atv)); | 376 | error = copyout(&atv, SCARG(uap, tp), sizeof(atv)); | |
379 | if (error) | 377 | if (error) | |
380 | return (error); | 378 | return (error); | |
381 | } | 379 | } | |
382 | if (SCARG(uap, tzp)) { | 380 | if (SCARG(uap, tzp)) { | |
383 | /* | 381 | /* | |
384 | * NetBSD has no kernel notion of time zone, so we just | 382 | * NetBSD has no kernel notion of time zone, so we just | |
385 | * fake up a timezone struct and return it if demanded. | 383 | * fake up a timezone struct and return it if demanded. | |
386 | */ | 384 | */ | |
387 | tzfake.tz_minuteswest = 0; | 385 | tzfake.tz_minuteswest = 0; | |
388 | tzfake.tz_dsttime = 0; | 386 | tzfake.tz_dsttime = 0; | |
389 | error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake)); | 387 | error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake)); | |
390 | } | 388 | } | |
391 | return (error); | 389 | return (error); | |
392 | } | 390 | } | |
393 | 391 | |||
394 | /* ARGSUSED */ | 392 | /* ARGSUSED */ | |
395 | int | 393 | int | |
396 | sys___settimeofday50(struct lwp *l, const struct sys___settimeofday50_args *uap, | 394 | sys___settimeofday50(struct lwp *l, const struct sys___settimeofday50_args *uap, | |
397 | register_t *retval) | 395 | register_t *retval) | |
398 | { | 396 | { | |
399 | /* { | 397 | /* { | |
400 | syscallarg(const struct timeval *) tv; | 398 | syscallarg(const struct timeval *) tv; | |
401 | syscallarg(const void *) tzp; really "const struct timezone *"; | 399 | syscallarg(const void *) tzp; really "const struct timezone *"; | |
402 | } */ | 400 | } */ | |
403 | 401 | |||
404 | return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true); | 402 | return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true); | |
405 | } | 403 | } | |
406 | 404 | |||
407 | int | 405 | int | |
408 | settimeofday1(const struct timeval *utv, bool userspace, | 406 | settimeofday1(const struct timeval *utv, bool userspace, | |
409 | const void *utzp, struct lwp *l, bool check_kauth) | 407 | const void *utzp, struct lwp *l, bool check_kauth) | |
410 | { | 408 | { | |
411 | struct timeval atv; | 409 | struct timeval atv; | |
412 | struct timespec ts; | 410 | struct timespec ts; | |
413 | int error; | 411 | int error; | |
414 | 412 | |||
415 | /* Verify all parameters before changing time. */ | 413 | /* Verify all parameters before changing time. */ | |
416 | 414 | |||
417 | /* | 415 | /* | |
418 | * NetBSD has no kernel notion of time zone, and only an | 416 | * NetBSD has no kernel notion of time zone, and only an | |
419 | * obsolete program would try to set it, so we log a warning. | 417 | * obsolete program would try to set it, so we log a warning. | |
420 | */ | 418 | */ | |
421 | if (utzp) | 419 | if (utzp) | |
422 | log(LOG_WARNING, "pid %d attempted to set the " | 420 | log(LOG_WARNING, "pid %d attempted to set the " | |
423 | "(obsolete) kernel time zone\n", l->l_proc->p_pid); | 421 | "(obsolete) kernel time zone\n", l->l_proc->p_pid); | |
424 | 422 | |||
425 | if (utv == NULL) | 423 | if (utv == NULL) | |
426 | return 0; | 424 | return 0; | |
427 | 425 | |||
428 | if (userspace) { | 426 | if (userspace) { | |
429 | if ((error = copyin(utv, &atv, sizeof(atv))) != 0) | 427 | if ((error = copyin(utv, &atv, sizeof(atv))) != 0) | |
430 | return error; | 428 | return error; | |
431 | utv = &atv; | 429 | utv = &atv; | |
432 | } | 430 | } | |
433 | 431 | |||
434 | TIMEVAL_TO_TIMESPEC(utv, &ts); | 432 | TIMEVAL_TO_TIMESPEC(utv, &ts); | |
435 | return settime1(l->l_proc, &ts, check_kauth); | 433 | return settime1(l->l_proc, &ts, check_kauth); | |
436 | } | 434 | } | |
437 | 435 | |||
438 | int time_adjusted; /* set if an adjustment is made */ | 436 | int time_adjusted; /* set if an adjustment is made */ | |
439 | 437 | |||
440 | /* ARGSUSED */ | 438 | /* ARGSUSED */ | |
441 | int | 439 | int | |
442 | sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap, | 440 | sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap, | |
443 | register_t *retval) | 441 | register_t *retval) | |
444 | { | 442 | { | |
445 | /* { | 443 | /* { | |
446 | syscallarg(const struct timeval *) delta; | 444 | syscallarg(const struct timeval *) delta; | |
447 | syscallarg(struct timeval *) olddelta; | 445 | syscallarg(struct timeval *) olddelta; | |
448 | } */ | 446 | } */ | |
449 | int error = 0; | 447 | int error = 0; | |
450 | struct timeval atv, oldatv; | 448 | struct timeval atv, oldatv; | |
451 | 449 | |||
452 | if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME, | 450 | if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME, | |
453 | KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0) | 451 | KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0) | |
454 | return error; | 452 | return error; | |
455 | 453 | |||
456 | if (SCARG(uap, delta)) { | 454 | if (SCARG(uap, delta)) { | |
457 | error = copyin(SCARG(uap, delta), &atv, | 455 | error = copyin(SCARG(uap, delta), &atv, | |
458 | sizeof(*SCARG(uap, delta))); | 456 | sizeof(*SCARG(uap, delta))); | |
459 | if (error) | 457 | if (error) | |
460 | return (error); | 458 | return (error); | |
461 | } | 459 | } | |
462 | adjtime1(SCARG(uap, delta) ? &atv : NULL, | 460 | adjtime1(SCARG(uap, delta) ? &atv : NULL, | |
463 | SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc); | 461 | SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc); | |
464 | if (SCARG(uap, olddelta)) | 462 | if (SCARG(uap, olddelta)) | |
465 | error = copyout(&oldatv, SCARG(uap, olddelta), | 463 | error = copyout(&oldatv, SCARG(uap, olddelta), | |
466 | sizeof(*SCARG(uap, olddelta))); | 464 | sizeof(*SCARG(uap, olddelta))); | |
467 | return error; | 465 | return error; | |
468 | } | 466 | } | |
469 | 467 | |||
470 | void | 468 | void | |
471 | adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p) | 469 | adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p) | |
472 | { | 470 | { | |
473 | extern int64_t time_adjtime; /* in kern_ntptime.c */ | 471 | extern int64_t time_adjtime; /* in kern_ntptime.c */ | |
474 | 472 | |||
475 | if (olddelta) { | 473 | if (olddelta) { | |
476 | mutex_spin_enter(&timecounter_lock); | 474 | mutex_spin_enter(&timecounter_lock); | |
477 | olddelta->tv_sec = time_adjtime / 1000000; | 475 | olddelta->tv_sec = time_adjtime / 1000000; | |
478 | olddelta->tv_usec = time_adjtime % 1000000; | 476 | olddelta->tv_usec = time_adjtime % 1000000; | |
479 | if (olddelta->tv_usec < 0) { | 477 | if (olddelta->tv_usec < 0) { | |
480 | olddelta->tv_usec += 1000000; | 478 | olddelta->tv_usec += 1000000; | |
481 | olddelta->tv_sec--; | 479 | olddelta->tv_sec--; | |
482 | } | 480 | } | |
483 | mutex_spin_exit(&timecounter_lock); | 481 | mutex_spin_exit(&timecounter_lock); | |
484 | } | 482 | } | |
485 | 483 | |||
486 | if (delta) { | 484 | if (delta) { | |
487 | mutex_spin_enter(&timecounter_lock); | 485 | mutex_spin_enter(&timecounter_lock); | |
488 | time_adjtime = delta->tv_sec * 1000000 + delta->tv_usec; | 486 | time_adjtime = delta->tv_sec * 1000000 + delta->tv_usec; | |
489 | 487 | |||
490 | if (time_adjtime) { | 488 | if (time_adjtime) { | |
491 | /* We need to save the system time during shutdown */ | 489 | /* We need to save the system time during shutdown */ | |
492 | time_adjusted |= 1; | 490 | time_adjusted |= 1; | |
493 | } | 491 | } | |
494 | mutex_spin_exit(&timecounter_lock); | 492 | mutex_spin_exit(&timecounter_lock); | |
495 | } | 493 | } | |
496 | } | 494 | } | |
497 | 495 | |||
/*
 * Interval timer support. Both the BSD getitimer() family and the POSIX
 * timer_*() family of routines are supported.
 *
 * All timers are kept in an array pointed to by p_timers, which is
 * allocated on demand - many processes don't use timers at all. The
 * first three elements in this array are reserved for the BSD timers:
 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, and element
 * 2 is ITIMER_PROF. The rest may be allocated by the timer_create()
 * syscall.
 *
 * Realtime timers are kept in the ptimer structure as an absolute
 * time; virtual time timers are kept as a linked list of deltas.
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c. The real time timer is processed by a callout
 * routine, called from the softclock() routine. Since a callout may
 * be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realtimerexpire,
 * given below), to be delayed in real time past when it is supposed
 * to occur. It does not suffice, therefore, to reload the real timer
 * .it_value from the real time timer's .it_interval. Rather, we
 * compute the next time in absolute time the timer should go off.
 */
520 | 518 | |||
521 | /* Allocate a POSIX realtime timer. */ | 519 | /* Allocate a POSIX realtime timer. */ | |
522 | int | 520 | int | |
523 | sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap, | 521 | sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap, | |
524 | register_t *retval) | 522 | register_t *retval) | |
525 | { | 523 | { | |
526 | /* { | 524 | /* { | |
527 | syscallarg(clockid_t) clock_id; | 525 | syscallarg(clockid_t) clock_id; | |
528 | syscallarg(struct sigevent *) evp; | 526 | syscallarg(struct sigevent *) evp; | |
529 | syscallarg(timer_t *) timerid; | 527 | syscallarg(timer_t *) timerid; | |
530 | } */ | 528 | } */ | |
531 | 529 | |||
532 | return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id), | 530 | return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id), | |
533 | SCARG(uap, evp), copyin, l); | 531 | SCARG(uap, evp), copyin, l); | |
534 | } | 532 | } | |
535 | 533 | |||
/*
 * timer_create1:
 *
 *	Guts of timer_create(2).  Validates the clock id and the optional
 *	sigevent (fetched via "fetch_event" so emulations can supply their
 *	own copyin), allocates a ptimer, installs it in a free slot of the
 *	process's pts_timers[] array, and copies the new timer id out to
 *	"tid".  Returns 0 or an errno.
 */
int
timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
    copyin_t fetch_event, struct lwp *l)
{
	int error;
	timer_t timerid;
	struct ptimers *pts;
	struct ptimer *pt;
	struct proc *p;

	p = l->l_proc;

	/* Only the four standard clocks are supported. */
	if (id != CLOCK_REALTIME && id != CLOCK_VIRTUAL &&
	    id != CLOCK_PROF && id != CLOCK_MONOTONIC)
		return (EINVAL);

	/* Per-process timer state is allocated lazily on first use. */
	if ((pts = p->p_timers) == NULL)
		pts = timers_alloc(p);

	pt = pool_get(&ptimer_pool, PR_WAITOK);
	if (evp != NULL) {
		/*
		 * Fetch the caller's sigevent and sanity-check the
		 * notification method and signal number before
		 * committing to anything.
		 */
		if (((error =
		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
		    (pt->pt_ev.sigev_notify > SIGEV_SA)) ||
		    (pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
		    (pt->pt_ev.sigev_signo <= 0 ||
		    pt->pt_ev.sigev_signo >= NSIG))) {
			pool_put(&ptimer_pool, pt);
			return (error ? error : EINVAL);
		}
	}

	/* Find a free timer slot, skipping those reserved for setitimer(). */
	mutex_spin_enter(&timer_lock);
	for (timerid = 3; timerid < TIMER_MAX; timerid++)
		if (pts->pts_timers[timerid] == NULL)
			break;
	if (timerid == TIMER_MAX) {
		mutex_spin_exit(&timer_lock);
		pool_put(&ptimer_pool, pt);
		return EAGAIN;
	}
	if (evp == NULL) {
		/* No sigevent supplied: default to the classic signals. */
		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
		switch (id) {
		case CLOCK_REALTIME:
		case CLOCK_MONOTONIC:
			pt->pt_ev.sigev_signo = SIGALRM;
			break;
		case CLOCK_VIRTUAL:
			pt->pt_ev.sigev_signo = SIGVTALRM;
			break;
		case CLOCK_PROF:
			pt->pt_ev.sigev_signo = SIGPROF;
			break;
		}
		pt->pt_ev.sigev_value.sival_int = timerid;
	}
	/* Pre-fill the ksiginfo that is delivered when the timer fires. */
	pt->pt_info.ksi_signo = pt->pt_ev.sigev_signo;
	pt->pt_info.ksi_errno = 0;
	pt->pt_info.ksi_code = 0;
	pt->pt_info.ksi_pid = p->p_pid;
	pt->pt_info.ksi_uid = kauth_cred_getuid(l->l_cred);
	pt->pt_info.ksi_value = pt->pt_ev.sigev_value;
	pt->pt_type = id;
	pt->pt_proc = p;
	pt->pt_overruns = 0;
	pt->pt_poverruns = 0;
	pt->pt_entry = timerid;
	pt->pt_queued = false;
	timespecclear(&pt->pt_time.it_value);
	/* Real-time clocks fire via a callout; virtual ones via lists. */
	if (!CLOCK_VIRTUAL_P(id))
		callout_init(&pt->pt_ch, CALLOUT_MPSAFE);
	else
		pt->pt_active = 0;

	pts->pts_timers[timerid] = pt;
	mutex_spin_exit(&timer_lock);

	return copyout(&timerid, tid, sizeof(timerid));
}
618 | 616 | |||
/* Delete a POSIX realtime timer */
int
sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(timer_t) timerid;
	} */
	struct proc *p = l->l_proc;
	timer_t timerid;
	struct ptimers *pts;
	struct ptimer *pt, *ptn;

	timerid = SCARG(uap, timerid);
	pts = p->p_timers;

	/* Ids 0 and 1 are rejected here; only id >= 2 may be deleted. */
	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return (EINVAL);

	mutex_spin_enter(&timer_lock);
	if ((pt = pts->pts_timers[timerid]) == NULL) {
		mutex_spin_exit(&timer_lock);
		return (EINVAL);
	}
	if (CLOCK_VIRTUAL_P(pt->pt_type)) {
		/*
		 * Virtual-time timers live on a delta-encoded list:
		 * unlink this timer and fold its remaining delta back
		 * into every entry that followed it, so their absolute
		 * expirations are unchanged.
		 */
		if (pt->pt_active) {
			ptn = LIST_NEXT(pt, pt_list);
			LIST_REMOVE(pt, pt_list);
			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
				timespecadd(&pt->pt_time.it_value,
				    &ptn->pt_time.it_value,
				    &ptn->pt_time.it_value);
			pt->pt_active = 0;
		}
	}
	/*
	 * NOTE(review): no mutex_spin_exit() on this path, so
	 * itimerfree() presumably releases timer_lock -- confirm
	 * against its definition.
	 */
	itimerfree(pts, timerid);

	return (0);
}
658 | 656 | |||
/*
 * Set up the given timer. The value in pt->pt_time.it_value is taken
 * to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC timers and
 * a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
 *
 * Must be called with timer_lock held (asserted below).
 */
void
timer_settime(struct ptimer *pt)
{
	struct ptimer *ptn, *pptn;
	struct ptlist *ptl;

	KASSERT(mutex_owned(&timer_lock));

	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
		/* Real-time timer: stop any pending callout, then re-arm. */
		callout_halt(&pt->pt_ch, &timer_lock);
		if (timespecisset(&pt->pt_time.it_value)) {
			/*
			 * Don't need to check tshzto() return value, here.
			 * callout_reset() does it for us.
			 */
			callout_reset(&pt->pt_ch, tshzto(&pt->pt_time.it_value),
			    realtimerexpire, pt);
		}
	} else {
		/*
		 * Virtual-time timer.  The per-process lists store
		 * deltas, so first take the timer off its list,
		 * restoring the absolute values of any successors.
		 */
		if (pt->pt_active) {
			ptn = LIST_NEXT(pt, pt_list);
			LIST_REMOVE(pt, pt_list);
			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
				timespecadd(&pt->pt_time.it_value,
				    &ptn->pt_time.it_value,
				    &ptn->pt_time.it_value);
		}
		if (timespecisset(&pt->pt_time.it_value)) {
			if (pt->pt_type == CLOCK_VIRTUAL)
				ptl = &pt->pt_proc->p_timers->pts_virtual;
			else
				ptl = &pt->pt_proc->p_timers->pts_prof;

			/*
			 * Walk the list, reducing this timer's value to
			 * a delta relative to each predecessor, and
			 * remember the insertion point (pptn).
			 */
			for (ptn = LIST_FIRST(ptl), pptn = NULL;
			    ptn && timespeccmp(&pt->pt_time.it_value,
				&ptn->pt_time.it_value, >);
			    pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
				timespecsub(&pt->pt_time.it_value,
				    &ptn->pt_time.it_value,
				    &pt->pt_time.it_value);

			if (pptn)
				LIST_INSERT_AFTER(pptn, pt, pt_list);
			else
				LIST_INSERT_HEAD(ptl, pt, pt_list);

			/* Re-express the successors relative to us. */
			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
				timespecsub(&ptn->pt_time.it_value,
				    &pt->pt_time.it_value,
				    &ptn->pt_time.it_value);

			pt->pt_active = 1;
		} else
			pt->pt_active = 0;
	}
}
720 | 718 | |||
/*
 * timer_gettime:
 *
 *	Report the given timer's current value and interval into *aits,
 *	with .it_value expressed as time remaining (relative).
 *	Must be called with timer_lock held (asserted below).
 */
void
timer_gettime(struct ptimer *pt, struct itimerspec *aits)
{
	struct timespec now;
	struct ptimer *ptn;

	KASSERT(mutex_owned(&timer_lock));

	*aits = pt->pt_time;
	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
		/*
		 * Convert from absolute to relative time in .it_value
		 * part of real time timer. If time for real time
		 * timer has passed return 0, else return difference
		 * between current time and time for the timer to go
		 * off.
		 */
		if (timespecisset(&aits->it_value)) {
			if (pt->pt_type == CLOCK_REALTIME) {
				getnanotime(&now);
			} else { /* CLOCK_MONOTONIC */
				getnanouptime(&now);
			}
			if (timespeccmp(&aits->it_value, &now, <))
				timespecclear(&aits->it_value);
			else
				timespecsub(&aits->it_value, &now,
				    &aits->it_value);
		}
	} else if (pt->pt_active) {
		/*
		 * Virtual-time lists store deltas: sum the entries that
		 * precede this timer to recover its remaining time.
		 */
		if (pt->pt_type == CLOCK_VIRTUAL)
			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
		else
			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
			timespecadd(&aits->it_value,
			    &ptn->pt_time.it_value, &aits->it_value);
		KASSERT(ptn != NULL); /* pt should be findable on the list */
	} else
		timespecclear(&aits->it_value);
}
762 | 760 | |||
763 | 761 | |||
764 | 762 | |||
765 | /* Set and arm a POSIX realtime timer */ | 763 | /* Set and arm a POSIX realtime timer */ | |
766 | int | 764 | int | |
767 | sys___timer_settime50(struct lwp *l, | 765 | sys___timer_settime50(struct lwp *l, | |
768 | const struct sys___timer_settime50_args *uap, | 766 | const struct sys___timer_settime50_args *uap, | |
769 | register_t *retval) | 767 | register_t *retval) | |
770 | { | 768 | { | |
771 | /* { | 769 | /* { | |
772 | syscallarg(timer_t) timerid; | 770 | syscallarg(timer_t) timerid; | |
773 | syscallarg(int) flags; | 771 | syscallarg(int) flags; | |
774 | syscallarg(const struct itimerspec *) value; | 772 | syscallarg(const struct itimerspec *) value; | |
775 | syscallarg(struct itimerspec *) ovalue; | 773 | syscallarg(struct itimerspec *) ovalue; | |
776 | } */ | 774 | } */ | |
777 | int error; | 775 | int error; | |
778 | struct itimerspec value, ovalue, *ovp = NULL; | 776 | struct itimerspec value, ovalue, *ovp = NULL; | |
779 | 777 | |||
780 | if ((error = copyin(SCARG(uap, value), &value, | 778 | if ((error = copyin(SCARG(uap, value), &value, | |
781 | sizeof(struct itimerspec))) != 0) | 779 | sizeof(struct itimerspec))) != 0) | |
782 | return (error); | 780 | return (error); | |
783 | 781 | |||
784 | if (SCARG(uap, ovalue)) | 782 | if (SCARG(uap, ovalue)) | |
785 | ovp = &ovalue; | 783 | ovp = &ovalue; | |
786 | 784 | |||
787 | if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp, | 785 | if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp, | |
788 | SCARG(uap, flags), l->l_proc)) != 0) | 786 | SCARG(uap, flags), l->l_proc)) != 0) | |
789 | return error; | 787 | return error; | |
790 | 788 | |||
791 | if (ovp) | 789 | if (ovp) | |
792 | return copyout(&ovalue, SCARG(uap, ovalue), | 790 | return copyout(&ovalue, SCARG(uap, ovalue), | |
793 | sizeof(struct itimerspec)); | 791 | sizeof(struct itimerspec)); | |
794 | return 0; | 792 | return 0; | |
795 | } | 793 | } | |
796 | 794 | |||
/*
 * dotimer_settime:
 *
 *	Common back-end for timer_settime(2).  Validates and installs the
 *	new setting for timer "timerid" of process "p", returning the
 *	previous setting through "ovalue" when non-NULL.  "flags" may
 *	contain TIMER_ABSTIME.  Returns 0 or an errno.
 */
int
dotimer_settime(int timerid, struct itimerspec *value,
    struct itimerspec *ovalue, int flags, struct proc *p)
{
	struct timespec now;
	struct itimerspec val, oval;
	struct ptimers *pts;
	struct ptimer *pt;
	int error;

	pts = p->p_timers;

	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return EINVAL;
	/* Work on a local copy; reject malformed timespecs up front. */
	val = *value;
	if ((error = itimespecfix(&val.it_value)) != 0 ||
	    (error = itimespecfix(&val.it_interval)) != 0)
		return error;

	mutex_spin_enter(&timer_lock);
	if ((pt = pts->pts_timers[timerid]) == NULL) {
		mutex_spin_exit(&timer_lock);
		return EINVAL;
	}

	/* Swap in the new setting, remembering the old one. */
	oval = pt->pt_time;
	pt->pt_time = val;

	/*
	 * If we've been passed a relative time for a realtime timer,
	 * convert it to absolute; if an absolute time for a virtual
	 * timer, convert it to relative and make sure we don't set it
	 * to zero, which would cancel the timer, or let it go
	 * negative, which would confuse the comparison tests.
	 */
	if (timespecisset(&pt->pt_time.it_value)) {
		if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
			if ((flags & TIMER_ABSTIME) == 0) {
				if (pt->pt_type == CLOCK_REALTIME) {
					getnanotime(&now);
				} else { /* CLOCK_MONOTONIC */
					getnanouptime(&now);
				}
				timespecadd(&pt->pt_time.it_value, &now,
				    &pt->pt_time.it_value);
			}
		} else {
			if ((flags & TIMER_ABSTIME) != 0) {
				getnanotime(&now);
				timespecsub(&pt->pt_time.it_value, &now,
				    &pt->pt_time.it_value);
				/* Clamp to 1ns: zero would cancel it. */
				if (!timespecisset(&pt->pt_time.it_value) ||
				    pt->pt_time.it_value.tv_sec < 0) {
					pt->pt_time.it_value.tv_sec = 0;
					pt->pt_time.it_value.tv_nsec = 1;
				}
			}
		}
	}

	timer_settime(pt);
	mutex_spin_exit(&timer_lock);

	if (ovalue)
		*ovalue = oval;

	return (0);
}
865 | 863 | |||
866 | /* Return the time remaining until a POSIX timer fires. */ | 864 | /* Return the time remaining until a POSIX timer fires. */ | |
867 | int | 865 | int | |
868 | sys___timer_gettime50(struct lwp *l, | 866 | sys___timer_gettime50(struct lwp *l, | |
869 | const struct sys___timer_gettime50_args *uap, register_t *retval) | 867 | const struct sys___timer_gettime50_args *uap, register_t *retval) | |
870 | { | 868 | { | |
871 | /* { | 869 | /* { | |
872 | syscallarg(timer_t) timerid; | 870 | syscallarg(timer_t) timerid; | |
873 | syscallarg(struct itimerspec *) value; | 871 | syscallarg(struct itimerspec *) value; | |
874 | } */ | 872 | } */ | |
875 | struct itimerspec its; | 873 | struct itimerspec its; | |
876 | int error; | 874 | int error; | |
877 | 875 | |||
878 | if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc, | 876 | if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc, | |
879 | &its)) != 0) | 877 | &its)) != 0) | |
880 | return error; | 878 | return error; | |
881 | 879 | |||
882 | return copyout(&its, SCARG(uap, value), sizeof(its)); | 880 | return copyout(&its, SCARG(uap, value), sizeof(its)); | |
883 | } | 881 | } | |
884 | 882 | |||
885 | int | 883 | int | |
886 | dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its) | 884 | dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its) | |
887 | { | 885 | { | |
888 | struct ptimer *pt; | 886 | struct ptimer *pt; | |
889 | struct ptimers *pts; | 887 | struct ptimers *pts; | |
890 | 888 | |||
891 | pts = p->p_timers; | 889 | pts = p->p_timers; | |
892 | if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) | 890 | if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) | |
893 | return (EINVAL); | 891 | return (EINVAL); | |
894 | mutex_spin_enter(&timer_lock); | 892 | mutex_spin_enter(&timer_lock); | |
895 | if ((pt = pts->pts_timers[timerid]) == NULL) { | 893 | if ((pt = pts->pts_timers[timerid]) == NULL) { | |
896 | mutex_spin_exit(&timer_lock); | 894 | mutex_spin_exit(&timer_lock); | |
897 | return (EINVAL); | 895 | return (EINVAL); | |
898 | } | 896 | } | |
899 | timer_gettime(pt, its); | 897 | timer_gettime(pt, its); | |
900 | mutex_spin_exit(&timer_lock); | 898 | mutex_spin_exit(&timer_lock); | |
901 | 899 | |||
902 | return 0; | 900 | return 0; | |
903 | } | 901 | } | |
904 | 902 | |||
905 | /* | 903 | /* | |
906 | * Return the count of the number of times a periodic timer expired | 904 | * Return the count of the number of times a periodic timer expired | |
907 | * while a notification was already pending. The counter is reset when | 905 | * while a notification was already pending. The counter is reset when | |
908 | * a timer expires and a notification can be posted. | 906 | * a timer expires and a notification can be posted. | |
909 | */ | 907 | */ | |
910 | int | 908 | int | |
911 | sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap, | 909 | sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap, | |
912 | register_t *retval) | 910 | register_t *retval) | |
913 | { | 911 | { | |
914 | /* { | 912 | /* { | |
915 | syscallarg(timer_t) timerid; | 913 | syscallarg(timer_t) timerid; | |
916 | } */ | 914 | } */ | |
917 | struct proc *p = l->l_proc; | 915 | struct proc *p = l->l_proc; | |
918 | struct ptimers *pts; | 916 | struct ptimers *pts; | |
919 | int timerid; | 917 | int timerid; | |
920 | struct ptimer *pt; | 918 | struct ptimer *pt; | |
921 | 919 | |||
922 | timerid = SCARG(uap, timerid); | 920 | timerid = SCARG(uap, timerid); | |
923 | 921 | |||
924 | pts = p->p_timers; | 922 | pts = p->p_timers; | |
925 | if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) | 923 | if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) | |
926 | return (EINVAL); | 924 | return (EINVAL); | |
927 | mutex_spin_enter(&timer_lock); | 925 | mutex_spin_enter(&timer_lock); | |
928 | if ((pt = pts->pts_timers[timerid]) == NULL) { | 926 | if ((pt = pts->pts_timers[timerid]) == NULL) { | |
929 | mutex_spin_exit(&timer_lock); | 927 | mutex_spin_exit(&timer_lock); | |
930 | return (EINVAL); | 928 | return (EINVAL); | |
931 | } | 929 | } | |
932 | *retval = pt->pt_poverruns; | 930 | *retval = pt->pt_poverruns; | |
933 | mutex_spin_exit(&timer_lock); | 931 | mutex_spin_exit(&timer_lock); | |
934 | 932 | |||
935 | return (0); | 933 | return (0); | |
936 | } | 934 | } | |
937 | 935 | |||
#ifdef KERN_SA
/* Glue function that triggers an upcall; called from userret(). */
void
timerupcall(struct lwp *l)
{
	struct ptimers *pt = l->l_proc->p_timers;
	struct proc *p = l->l_proc;
	unsigned int i, fired, done;

	KDASSERT(l->l_proc->p_sa);
	/* Bail out if we do not own the virtual processor */
	if (l->l_savp->savp_lwp != l)
		return ;

	mutex_enter(p->p_lock);

	/* Deliver one SA upcall per pending timer bit in pts_fired. */
	fired = pt->pts_fired;
	done = 0;
	while ((i = ffs(fired)) != 0) {
		siginfo_t *si;
		int mask = 1 << --i;
		int f;

		/*
		 * Set LP_SA_NOBLOCK for the duration of the upcall;
		 * "f" records whether the flag was previously clear so
		 * the XOR below restores the original state.
		 */
		f = ~l->l_pflag & LP_SA_NOBLOCK;
		l->l_pflag |= LP_SA_NOBLOCK;
		si = siginfo_alloc(PR_WAITOK);
		si->_info = pt->pts_timers[i]->pt_info.ksi_info;
		if (sa_upcall(l, SA_UPCALL_SIGEV | SA_UPCALL_DEFER, NULL, l,
		    sizeof(*si), si, siginfo_free) != 0) {
			siginfo_free(si);
			/* XXX What do we do here?? */
		} else
			done |= mask;
		fired &= ~mask;
		l->l_pflag ^= f;
	}
	/* Clear only the bits that were successfully delivered. */
	pt->pts_fired &= ~done;
	if (pt->pts_fired == 0)
		l->l_proc->p_timerpend = 0;

	mutex_exit(p->p_lock);
}
#endif /* KERN_SA */
981 | 979 | |||
982 | /* | 980 | /* | |
983 | * Real interval timer expired: | 981 | * Real interval timer expired: | |
984 | * send process whose timer expired an alarm signal. | 982 | * send process whose timer expired an alarm signal. | |
985 | * If time is not set up to reload, then just return. | 983 | * If time is not set up to reload, then just return. | |
986 | * Else compute next time timer should go off which is > current time. | 984 | * Else compute next time timer should go off which is > current time. | |
987 | * This is where delay in processing this timeout causes multiple | 985 | * This is where delay in processing this timeout causes multiple | |
988 | * SIGALRM calls to be compressed into one. | 986 | * SIGALRM calls to be compressed into one. | |
989 | */ | 987 | */ | |
990 | void | 988 | void | |
991 | realtimerexpire(void *arg) | 989 | realtimerexpire(void *arg) | |
992 | { | 990 | { | |
993 | uint64_t last_val, next_val, interval, now_ns; | 991 | uint64_t last_val, next_val, interval, now_ns; | |
994 | struct timespec now, next; | 992 | struct timespec now, next; | |
995 | struct ptimer *pt; | 993 | struct ptimer *pt; | |
996 | int backwards; | 994 | int backwards; | |
997 | 995 | |||
998 | pt = arg; | 996 | pt = arg; | |
999 | 997 | |||
1000 | mutex_spin_enter(&timer_lock); | 998 | mutex_spin_enter(&timer_lock); | |
1001 | itimerfire(pt); | 999 | itimerfire(pt); | |
1002 | 1000 | |||
1003 | if (!timespecisset(&pt->pt_time.it_interval)) { | 1001 | if (!timespecisset(&pt->pt_time.it_interval)) { | |
1004 | timespecclear(&pt->pt_time.it_value); | 1002 | timespecclear(&pt->pt_time.it_value); | |
1005 | mutex_spin_exit(&timer_lock); | 1003 | mutex_spin_exit(&timer_lock); | |
1006 | return; | 1004 | return; | |
1007 | } | 1005 | } | |
1008 | 1006 | |||
1009 | getnanotime(&now); | 1007 | getnanotime(&now); | |
1010 | backwards = (timespeccmp(&pt->pt_time.it_value, &now, >)); | 1008 | backwards = (timespeccmp(&pt->pt_time.it_value, &now, >)); | |
1011 | timespecadd(&pt->pt_time.it_value, &pt->pt_time.it_interval, &next); | 1009 | timespecadd(&pt->pt_time.it_value, &pt->pt_time.it_interval, &next); | |
1012 | /* Handle the easy case of non-overflown timers first. */ | 1010 | /* Handle the easy case of non-overflown timers first. */ | |
1013 | if (!backwards && timespeccmp(&next, &now, >)) { | 1011 | if (!backwards && timespeccmp(&next, &now, >)) { | |
1014 | pt->pt_time.it_value = next; | 1012 | pt->pt_time.it_value = next; | |
1015 | } else { | 1013 | } else { | |
1016 | now_ns = timespec2ns(&now); | 1014 | now_ns = timespec2ns(&now); | |
1017 | last_val = timespec2ns(&pt->pt_time.it_value); | 1015 | last_val = timespec2ns(&pt->pt_time.it_value); | |
1018 | interval = timespec2ns(&pt->pt_time.it_interval); | 1016 | interval = timespec2ns(&pt->pt_time.it_interval); | |
1019 | 1017 | |||
1020 | next_val = now_ns + | 1018 | next_val = now_ns + | |
1021 | (now_ns - last_val + interval - 1) % interval; | 1019 | (now_ns - last_val + interval - 1) % interval; | |
1022 | 1020 | |||
1023 | if (backwards) | 1021 | if (backwards) | |
1024 | next_val += interval; | 1022 | next_val += interval; | |
1025 | else | 1023 | else | |
1026 | pt->pt_overruns += (now_ns - last_val) / interval; | 1024 | pt->pt_overruns += (now_ns - last_val) / interval; | |
1027 | 1025 | |||
1028 | pt->pt_time.it_value.tv_sec = next_val / 1000000000; | 1026 | pt->pt_time.it_value.tv_sec = next_val / 1000000000; | |
1029 | pt->pt_time.it_value.tv_nsec = next_val % 1000000000; | 1027 | pt->pt_time.it_value.tv_nsec = next_val % 1000000000; | |
1030 | } | 1028 | } | |
1031 | 1029 | |||
1032 | /* | 1030 | /* | |
1033 | * Don't need to check tshzto() return value, here. | 1031 | * Don't need to check tshzto() return value, here. | |
1034 | * callout_reset() does it for us. | 1032 | * callout_reset() does it for us. | |
1035 | */ | 1033 | */ | |
1036 | callout_reset(&pt->pt_ch, tshzto(&pt->pt_time.it_value), | 1034 | callout_reset(&pt->pt_ch, tshzto(&pt->pt_time.it_value), | |
1037 | realtimerexpire, pt); | 1035 | realtimerexpire, pt); | |
1038 | mutex_spin_exit(&timer_lock); | 1036 | mutex_spin_exit(&timer_lock); | |
1039 | } | 1037 | } | |
1040 | 1038 | |||
1041 | /* BSD routine to get the value of an interval timer. */ | 1039 | /* BSD routine to get the value of an interval timer. */ | |
1042 | /* ARGSUSED */ | 1040 | /* ARGSUSED */ | |
1043 | int | 1041 | int | |
1044 | sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap, | 1042 | sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap, | |
1045 | register_t *retval) | 1043 | register_t *retval) | |
1046 | { | 1044 | { | |
1047 | /* { | 1045 | /* { | |
1048 | syscallarg(int) which; | 1046 | syscallarg(int) which; | |
1049 | syscallarg(struct itimerval *) itv; | 1047 | syscallarg(struct itimerval *) itv; | |
1050 | } */ | 1048 | } */ | |
1051 | struct proc *p = l->l_proc; | 1049 | struct proc *p = l->l_proc; | |
1052 | struct itimerval aitv; | 1050 | struct itimerval aitv; | |
1053 | int error; | 1051 | int error; | |
1054 | 1052 | |||
1055 | error = dogetitimer(p, SCARG(uap, which), &aitv); | 1053 | error = dogetitimer(p, SCARG(uap, which), &aitv); | |
1056 | if (error) | 1054 | if (error) | |
1057 | return error; | 1055 | return error; | |
1058 | return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval))); | 1056 | return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval))); | |
1059 | } | 1057 | } | |
1060 | 1058 | |||
1061 | int | 1059 | int | |
1062 | dogetitimer(struct proc *p, int which, struct itimerval *itvp) | 1060 | dogetitimer(struct proc *p, int which, struct itimerval *itvp) | |
1063 | { | 1061 | { | |
1064 | struct ptimers *pts; | 1062 | struct ptimers *pts; | |
1065 | struct ptimer *pt; | 1063 | struct ptimer *pt; | |
1066 | struct itimerspec its; | 1064 | struct itimerspec its; | |
1067 | 1065 | |||
1068 | if ((u_int)which > ITIMER_PROF) | 1066 | if ((u_int)which > ITIMER_PROF) | |
1069 | return (EINVAL); | 1067 | return (EINVAL); | |
1070 | 1068 | |||
1071 | mutex_spin_enter(&timer_lock); | 1069 | mutex_spin_enter(&timer_lock); | |
1072 | pts = p->p_timers; | 1070 | pts = p->p_timers; | |
1073 | if (pts == NULL || (pt = pts->pts_timers[which]) == NULL) { | 1071 | if (pts == NULL || (pt = pts->pts_timers[which]) == NULL) { | |
1074 | timerclear(&itvp->it_value); | 1072 | timerclear(&itvp->it_value); | |
1075 | timerclear(&itvp->it_interval); | 1073 | timerclear(&itvp->it_interval); | |
1076 | } else { | 1074 | } else { | |
1077 | timer_gettime(pt, &its); | 1075 | timer_gettime(pt, &its); | |
1078 | TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value); | 1076 | TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value); | |
1079 | TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval); | 1077 | TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval); | |
1080 | } | 1078 | } | |
1081 | mutex_spin_exit(&timer_lock); | 1079 | mutex_spin_exit(&timer_lock); | |
1082 | 1080 | |||
1083 | return 0; | 1081 | return 0; |
--- src/sys/kern/sched_4bsd.c 2011/04/14 16:19:35 1.26
+++ src/sys/kern/sched_4bsd.c 2011/07/27 14:35:34 1.27
@@ -1,541 +1,539 @@ | @@ -1,541 +1,539 @@ | |||
1 | /* $NetBSD: sched_4bsd.c,v 1.26 2011/04/14 16:19:35 yamt Exp $ */ | 1 | /* $NetBSD: sched_4bsd.c,v 1.27 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | |
9 | * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran, and | 9 | * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran, and | |
10 | * Daniel Sieger. | 10 | * Daniel Sieger. | |
11 | * | 11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | 12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | 13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | 14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | 15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | 16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | 17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | 18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | 19 | * documentation and/or other materials provided with the distribution. | |
20 | * | 20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 21 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
28 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 28 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
29 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 29 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
31 | * POSSIBILITY OF SUCH DAMAGE. | 31 | * POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | 32 | */ | |
33 | 33 | |||
34 | /*- | 34 | /*- | |
35 | * Copyright (c) 1982, 1986, 1990, 1991, 1993 | 35 | * Copyright (c) 1982, 1986, 1990, 1991, 1993 | |
36 | * The Regents of the University of California. All rights reserved. | 36 | * The Regents of the University of California. All rights reserved. | |
37 | * (c) UNIX System Laboratories, Inc. | 37 | * (c) UNIX System Laboratories, Inc. | |
38 | * All or some portions of this file are derived from material licensed | 38 | * All or some portions of this file are derived from material licensed | |
39 | * to the University of California by American Telephone and Telegraph | 39 | * to the University of California by American Telephone and Telegraph | |
40 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 40 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
41 | * the permission of UNIX System Laboratories, Inc. | 41 | * the permission of UNIX System Laboratories, Inc. | |
42 | * | 42 | * | |
43 | * Redistribution and use in source and binary forms, with or without | 43 | * Redistribution and use in source and binary forms, with or without | |
44 | * modification, are permitted provided that the following conditions | 44 | * modification, are permitted provided that the following conditions | |
45 | * are met: | 45 | * are met: | |
46 | * 1. Redistributions of source code must retain the above copyright | 46 | * 1. Redistributions of source code must retain the above copyright | |
47 | * notice, this list of conditions and the following disclaimer. | 47 | * notice, this list of conditions and the following disclaimer. | |
48 | * 2. Redistributions in binary form must reproduce the above copyright | 48 | * 2. Redistributions in binary form must reproduce the above copyright | |
49 | * notice, this list of conditions and the following disclaimer in the | 49 | * notice, this list of conditions and the following disclaimer in the | |
50 | * documentation and/or other materials provided with the distribution. | 50 | * documentation and/or other materials provided with the distribution. | |
51 | * 3. Neither the name of the University nor the names of its contributors | 51 | * 3. Neither the name of the University nor the names of its contributors | |
52 | * may be used to endorse or promote products derived from this software | 52 | * may be used to endorse or promote products derived from this software | |
53 | * without specific prior written permission. | 53 | * without specific prior written permission. | |
54 | * | 54 | * | |
55 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 55 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
56 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 56 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
57 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 57 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
58 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 58 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
59 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 59 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
60 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 60 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
61 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 61 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
62 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 62 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
63 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 63 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
64 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 64 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
65 | * SUCH DAMAGE. | 65 | * SUCH DAMAGE. | |
66 | * | 66 | * | |
67 | * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 | 67 | * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 | |
68 | */ | 68 | */ | |
69 | 69 | |||
70 | #include <sys/cdefs.h> | 70 | #include <sys/cdefs.h> | |
71 | __KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.26 2011/04/14 16:19:35 yamt Exp $"); | 71 | __KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.27 2011/07/27 14:35:34 uebayasi Exp $"); | |
72 | 72 | |||
73 | #include "opt_ddb.h" | 73 | #include "opt_ddb.h" | |
74 | #include "opt_lockdebug.h" | 74 | #include "opt_lockdebug.h" | |
75 | #include "opt_perfctrs.h" | 75 | #include "opt_perfctrs.h" | |
76 | 76 | |||
77 | #include <sys/param.h> | 77 | #include <sys/param.h> | |
78 | #include <sys/systm.h> | 78 | #include <sys/systm.h> | |
79 | #include <sys/callout.h> | 79 | #include <sys/callout.h> | |
80 | #include <sys/cpu.h> | 80 | #include <sys/cpu.h> | |
81 | #include <sys/proc.h> | 81 | #include <sys/proc.h> | |
82 | #include <sys/kernel.h> | 82 | #include <sys/kernel.h> | |
83 | #include <sys/signalvar.h> | 83 | #include <sys/signalvar.h> | |
84 | #include <sys/resourcevar.h> | 84 | #include <sys/resourcevar.h> | |
85 | #include <sys/sched.h> | 85 | #include <sys/sched.h> | |
86 | #include <sys/sysctl.h> | 86 | #include <sys/sysctl.h> | |
87 | #include <sys/kauth.h> | 87 | #include <sys/kauth.h> | |
88 | #include <sys/lockdebug.h> | 88 | #include <sys/lockdebug.h> | |
89 | #include <sys/kmem.h> | 89 | #include <sys/kmem.h> | |
90 | #include <sys/intr.h> | 90 | #include <sys/intr.h> | |
91 | 91 | |||
92 | #include <uvm/uvm_extern.h> | |||
93 | ||||
94 | static void updatepri(struct lwp *); | 92 | static void updatepri(struct lwp *); | |
95 | static void resetpriority(struct lwp *); | 93 | static void resetpriority(struct lwp *); | |
96 | 94 | |||
97 | extern unsigned int sched_pstats_ticks; /* defined in kern_synch.c */ | 95 | extern unsigned int sched_pstats_ticks; /* defined in kern_synch.c */ | |
98 | 96 | |||
99 | /* Number of hardclock ticks per sched_tick() */ | 97 | /* Number of hardclock ticks per sched_tick() */ | |
100 | static int rrticks; | 98 | static int rrticks; | |
101 | 99 | |||
102 | /* | 100 | /* | |
103 | * Force switch among equal priority processes every 100ms. | 101 | * Force switch among equal priority processes every 100ms. | |
104 | * Called from hardclock every hz/10 == rrticks hardclock ticks. | 102 | * Called from hardclock every hz/10 == rrticks hardclock ticks. | |
105 | * | 103 | * | |
106 | * There's no need to lock anywhere in this routine, as it's | 104 | * There's no need to lock anywhere in this routine, as it's | |
107 | * CPU-local and runs at IPL_SCHED (called from clock interrupt). | 105 | * CPU-local and runs at IPL_SCHED (called from clock interrupt). | |
108 | */ | 106 | */ | |
109 | /* ARGSUSED */ | 107 | /* ARGSUSED */ | |
110 | void | 108 | void | |
111 | sched_tick(struct cpu_info *ci) | 109 | sched_tick(struct cpu_info *ci) | |
112 | { | 110 | { | |
113 | struct schedstate_percpu *spc = &ci->ci_schedstate; | 111 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |
114 | lwp_t *l; | 112 | lwp_t *l; | |
115 | 113 | |||
116 | spc->spc_ticks = rrticks; | 114 | spc->spc_ticks = rrticks; | |
117 | 115 | |||
118 | if (CURCPU_IDLE_P()) { | 116 | if (CURCPU_IDLE_P()) { | |
119 | cpu_need_resched(ci, 0); | 117 | cpu_need_resched(ci, 0); | |
120 | return; | 118 | return; | |
121 | } | 119 | } | |
122 | l = ci->ci_data.cpu_onproc; | 120 | l = ci->ci_data.cpu_onproc; | |
123 | if (l == NULL) { | 121 | if (l == NULL) { | |
124 | return; | 122 | return; | |
125 | } | 123 | } | |
126 | switch (l->l_class) { | 124 | switch (l->l_class) { | |
127 | case SCHED_FIFO: | 125 | case SCHED_FIFO: | |
128 | /* No timeslicing for FIFO jobs. */ | 126 | /* No timeslicing for FIFO jobs. */ | |
129 | break; | 127 | break; | |
130 | case SCHED_RR: | 128 | case SCHED_RR: | |
131 | /* Force it into mi_switch() to look for other jobs to run. */ | 129 | /* Force it into mi_switch() to look for other jobs to run. */ | |
132 | cpu_need_resched(ci, RESCHED_KPREEMPT); | 130 | cpu_need_resched(ci, RESCHED_KPREEMPT); | |
133 | break; | 131 | break; | |
134 | default: | 132 | default: | |
135 | if (spc->spc_flags & SPCF_SHOULDYIELD) { | 133 | if (spc->spc_flags & SPCF_SHOULDYIELD) { | |
136 | /* | 134 | /* | |
137 | * Process is stuck in kernel somewhere, probably | 135 | * Process is stuck in kernel somewhere, probably | |
138 | * due to buggy or inefficient code. Force a | 136 | * due to buggy or inefficient code. Force a | |
139 | * kernel preemption. | 137 | * kernel preemption. | |
140 | */ | 138 | */ | |
141 | cpu_need_resched(ci, RESCHED_KPREEMPT); | 139 | cpu_need_resched(ci, RESCHED_KPREEMPT); | |
142 | } else if (spc->spc_flags & SPCF_SEENRR) { | 140 | } else if (spc->spc_flags & SPCF_SEENRR) { | |
143 | /* | 141 | /* | |
144 | * The process has already been through a roundrobin | 142 | * The process has already been through a roundrobin | |
145 | * without switching and may be hogging the CPU. | 143 | * without switching and may be hogging the CPU. | |
146 | * Indicate that the process should yield. | 144 | * Indicate that the process should yield. | |
147 | */ | 145 | */ | |
148 | spc->spc_flags |= SPCF_SHOULDYIELD; | 146 | spc->spc_flags |= SPCF_SHOULDYIELD; | |
149 | cpu_need_resched(ci, 0); | 147 | cpu_need_resched(ci, 0); | |
150 | } else { | 148 | } else { | |
151 | spc->spc_flags |= SPCF_SEENRR; | 149 | spc->spc_flags |= SPCF_SEENRR; | |
152 | } | 150 | } | |
153 | break; | 151 | break; | |
154 | } | 152 | } | |
155 | } | 153 | } | |
156 | 154 | |||
157 | /* | 155 | /* | |
158 | * Why PRIO_MAX - 2? From setpriority(2): | 156 | * Why PRIO_MAX - 2? From setpriority(2): | |
159 | * | 157 | * | |
160 | * prio is a value in the range -20 to 20. The default priority is | 158 | * prio is a value in the range -20 to 20. The default priority is | |
161 | * 0; lower priorities cause more favorable scheduling. A value of | 159 | * 0; lower priorities cause more favorable scheduling. A value of | |
162 | * 19 or 20 will schedule a process only when nothing at priority <= | 160 | * 19 or 20 will schedule a process only when nothing at priority <= | |
163 | * 0 is runnable. | 161 | * 0 is runnable. | |
164 | * | 162 | * | |
165 | * This gives estcpu influence over 18 priority levels, and leaves nice | 163 | * This gives estcpu influence over 18 priority levels, and leaves nice | |
166 | * with 40 levels. One way to think about it is that nice has 20 levels | 164 | * with 40 levels. One way to think about it is that nice has 20 levels | |
167 | * either side of estcpu's 18. | 165 | * either side of estcpu's 18. | |
168 | */ | 166 | */ | |
169 | #define ESTCPU_SHIFT 11 | 167 | #define ESTCPU_SHIFT 11 | |
170 | #define ESTCPU_MAX ((PRIO_MAX - 2) << ESTCPU_SHIFT) | 168 | #define ESTCPU_MAX ((PRIO_MAX - 2) << ESTCPU_SHIFT) | |
171 | #define ESTCPU_ACCUM (1 << (ESTCPU_SHIFT - 1)) | 169 | #define ESTCPU_ACCUM (1 << (ESTCPU_SHIFT - 1)) | |
172 | #define ESTCPULIM(e) min((e), ESTCPU_MAX) | 170 | #define ESTCPULIM(e) min((e), ESTCPU_MAX) | |
173 | 171 | |||
174 | /* | 172 | /* | |
175 | * Constants for digital decay and forget: | 173 | * Constants for digital decay and forget: | |
176 | * 90% of (l_estcpu) usage in 5 * loadav time | 174 | * 90% of (l_estcpu) usage in 5 * loadav time | |
177 | * 95% of (l_pctcpu) usage in 60 seconds (load insensitive) | 175 | * 95% of (l_pctcpu) usage in 60 seconds (load insensitive) | |
178 | * Note that, as ps(1) mentions, this can let percentages | 176 | * Note that, as ps(1) mentions, this can let percentages | |
179 | * total over 100% (I've seen 137.9% for 3 processes). | 177 | * total over 100% (I've seen 137.9% for 3 processes). | |
180 | * | 178 | * | |
181 | * Note that hardclock updates l_estcpu and l_cpticks independently. | 179 | * Note that hardclock updates l_estcpu and l_cpticks independently. | |
182 | * | 180 | * | |
183 | * We wish to decay away 90% of l_estcpu in (5 * loadavg) seconds. | 181 | * We wish to decay away 90% of l_estcpu in (5 * loadavg) seconds. | |
184 | * That is, the system wants to compute a value of decay such | 182 | * That is, the system wants to compute a value of decay such | |
185 | * that the following for loop: | 183 | * that the following for loop: | |
186 | * for (i = 0; i < (5 * loadavg); i++) | 184 | * for (i = 0; i < (5 * loadavg); i++) | |
187 | * l_estcpu *= decay; | 185 | * l_estcpu *= decay; | |
188 | * will compute | 186 | * will compute | |
189 | * l_estcpu *= 0.1; | 187 | * l_estcpu *= 0.1; | |
190 | * for all values of loadavg: | 188 | * for all values of loadavg: | |
191 | * | 189 | * | |
192 | * Mathematically this loop can be expressed by saying: | 190 | * Mathematically this loop can be expressed by saying: | |
193 | * decay ** (5 * loadavg) ~= .1 | 191 | * decay ** (5 * loadavg) ~= .1 | |
194 | * | 192 | * | |
195 | * The system computes decay as: | 193 | * The system computes decay as: | |
196 | * decay = (2 * loadavg) / (2 * loadavg + 1) | 194 | * decay = (2 * loadavg) / (2 * loadavg + 1) | |
197 | * | 195 | * | |
198 | * We wish to prove that the system's computation of decay | 196 | * We wish to prove that the system's computation of decay | |
199 | * will always fulfill the equation: | 197 | * will always fulfill the equation: | |
200 | * decay ** (5 * loadavg) ~= .1 | 198 | * decay ** (5 * loadavg) ~= .1 | |
201 | * | 199 | * | |
202 | * If we compute b as: | 200 | * If we compute b as: | |
203 | * b = 2 * loadavg | 201 | * b = 2 * loadavg | |
204 | * then | 202 | * then | |
205 | * decay = b / (b + 1) | 203 | * decay = b / (b + 1) | |
206 | * | 204 | * | |
207 | * We now need to prove two things: | 205 | * We now need to prove two things: | |
208 | * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) | 206 | * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) | |
209 | * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) | 207 | * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) | |
210 | * | 208 | * | |
211 | * Facts: | 209 | * Facts: | |
212 | * For x close to zero, exp(x) =~ 1 + x, since | 210 | * For x close to zero, exp(x) =~ 1 + x, since | |
213 | * exp(x) = 0! + x**1/1! + x**2/2! + ... . | 211 | * exp(x) = 0! + x**1/1! + x**2/2! + ... . | |
214 | * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. | 212 | * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. | |
215 | * For x close to zero, ln(1+x) =~ x, since | 213 | * For x close to zero, ln(1+x) =~ x, since | |
216 | * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 | 214 | * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 | |
217 | * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). | 215 | * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). | |
218 | * ln(.1) =~ -2.30 | 216 | * ln(.1) =~ -2.30 | |
219 | * | 217 | * | |
220 | * Proof of (1): | 218 | * Proof of (1): | |
221 | * Solve (factor)**(power) =~ .1 given power (5*loadav): | 219 | * Solve (factor)**(power) =~ .1 given power (5*loadav): | |
222 | * solving for factor, | 220 | * solving for factor, | |
223 | * ln(factor) =~ (-2.30/5*loadav), or | 221 | * ln(factor) =~ (-2.30/5*loadav), or | |
224 | * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = | 222 | * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = | |
225 | * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED | 223 | * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED | |
226 | * | 224 | * | |
227 | * Proof of (2): | 225 | * Proof of (2): | |
228 | * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): | 226 | * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): | |
229 | * solving for power, | 227 | * solving for power, | |
230 | * power*ln(b/(b+1)) =~ -2.30, or | 228 | * power*ln(b/(b+1)) =~ -2.30, or | |
231 | * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED | 229 | * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED | |
232 | * | 230 | * | |
233 | * Actual power values for the implemented algorithm are as follows: | 231 | * Actual power values for the implemented algorithm are as follows: | |
234 | * loadav: 1 2 3 4 | 232 | * loadav: 1 2 3 4 | |
235 | * power: 5.68 10.32 14.94 19.55 | 233 | * power: 5.68 10.32 14.94 19.55 | |
236 | */ | 234 | */ | |
237 | 235 | |||
238 | /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ | 236 | /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ | |
239 | #define loadfactor(loadav) (2 * (loadav) / ncpu) | 237 | #define loadfactor(loadav) (2 * (loadav) / ncpu) | |
240 | 238 | |||
241 | static fixpt_t | 239 | static fixpt_t | |
242 | decay_cpu(fixpt_t loadfac, fixpt_t estcpu) | 240 | decay_cpu(fixpt_t loadfac, fixpt_t estcpu) | |
243 | { | 241 | { | |
244 | 242 | |||
245 | if (estcpu == 0) { | 243 | if (estcpu == 0) { | |
246 | return 0; | 244 | return 0; | |
247 | } | 245 | } | |
248 | 246 | |||
249 | #if !defined(_LP64) | 247 | #if !defined(_LP64) | |
250 | /* avoid 64bit arithmetics. */ | 248 | /* avoid 64bit arithmetics. */ | |
251 | #define FIXPT_MAX ((fixpt_t)((UINTMAX_C(1) << sizeof(fixpt_t) * CHAR_BIT) - 1)) | 249 | #define FIXPT_MAX ((fixpt_t)((UINTMAX_C(1) << sizeof(fixpt_t) * CHAR_BIT) - 1)) | |
252 | if (__predict_true(loadfac <= FIXPT_MAX / ESTCPU_MAX)) { | 250 | if (__predict_true(loadfac <= FIXPT_MAX / ESTCPU_MAX)) { | |
253 | return estcpu * loadfac / (loadfac + FSCALE); | 251 | return estcpu * loadfac / (loadfac + FSCALE); | |
254 | } | 252 | } | |
255 | #endif /* !defined(_LP64) */ | 253 | #endif /* !defined(_LP64) */ | |
256 | 254 | |||
257 | return (uint64_t)estcpu * loadfac / (loadfac + FSCALE); | 255 | return (uint64_t)estcpu * loadfac / (loadfac + FSCALE); | |
258 | } | 256 | } | |
259 | 257 | |||
260 | /* | 258 | /* | |
261 | * For all load averages >= 1 and max l_estcpu of (255 << ESTCPU_SHIFT), | 259 | * For all load averages >= 1 and max l_estcpu of (255 << ESTCPU_SHIFT), | |
262 | * sleeping for at least seven times the loadfactor will decay l_estcpu to | 260 | * sleeping for at least seven times the loadfactor will decay l_estcpu to | |
263 | * less than (1 << ESTCPU_SHIFT). | 261 | * less than (1 << ESTCPU_SHIFT). | |
264 | * | 262 | * | |
265 | * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT). | 263 | * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT). | |
266 | */ | 264 | */ | |
267 | static fixpt_t | 265 | static fixpt_t | |
268 | decay_cpu_batch(fixpt_t loadfac, fixpt_t estcpu, unsigned int n) | 266 | decay_cpu_batch(fixpt_t loadfac, fixpt_t estcpu, unsigned int n) | |
269 | { | 267 | { | |
270 | 268 | |||
271 | if ((n << FSHIFT) >= 7 * loadfac) { | 269 | if ((n << FSHIFT) >= 7 * loadfac) { | |
272 | return 0; | 270 | return 0; | |
273 | } | 271 | } | |
274 | 272 | |||
275 | while (estcpu != 0 && n > 1) { | 273 | while (estcpu != 0 && n > 1) { | |
276 | estcpu = decay_cpu(loadfac, estcpu); | 274 | estcpu = decay_cpu(loadfac, estcpu); | |
277 | n--; | 275 | n--; | |
278 | } | 276 | } | |
279 | 277 | |||
280 | return estcpu; | 278 | return estcpu; | |
281 | } | 279 | } | |
282 | 280 | |||
283 | /* | 281 | /* | |
284 | * sched_pstats_hook: | 282 | * sched_pstats_hook: | |
285 | * | 283 | * | |
286 | * Periodically called from sched_pstats(); used to recalculate priorities. | 284 | * Periodically called from sched_pstats(); used to recalculate priorities. | |
287 | */ | 285 | */ | |
288 | void | 286 | void | |
289 | sched_pstats_hook(struct lwp *l, int batch) | 287 | sched_pstats_hook(struct lwp *l, int batch) | |
290 | { | 288 | { | |
291 | fixpt_t loadfac; | 289 | fixpt_t loadfac; | |
292 | 290 | |||
293 | /* | 291 | /* | |
294 | * If the LWP has slept an entire second, stop recalculating | 292 | * If the LWP has slept an entire second, stop recalculating | |
295 | * its priority until it wakes up. | 293 | * its priority until it wakes up. | |
296 | */ | 294 | */ | |
297 | KASSERT(lwp_locked(l, NULL)); | 295 | KASSERT(lwp_locked(l, NULL)); | |
298 | if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP || | 296 | if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP || | |
299 | l->l_stat == LSSUSPENDED) { | 297 | l->l_stat == LSSUSPENDED) { | |
300 | if (l->l_slptime > 1) { | 298 | if (l->l_slptime > 1) { | |
301 | return; | 299 | return; | |
302 | } | 300 | } | |
303 | } | 301 | } | |
304 | loadfac = 2 * (averunnable.ldavg[0]); | 302 | loadfac = 2 * (averunnable.ldavg[0]); | |
305 | l->l_estcpu = decay_cpu(loadfac, l->l_estcpu); | 303 | l->l_estcpu = decay_cpu(loadfac, l->l_estcpu); | |
306 | resetpriority(l); | 304 | resetpriority(l); | |
307 | } | 305 | } | |
308 | 306 | |||
309 | /* | 307 | /* | |
310 | * Recalculate the priority of a process after it has slept for a while. | 308 | * Recalculate the priority of a process after it has slept for a while. | |
311 | */ | 309 | */ | |
312 | static void | 310 | static void | |
313 | updatepri(struct lwp *l) | 311 | updatepri(struct lwp *l) | |
314 | { | 312 | { | |
315 | fixpt_t loadfac; | 313 | fixpt_t loadfac; | |
316 | 314 | |||
317 | KASSERT(lwp_locked(l, NULL)); | 315 | KASSERT(lwp_locked(l, NULL)); | |
318 | KASSERT(l->l_slptime > 1); | 316 | KASSERT(l->l_slptime > 1); | |
319 | 317 | |||
320 | loadfac = loadfactor(averunnable.ldavg[0]); | 318 | loadfac = loadfactor(averunnable.ldavg[0]); | |
321 | 319 | |||
322 | l->l_slptime--; /* the first time was done in sched_pstats */ | 320 | l->l_slptime--; /* the first time was done in sched_pstats */ | |
323 | l->l_estcpu = decay_cpu_batch(loadfac, l->l_estcpu, l->l_slptime); | 321 | l->l_estcpu = decay_cpu_batch(loadfac, l->l_estcpu, l->l_slptime); | |
324 | resetpriority(l); | 322 | resetpriority(l); | |
325 | } | 323 | } | |
326 | 324 | |||
327 | void | 325 | void | |
328 | sched_rqinit(void) | 326 | sched_rqinit(void) | |
329 | { | 327 | { | |
330 | 328 | |||
331 | } | 329 | } | |
332 | 330 | |||
333 | void | 331 | void | |
334 | sched_setrunnable(struct lwp *l) | 332 | sched_setrunnable(struct lwp *l) | |
335 | { | 333 | { | |
336 | 334 | |||
337 | if (l->l_slptime > 1) | 335 | if (l->l_slptime > 1) | |
338 | updatepri(l); | 336 | updatepri(l); | |
339 | } | 337 | } | |
340 | 338 | |||
341 | void | 339 | void | |
342 | sched_nice(struct proc *p, int n) | 340 | sched_nice(struct proc *p, int n) | |
343 | { | 341 | { | |
344 | struct lwp *l; | 342 | struct lwp *l; | |
345 | 343 | |||
346 | KASSERT(mutex_owned(p->p_lock)); | 344 | KASSERT(mutex_owned(p->p_lock)); | |
347 | 345 | |||
348 | p->p_nice = n; | 346 | p->p_nice = n; | |
349 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | 347 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | |
350 | lwp_lock(l); | 348 | lwp_lock(l); | |
351 | resetpriority(l); | 349 | resetpriority(l); | |
352 | lwp_unlock(l); | 350 | lwp_unlock(l); | |
353 | } | 351 | } | |
354 | } | 352 | } | |
355 | 353 | |||
356 | /* | 354 | /* | |
357 | * Recompute the priority of an LWP. Arrange to reschedule if | 355 | * Recompute the priority of an LWP. Arrange to reschedule if | |
358 | * the resulting priority is better than that of the current LWP. | 356 | * the resulting priority is better than that of the current LWP. | |
359 | */ | 357 | */ | |
360 | static void | 358 | static void | |
361 | resetpriority(struct lwp *l) | 359 | resetpriority(struct lwp *l) | |
362 | { | 360 | { | |
363 | pri_t pri; | 361 | pri_t pri; | |
364 | struct proc *p = l->l_proc; | 362 | struct proc *p = l->l_proc; | |
365 | 363 | |||
366 | KASSERT(lwp_locked(l, NULL)); | 364 | KASSERT(lwp_locked(l, NULL)); | |
367 | 365 | |||
368 | if (l->l_class != SCHED_OTHER) | 366 | if (l->l_class != SCHED_OTHER) | |
369 | return; | 367 | return; | |
370 | 368 | |||
371 | /* See comments above ESTCPU_SHIFT definition. */ | 369 | /* See comments above ESTCPU_SHIFT definition. */ | |
372 | pri = (PRI_KERNEL - 1) - (l->l_estcpu >> ESTCPU_SHIFT) - p->p_nice; | 370 | pri = (PRI_KERNEL - 1) - (l->l_estcpu >> ESTCPU_SHIFT) - p->p_nice; | |
373 | pri = imax(pri, 0); | 371 | pri = imax(pri, 0); | |
374 | if (pri != l->l_priority) | 372 | if (pri != l->l_priority) | |
375 | lwp_changepri(l, pri); | 373 | lwp_changepri(l, pri); | |
376 | } | 374 | } | |
377 | 375 | |||
378 | /* | 376 | /* | |
379 | * We adjust the priority of the current process. The priority of a process | 377 | * We adjust the priority of the current process. The priority of a process | |
380 | * gets worse as it accumulates CPU time. The CPU usage estimator (l_estcpu) | 378 | * gets worse as it accumulates CPU time. The CPU usage estimator (l_estcpu) | |
381 | * is increased here. The formula for computing priorities (in kern_synch.c) | 379 | * is increased here. The formula for computing priorities (in kern_synch.c) | |
382 | * will compute a different value each time l_estcpu increases. This can | 380 | * will compute a different value each time l_estcpu increases. This can | |
383 | * cause a switch, but unless the priority crosses a PPQ boundary the actual | 381 | * cause a switch, but unless the priority crosses a PPQ boundary the actual | |
384 | * queue will not change. The CPU usage estimator ramps up quite quickly | 382 | * queue will not change. The CPU usage estimator ramps up quite quickly | |
385 | * when the process is running (linearly), and decays away exponentially, at | 383 | * when the process is running (linearly), and decays away exponentially, at | |
386 | * a rate which is proportionally slower when the system is busy. The basic | 384 | * a rate which is proportionally slower when the system is busy. The basic | |
387 | * principle is that the system will 90% forget that the process used a lot | 385 | * principle is that the system will 90% forget that the process used a lot | |
388 | * of CPU time in 5 * loadav seconds. This causes the system to favor | 386 | * of CPU time in 5 * loadav seconds. This causes the system to favor | |
389 | * processes which haven't run much recently, and to round-robin among other | 387 | * processes which haven't run much recently, and to round-robin among other | |
390 | * processes. | 388 | * processes. | |
391 | */ | 389 | */ | |
392 | 390 | |||
393 | void | 391 | void | |
394 | sched_schedclock(struct lwp *l) | 392 | sched_schedclock(struct lwp *l) | |
395 | { | 393 | { | |
396 | 394 | |||
397 | if (l->l_class != SCHED_OTHER) | 395 | if (l->l_class != SCHED_OTHER) | |
398 | return; | 396 | return; | |
399 | 397 | |||
400 | KASSERT(!CURCPU_IDLE_P()); | 398 | KASSERT(!CURCPU_IDLE_P()); | |
401 | l->l_estcpu = ESTCPULIM(l->l_estcpu + ESTCPU_ACCUM); | 399 | l->l_estcpu = ESTCPULIM(l->l_estcpu + ESTCPU_ACCUM); | |
402 | lwp_lock(l); | 400 | lwp_lock(l); | |
403 | resetpriority(l); | 401 | resetpriority(l); | |
404 | lwp_unlock(l); | 402 | lwp_unlock(l); | |
405 | } | 403 | } | |
406 | 404 | |||
407 | /* | 405 | /* | |
408 | * sched_proc_fork: | 406 | * sched_proc_fork: | |
409 | * | 407 | * | |
410 | * Inherit the parent's scheduler history. | 408 | * Inherit the parent's scheduler history. | |
411 | */ | 409 | */ | |
412 | void | 410 | void | |
413 | sched_proc_fork(struct proc *parent, struct proc *child) | 411 | sched_proc_fork(struct proc *parent, struct proc *child) | |
414 | { | 412 | { | |
415 | lwp_t *pl; | 413 | lwp_t *pl; | |
416 | 414 | |||
417 | KASSERT(mutex_owned(parent->p_lock)); | 415 | KASSERT(mutex_owned(parent->p_lock)); | |
418 | 416 | |||
419 | pl = LIST_FIRST(&parent->p_lwps); | 417 | pl = LIST_FIRST(&parent->p_lwps); | |
420 | child->p_estcpu_inherited = pl->l_estcpu; | 418 | child->p_estcpu_inherited = pl->l_estcpu; | |
421 | child->p_forktime = sched_pstats_ticks; | 419 | child->p_forktime = sched_pstats_ticks; | |
422 | } | 420 | } | |
423 | 421 | |||
424 | /* | 422 | /* | |
425 | * sched_proc_exit: | 423 | * sched_proc_exit: | |
426 | * | 424 | * | |
427 | * Chargeback parents for the sins of their children. | 425 | * Chargeback parents for the sins of their children. | |
428 | */ | 426 | */ | |
429 | void | 427 | void | |
430 | sched_proc_exit(struct proc *parent, struct proc *child) | 428 | sched_proc_exit(struct proc *parent, struct proc *child) | |
431 | { | 429 | { | |
432 | fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); | 430 | fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); | |
433 | fixpt_t estcpu; | 431 | fixpt_t estcpu; | |
434 | lwp_t *pl, *cl; | 432 | lwp_t *pl, *cl; | |
435 | 433 | |||
436 | /* XXX Only if parent != init?? */ | 434 | /* XXX Only if parent != init?? */ | |
437 | 435 | |||
438 | mutex_enter(parent->p_lock); | 436 | mutex_enter(parent->p_lock); | |
439 | pl = LIST_FIRST(&parent->p_lwps); | 437 | pl = LIST_FIRST(&parent->p_lwps); | |
440 | cl = LIST_FIRST(&child->p_lwps); | 438 | cl = LIST_FIRST(&child->p_lwps); | |
441 | estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited, | 439 | estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited, | |
442 | sched_pstats_ticks - child->p_forktime); | 440 | sched_pstats_ticks - child->p_forktime); | |
443 | if (cl->l_estcpu > estcpu) { | 441 | if (cl->l_estcpu > estcpu) { | |
444 | lwp_lock(pl); | 442 | lwp_lock(pl); | |
445 | pl->l_estcpu = ESTCPULIM(pl->l_estcpu + cl->l_estcpu - estcpu); | 443 | pl->l_estcpu = ESTCPULIM(pl->l_estcpu + cl->l_estcpu - estcpu); | |
446 | lwp_unlock(pl); | 444 | lwp_unlock(pl); | |
447 | } | 445 | } | |
448 | mutex_exit(parent->p_lock); | 446 | mutex_exit(parent->p_lock); | |
449 | } | 447 | } | |
450 | 448 | |||
451 | void | 449 | void | |
452 | sched_wakeup(struct lwp *l) | 450 | sched_wakeup(struct lwp *l) | |
453 | { | 451 | { | |
454 | 452 | |||
455 | } | 453 | } | |
456 | 454 | |||
457 | void | 455 | void | |
458 | sched_slept(struct lwp *l) | 456 | sched_slept(struct lwp *l) | |
459 | { | 457 | { | |
460 | 458 | |||
461 | } | 459 | } | |
462 | 460 | |||
463 | void | 461 | void | |
464 | sched_lwp_fork(struct lwp *l1, struct lwp *l2) | 462 | sched_lwp_fork(struct lwp *l1, struct lwp *l2) | |
465 | { | 463 | { | |
466 | 464 | |||
467 | l2->l_estcpu = l1->l_estcpu; | 465 | l2->l_estcpu = l1->l_estcpu; | |
468 | } | 466 | } | |
469 | 467 | |||
470 | void | 468 | void | |
471 | sched_lwp_collect(struct lwp *t) | 469 | sched_lwp_collect(struct lwp *t) | |
472 | { | 470 | { | |
473 | lwp_t *l; | 471 | lwp_t *l; | |
474 | 472 | |||
475 | /* Absorb estcpu value of collected LWP. */ | 473 | /* Absorb estcpu value of collected LWP. */ | |
476 | l = curlwp; | 474 | l = curlwp; | |
477 | lwp_lock(l); | 475 | lwp_lock(l); | |
478 | l->l_estcpu += t->l_estcpu; | 476 | l->l_estcpu += t->l_estcpu; | |
479 | lwp_unlock(l); | 477 | lwp_unlock(l); | |
480 | } | 478 | } | |
481 | 479 | |||
482 | void | 480 | void | |
483 | sched_oncpu(lwp_t *l) | 481 | sched_oncpu(lwp_t *l) | |
484 | { | 482 | { | |
485 | 483 | |||
486 | } | 484 | } | |
487 | 485 | |||
488 | void | 486 | void | |
489 | sched_newts(lwp_t *l) | 487 | sched_newts(lwp_t *l) | |
490 | { | 488 | { | |
491 | 489 | |||
492 | } | 490 | } | |
493 | 491 | |||
494 | /* | 492 | /* | |
495 | * Sysctl nodes and initialization. | 493 | * Sysctl nodes and initialization. | |
496 | */ | 494 | */ | |
497 | 495 | |||
498 | static int | 496 | static int | |
499 | sysctl_sched_rtts(SYSCTLFN_ARGS) | 497 | sysctl_sched_rtts(SYSCTLFN_ARGS) | |
500 | { | 498 | { | |
501 | struct sysctlnode node; | 499 | struct sysctlnode node; | |
502 | int rttsms = hztoms(rrticks); | 500 | int rttsms = hztoms(rrticks); | |
503 | 501 | |||
504 | node = *rnode; | 502 | node = *rnode; | |
505 | node.sysctl_data = &rttsms; | 503 | node.sysctl_data = &rttsms; | |
506 | return sysctl_lookup(SYSCTLFN_CALL(&node)); | 504 | return sysctl_lookup(SYSCTLFN_CALL(&node)); | |
507 | } | 505 | } | |
508 | 506 | |||
509 | SYSCTL_SETUP(sysctl_sched_4bsd_setup, "sysctl sched setup") | 507 | SYSCTL_SETUP(sysctl_sched_4bsd_setup, "sysctl sched setup") | |
510 | { | 508 | { | |
511 | const struct sysctlnode *node = NULL; | 509 | const struct sysctlnode *node = NULL; | |
512 | 510 | |||
513 | sysctl_createv(clog, 0, NULL, NULL, | 511 | sysctl_createv(clog, 0, NULL, NULL, | |
514 | CTLFLAG_PERMANENT, | 512 | CTLFLAG_PERMANENT, | |
515 | CTLTYPE_NODE, "kern", NULL, | 513 | CTLTYPE_NODE, "kern", NULL, | |
516 | NULL, 0, NULL, 0, | 514 | NULL, 0, NULL, 0, | |
517 | CTL_KERN, CTL_EOL); | 515 | CTL_KERN, CTL_EOL); | |
518 | sysctl_createv(clog, 0, NULL, &node, | 516 | sysctl_createv(clog, 0, NULL, &node, | |
519 | CTLFLAG_PERMANENT, | 517 | CTLFLAG_PERMANENT, | |
520 | CTLTYPE_NODE, "sched", | 518 | CTLTYPE_NODE, "sched", | |
521 | SYSCTL_DESCR("Scheduler options"), | 519 | SYSCTL_DESCR("Scheduler options"), | |
522 | NULL, 0, NULL, 0, | 520 | NULL, 0, NULL, 0, | |
523 | CTL_KERN, CTL_CREATE, CTL_EOL); | 521 | CTL_KERN, CTL_CREATE, CTL_EOL); | |
524 | 522 | |||
525 | if (node == NULL) | 523 | if (node == NULL) | |
526 | return; | 524 | return; | |
527 | 525 | |||
528 | rrticks = hz / 10; | 526 | rrticks = hz / 10; | |
529 | 527 | |||
530 | sysctl_createv(NULL, 0, &node, NULL, | 528 | sysctl_createv(NULL, 0, &node, NULL, | |
531 | CTLFLAG_PERMANENT, | 529 | CTLFLAG_PERMANENT, | |
532 | CTLTYPE_STRING, "name", NULL, | 530 | CTLTYPE_STRING, "name", NULL, | |
533 | NULL, 0, __UNCONST("4.4BSD"), 0, | 531 | NULL, 0, __UNCONST("4.4BSD"), 0, | |
534 | CTL_CREATE, CTL_EOL); | 532 | CTL_CREATE, CTL_EOL); | |
535 | sysctl_createv(NULL, 0, &node, NULL, | 533 | sysctl_createv(NULL, 0, &node, NULL, | |
536 | CTLFLAG_PERMANENT, | 534 | CTLFLAG_PERMANENT, | |
537 | CTLTYPE_INT, "rtts", | 535 | CTLTYPE_INT, "rtts", | |
538 | SYSCTL_DESCR("Round-robin time quantum (in miliseconds)"), | 536 | SYSCTL_DESCR("Round-robin time quantum (in miliseconds)"), | |
539 | sysctl_sched_rtts, 0, NULL, 0, | 537 | sysctl_sched_rtts, 0, NULL, 0, | |
540 | CTL_CREATE, CTL_EOL); | 538 | CTL_CREATE, CTL_EOL); | |
541 | } | 539 | } |
--- src/sys/kern/subr_percpu.c 2011/05/13 22:16:44 1.13
+++ src/sys/kern/subr_percpu.c 2011/07/27 14:35:34 1.14
@@ -1,370 +1,368 @@ | @@ -1,370 +1,368 @@ | |||
1 | /* $NetBSD: subr_percpu.c,v 1.13 2011/05/13 22:16:44 rmind Exp $ */ | 1 | /* $NetBSD: subr_percpu.c,v 1.14 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c)2007,2008 YAMAMOTO Takashi, | 4 | * Copyright (c)2007,2008 YAMAMOTO Takashi, | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 9 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 10 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 11 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 14 | * documentation and/or other materials provided with the distribution. | |
15 | * | 15 | * | |
16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
26 | * SUCH DAMAGE. | 26 | * SUCH DAMAGE. | |
27 | */ | 27 | */ | |
28 | 28 | |||
29 | /* | 29 | /* | |
30 | * per-cpu storage. | 30 | * per-cpu storage. | |
31 | */ | 31 | */ | |
32 | 32 | |||
33 | #include <sys/cdefs.h> | 33 | #include <sys/cdefs.h> | |
34 | __KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.13 2011/05/13 22:16:44 rmind Exp $"); | 34 | __KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.14 2011/07/27 14:35:34 uebayasi Exp $"); | |
35 | 35 | |||
36 | #include <sys/param.h> | 36 | #include <sys/param.h> | |
37 | #include <sys/cpu.h> | 37 | #include <sys/cpu.h> | |
38 | #include <sys/kmem.h> | 38 | #include <sys/kmem.h> | |
39 | #include <sys/kernel.h> | 39 | #include <sys/kernel.h> | |
40 | #include <sys/mutex.h> | 40 | #include <sys/mutex.h> | |
41 | #include <sys/percpu.h> | 41 | #include <sys/percpu.h> | |
42 | #include <sys/rwlock.h> | 42 | #include <sys/rwlock.h> | |
43 | #include <sys/vmem.h> | 43 | #include <sys/vmem.h> | |
44 | #include <sys/xcall.h> | 44 | #include <sys/xcall.h> | |
45 | 45 | |||
46 | #include <uvm/uvm_extern.h> | |||
47 | ||||
48 | #define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1) | 46 | #define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1) | |
49 | #define PERCPU_QCACHE_MAX 0 | 47 | #define PERCPU_QCACHE_MAX 0 | |
50 | #define PERCPU_IMPORT_SIZE 2048 | 48 | #define PERCPU_IMPORT_SIZE 2048 | |
51 | 49 | |||
52 | #if defined(DIAGNOSTIC) | 50 | #if defined(DIAGNOSTIC) | |
53 | #define MAGIC 0x50435055 /* "PCPU" */ | 51 | #define MAGIC 0x50435055 /* "PCPU" */ | |
54 | #define percpu_encrypt(pc) ((pc) ^ MAGIC) | 52 | #define percpu_encrypt(pc) ((pc) ^ MAGIC) | |
55 | #define percpu_decrypt(pc) ((pc) ^ MAGIC) | 53 | #define percpu_decrypt(pc) ((pc) ^ MAGIC) | |
56 | #else /* defined(DIAGNOSTIC) */ | 54 | #else /* defined(DIAGNOSTIC) */ | |
57 | #define percpu_encrypt(pc) (pc) | 55 | #define percpu_encrypt(pc) (pc) | |
58 | #define percpu_decrypt(pc) (pc) | 56 | #define percpu_decrypt(pc) (pc) | |
59 | #endif /* defined(DIAGNOSTIC) */ | 57 | #endif /* defined(DIAGNOSTIC) */ | |
60 | 58 | |||
61 | static krwlock_t percpu_swap_lock __cacheline_aligned; | 59 | static krwlock_t percpu_swap_lock __cacheline_aligned; | |
62 | static kmutex_t percpu_allocation_lock __cacheline_aligned; | 60 | static kmutex_t percpu_allocation_lock __cacheline_aligned; | |
63 | static vmem_t * percpu_offset_arena __cacheline_aligned; | 61 | static vmem_t * percpu_offset_arena __cacheline_aligned; | |
64 | static unsigned int percpu_nextoff __cacheline_aligned; | 62 | static unsigned int percpu_nextoff __cacheline_aligned; | |
65 | 63 | |||
66 | static percpu_cpu_t * | 64 | static percpu_cpu_t * | |
67 | cpu_percpu(struct cpu_info *ci) | 65 | cpu_percpu(struct cpu_info *ci) | |
68 | { | 66 | { | |
69 | 67 | |||
70 | return &ci->ci_data.cpu_percpu; | 68 | return &ci->ci_data.cpu_percpu; | |
71 | } | 69 | } | |
72 | 70 | |||
73 | static unsigned int | 71 | static unsigned int | |
74 | percpu_offset(percpu_t *pc) | 72 | percpu_offset(percpu_t *pc) | |
75 | { | 73 | { | |
76 | const unsigned int off = percpu_decrypt((uintptr_t)pc); | 74 | const unsigned int off = percpu_decrypt((uintptr_t)pc); | |
77 | 75 | |||
78 | KASSERT(off < percpu_nextoff); | 76 | KASSERT(off < percpu_nextoff); | |
79 | return off; | 77 | return off; | |
80 | } | 78 | } | |
81 | 79 | |||
82 | /* | 80 | /* | |
83 | * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge | 81 | * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge | |
84 | */ | 82 | */ | |
85 | 83 | |||
86 | static void | 84 | static void | |
87 | percpu_cpu_swap(void *p1, void *p2) | 85 | percpu_cpu_swap(void *p1, void *p2) | |
88 | { | 86 | { | |
89 | struct cpu_info * const ci = p1; | 87 | struct cpu_info * const ci = p1; | |
90 | percpu_cpu_t * const newpcc = p2; | 88 | percpu_cpu_t * const newpcc = p2; | |
91 | percpu_cpu_t * const pcc = cpu_percpu(ci); | 89 | percpu_cpu_t * const pcc = cpu_percpu(ci); | |
92 | 90 | |||
93 | KASSERT(ci == curcpu() || !mp_online); | 91 | KASSERT(ci == curcpu() || !mp_online); | |
94 | 92 | |||
95 | /* | 93 | /* | |
96 | * swap *pcc and *newpcc unless anyone has beaten us. | 94 | * swap *pcc and *newpcc unless anyone has beaten us. | |
97 | */ | 95 | */ | |
98 | rw_enter(&percpu_swap_lock, RW_WRITER); | 96 | rw_enter(&percpu_swap_lock, RW_WRITER); | |
99 | if (newpcc->pcc_size > pcc->pcc_size) { | 97 | if (newpcc->pcc_size > pcc->pcc_size) { | |
100 | percpu_cpu_t tmp; | 98 | percpu_cpu_t tmp; | |
101 | int s; | 99 | int s; | |
102 | 100 | |||
103 | tmp = *pcc; | 101 | tmp = *pcc; | |
104 | 102 | |||
105 | /* | 103 | /* | |
106 | * block interrupts so that we don't lose their modifications. | 104 | * block interrupts so that we don't lose their modifications. | |
107 | */ | 105 | */ | |
108 | 106 | |||
109 | s = splhigh(); | 107 | s = splhigh(); | |
110 | 108 | |||
111 | /* | 109 | /* | |
112 | * copy data to new storage. | 110 | * copy data to new storage. | |
113 | */ | 111 | */ | |
114 | 112 | |||
115 | memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size); | 113 | memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size); | |
116 | 114 | |||
117 | /* | 115 | /* | |
118 | * this assignment needs to be atomic for percpu_getptr_remote. | 116 | * this assignment needs to be atomic for percpu_getptr_remote. | |
119 | */ | 117 | */ | |
120 | 118 | |||
121 | pcc->pcc_data = newpcc->pcc_data; | 119 | pcc->pcc_data = newpcc->pcc_data; | |
122 | 120 | |||
123 | splx(s); | 121 | splx(s); | |
124 | 122 | |||
125 | pcc->pcc_size = newpcc->pcc_size; | 123 | pcc->pcc_size = newpcc->pcc_size; | |
126 | *newpcc = tmp; | 124 | *newpcc = tmp; | |
127 | } | 125 | } | |
128 | rw_exit(&percpu_swap_lock); | 126 | rw_exit(&percpu_swap_lock); | |
129 | } | 127 | } | |
130 | 128 | |||
131 | /* | 129 | /* | |
132 | * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpus have enough space | 130 | * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpus have enough space | |
133 | */ | 131 | */ | |
134 | 132 | |||
135 | static void | 133 | static void | |
136 | percpu_cpu_enlarge(size_t size) | 134 | percpu_cpu_enlarge(size_t size) | |
137 | { | 135 | { | |
138 | CPU_INFO_ITERATOR cii; | 136 | CPU_INFO_ITERATOR cii; | |
139 | struct cpu_info *ci; | 137 | struct cpu_info *ci; | |
140 | 138 | |||
141 | for (CPU_INFO_FOREACH(cii, ci)) { | 139 | for (CPU_INFO_FOREACH(cii, ci)) { | |
142 | percpu_cpu_t pcc; | 140 | percpu_cpu_t pcc; | |
143 | 141 | |||
144 | pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ | 142 | pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ | |
145 | pcc.pcc_size = size; | 143 | pcc.pcc_size = size; | |
146 | if (!mp_online) { | 144 | if (!mp_online) { | |
147 | percpu_cpu_swap(ci, &pcc); | 145 | percpu_cpu_swap(ci, &pcc); | |
148 | } else { | 146 | } else { | |
149 | uint64_t where; | 147 | uint64_t where; | |
150 | 148 | |||
151 | where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); | 149 | where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); | |
152 | xc_wait(where); | 150 | xc_wait(where); | |
153 | } | 151 | } | |
154 | KASSERT(pcc.pcc_size < size); | 152 | KASSERT(pcc.pcc_size < size); | |
155 | if (pcc.pcc_data != NULL) { | 153 | if (pcc.pcc_data != NULL) { | |
156 | kmem_free(pcc.pcc_data, pcc.pcc_size); | 154 | kmem_free(pcc.pcc_data, pcc.pcc_size); | |
157 | } | 155 | } | |
158 | } | 156 | } | |
159 | } | 157 | } | |
160 | 158 | |||
161 | /* | 159 | /* | |
162 | * percpu_backend_alloc: vmem import callback for percpu_offset_arena | 160 | * percpu_backend_alloc: vmem import callback for percpu_offset_arena | |
163 | */ | 161 | */ | |
164 | 162 | |||
165 | static vmem_addr_t | 163 | static vmem_addr_t | |
166 | percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize, | 164 | percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize, | |
167 | vm_flag_t vmflags) | 165 | vm_flag_t vmflags) | |
168 | { | 166 | { | |
169 | unsigned int offset; | 167 | unsigned int offset; | |
170 | unsigned int nextoff; | 168 | unsigned int nextoff; | |
171 | 169 | |||
172 | ASSERT_SLEEPABLE(); | 170 | ASSERT_SLEEPABLE(); | |
173 | KASSERT(dummy == NULL); | 171 | KASSERT(dummy == NULL); | |
174 | 172 | |||
175 | if ((vmflags & VM_NOSLEEP) != 0) | 173 | if ((vmflags & VM_NOSLEEP) != 0) | |
176 | return VMEM_ADDR_NULL; | 174 | return VMEM_ADDR_NULL; | |
177 | 175 | |||
178 | size = roundup(size, PERCPU_IMPORT_SIZE); | 176 | size = roundup(size, PERCPU_IMPORT_SIZE); | |
179 | mutex_enter(&percpu_allocation_lock); | 177 | mutex_enter(&percpu_allocation_lock); | |
180 | offset = percpu_nextoff; | 178 | offset = percpu_nextoff; | |
181 | percpu_nextoff = nextoff = percpu_nextoff + size; | 179 | percpu_nextoff = nextoff = percpu_nextoff + size; | |
182 | mutex_exit(&percpu_allocation_lock); | 180 | mutex_exit(&percpu_allocation_lock); | |
183 | 181 | |||
184 | percpu_cpu_enlarge(nextoff); | 182 | percpu_cpu_enlarge(nextoff); | |
185 | 183 | |||
186 | *resultsize = size; | 184 | *resultsize = size; | |
187 | return (vmem_addr_t)offset; | 185 | return (vmem_addr_t)offset; | |
188 | } | 186 | } | |
189 | 187 | |||
190 | static void | 188 | static void | |
191 | percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci) | 189 | percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci) | |
192 | { | 190 | { | |
193 | size_t sz = (uintptr_t)vp2; | 191 | size_t sz = (uintptr_t)vp2; | |
194 | 192 | |||
195 | memset(vp, 0, sz); | 193 | memset(vp, 0, sz); | |
196 | } | 194 | } | |
197 | 195 | |||
198 | /* | 196 | /* | |
199 | * percpu_zero: initialize percpu storage with zero. | 197 | * percpu_zero: initialize percpu storage with zero. | |
200 | */ | 198 | */ | |
201 | 199 | |||
202 | static void | 200 | static void | |
203 | percpu_zero(percpu_t *pc, size_t sz) | 201 | percpu_zero(percpu_t *pc, size_t sz) | |
204 | { | 202 | { | |
205 | 203 | |||
206 | percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz); | 204 | percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz); | |
207 | } | 205 | } | |
208 | 206 | |||
209 | /* | 207 | /* | |
210 | * percpu_init: subsystem initialization | 208 | * percpu_init: subsystem initialization | |
211 | */ | 209 | */ | |
212 | 210 | |||
213 | void | 211 | void | |
214 | percpu_init(void) | 212 | percpu_init(void) | |
215 | { | 213 | { | |
216 | 214 | |||
217 | ASSERT_SLEEPABLE(); | 215 | ASSERT_SLEEPABLE(); | |
218 | rw_init(&percpu_swap_lock); | 216 | rw_init(&percpu_swap_lock); | |
219 | mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE); | 217 | mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE); | |
220 | percpu_nextoff = PERCPU_QUANTUM_SIZE; | 218 | percpu_nextoff = PERCPU_QUANTUM_SIZE; | |
221 | 219 | |||
222 | percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE, | 220 | percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE, | |
223 | percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP, | 221 | percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP, | |
224 | IPL_NONE); | 222 | IPL_NONE); | |
225 | } | 223 | } | |
226 | 224 | |||
227 | /* | 225 | /* | |
228 | * percpu_init_cpu: cpu initialization | 226 | * percpu_init_cpu: cpu initialization | |
229 | * | 227 | * | |
230 | * => should be called before the cpu appears on the list for CPU_INFO_FOREACH. | 228 | * => should be called before the cpu appears on the list for CPU_INFO_FOREACH. | |
231 | */ | 229 | */ | |
232 | 230 | |||
233 | void | 231 | void | |
234 | percpu_init_cpu(struct cpu_info *ci) | 232 | percpu_init_cpu(struct cpu_info *ci) | |
235 | { | 233 | { | |
236 | percpu_cpu_t * const pcc = cpu_percpu(ci); | 234 | percpu_cpu_t * const pcc = cpu_percpu(ci); | |
237 | size_t size = percpu_nextoff; /* XXX racy */ | 235 | size_t size = percpu_nextoff; /* XXX racy */ | |
238 | 236 | |||
239 | ASSERT_SLEEPABLE(); | 237 | ASSERT_SLEEPABLE(); | |
240 | pcc->pcc_size = size; | 238 | pcc->pcc_size = size; | |
241 | if (size) { | 239 | if (size) { | |
242 | pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP); | 240 | pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP); | |
243 | } | 241 | } | |
244 | } | 242 | } | |
245 | 243 | |||
246 | /* | 244 | /* | |
247 | * percpu_alloc: allocate percpu storage | 245 | * percpu_alloc: allocate percpu storage | |
248 | * | 246 | * | |
249 | * => called in thread context. | 247 | * => called in thread context. | |
250 | * => considered as an expensive and rare operation. | 248 | * => considered as an expensive and rare operation. | |
251 | * => allocated storage is initialized with zeros. | 249 | * => allocated storage is initialized with zeros. | |
252 | */ | 250 | */ | |
253 | 251 | |||
254 | percpu_t * | 252 | percpu_t * | |
255 | percpu_alloc(size_t size) | 253 | percpu_alloc(size_t size) | |
256 | { | 254 | { | |
257 | unsigned int offset; | 255 | unsigned int offset; | |
258 | percpu_t *pc; | 256 | percpu_t *pc; | |
259 | 257 | |||
260 | ASSERT_SLEEPABLE(); | 258 | ASSERT_SLEEPABLE(); | |
261 | offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT); | 259 | offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT); | |
262 | pc = (percpu_t *)percpu_encrypt((uintptr_t)offset); | 260 | pc = (percpu_t *)percpu_encrypt((uintptr_t)offset); | |
263 | percpu_zero(pc, size); | 261 | percpu_zero(pc, size); | |
264 | return pc; | 262 | return pc; | |
265 | } | 263 | } | |
266 | 264 | |||
267 | /* | 265 | /* | |
268 | * percpu_free: free percpu storage | 266 | * percpu_free: free percpu storage | |
269 | * | 267 | * | |
270 | * => called in thread context. | 268 | * => called in thread context. | |
271 | * => considered as an expensive and rare operation. | 269 | * => considered as an expensive and rare operation. | |
272 | */ | 270 | */ | |
273 | 271 | |||
274 | void | 272 | void | |
275 | percpu_free(percpu_t *pc, size_t size) | 273 | percpu_free(percpu_t *pc, size_t size) | |
276 | { | 274 | { | |
277 | 275 | |||
278 | ASSERT_SLEEPABLE(); | 276 | ASSERT_SLEEPABLE(); | |
279 | vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size); | 277 | vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size); | |
280 | } | 278 | } | |
281 | 279 | |||
282 | /* | 280 | /* | |
283 | * percpu_getref: | 281 | * percpu_getref: | |
284 | * | 282 | * | |
285 | * => safe to be used in either thread or interrupt context | 283 | * => safe to be used in either thread or interrupt context | |
286 | * => disables preemption; must be bracketed with a percpu_putref() | 284 | * => disables preemption; must be bracketed with a percpu_putref() | |
287 | */ | 285 | */ | |
288 | 286 | |||
289 | void * | 287 | void * | |
290 | percpu_getref(percpu_t *pc) | 288 | percpu_getref(percpu_t *pc) | |
291 | { | 289 | { | |
292 | 290 | |||
293 | KPREEMPT_DISABLE(curlwp); | 291 | KPREEMPT_DISABLE(curlwp); | |
294 | return percpu_getptr_remote(pc, curcpu()); | 292 | return percpu_getptr_remote(pc, curcpu()); | |
295 | } | 293 | } | |
296 | 294 | |||
297 | /* | 295 | /* | |
298 | * percpu_putref: | 296 | * percpu_putref: | |
299 | * | 297 | * | |
300 | * => drops the preemption-disabled count after caller is done with per-cpu | 298 | * => drops the preemption-disabled count after caller is done with per-cpu | |
301 | * data | 299 | * data | |
302 | */ | 300 | */ | |
303 | 301 | |||
304 | void | 302 | void | |
305 | percpu_putref(percpu_t *pc) | 303 | percpu_putref(percpu_t *pc) | |
306 | { | 304 | { | |
307 | 305 | |||
308 | KPREEMPT_ENABLE(curlwp); | 306 | KPREEMPT_ENABLE(curlwp); | |
309 | } | 307 | } | |
310 | 308 | |||
311 | /* | 309 | /* | |
312 | * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote: | 310 | * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote: | |
313 | * helpers to access remote cpu's percpu data. | 311 | * helpers to access remote cpu's percpu data. | |
314 | * | 312 | * | |
315 | * => called in thread context. | 313 | * => called in thread context. | |
316 | * => percpu_traverse_enter can block low-priority xcalls. | 314 | * => percpu_traverse_enter can block low-priority xcalls. | |
317 | * => typical usage would be: | 315 | * => typical usage would be: | |
318 | * | 316 | * | |
319 | * sum = 0; | 317 | * sum = 0; | |
320 | * percpu_traverse_enter(); | 318 | * percpu_traverse_enter(); | |
321 | * for (CPU_INFO_FOREACH(cii, ci)) { | 319 | * for (CPU_INFO_FOREACH(cii, ci)) { | |
322 | * unsigned int *p = percpu_getptr_remote(pc, ci); | 320 | * unsigned int *p = percpu_getptr_remote(pc, ci); | |
323 | * sum += *p; | 321 | * sum += *p; | |
324 | * } | 322 | * } | |
325 | * percpu_traverse_exit(); | 323 | * percpu_traverse_exit(); | |
326 | */ | 324 | */ | |
327 | 325 | |||
328 | void | 326 | void | |
329 | percpu_traverse_enter(void) | 327 | percpu_traverse_enter(void) | |
330 | { | 328 | { | |
331 | 329 | |||
332 | ASSERT_SLEEPABLE(); | 330 | ASSERT_SLEEPABLE(); | |
333 | rw_enter(&percpu_swap_lock, RW_READER); | 331 | rw_enter(&percpu_swap_lock, RW_READER); | |
334 | } | 332 | } | |
335 | 333 | |||
336 | void | 334 | void | |
337 | percpu_traverse_exit(void) | 335 | percpu_traverse_exit(void) | |
338 | { | 336 | { | |
339 | 337 | |||
340 | rw_exit(&percpu_swap_lock); | 338 | rw_exit(&percpu_swap_lock); | |
341 | } | 339 | } | |
342 | 340 | |||
343 | void * | 341 | void * | |
344 | percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci) | 342 | percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci) | |
345 | { | 343 | { | |
346 | 344 | |||
347 | return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)]; | 345 | return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)]; | |
348 | } | 346 | } | |
349 | 347 | |||
350 | /* | 348 | /* | |
351 | * percpu_foreach: call the specified callback function for each cpus. | 349 | * percpu_foreach: call the specified callback function for each cpus. | |
352 | * | 350 | * | |
353 | * => called in thread context. | 351 | * => called in thread context. | |
354 | * => caller should not rely on the cpu iteration order. | 352 | * => caller should not rely on the cpu iteration order. | |
355 | * => the callback function should be minimum because it is executed with | 353 | * => the callback function should be minimum because it is executed with | |
356 | * holding a global lock, which can block low-priority xcalls. | 354 | * holding a global lock, which can block low-priority xcalls. | |
357 | * eg. it's illegal for a callback function to sleep for memory allocation. | 355 | * eg. it's illegal for a callback function to sleep for memory allocation. | |
358 | */ | 356 | */ | |
359 | void | 357 | void | |
360 | percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg) | 358 | percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg) | |
361 | { | 359 | { | |
362 | CPU_INFO_ITERATOR cii; | 360 | CPU_INFO_ITERATOR cii; | |
363 | struct cpu_info *ci; | 361 | struct cpu_info *ci; | |
364 | 362 | |||
365 | percpu_traverse_enter(); | 363 | percpu_traverse_enter(); | |
366 | for (CPU_INFO_FOREACH(cii, ci)) { | 364 | for (CPU_INFO_FOREACH(cii, ci)) { | |
367 | (*cb)(percpu_getptr_remote(pc, ci), arg, ci); | 365 | (*cb)(percpu_getptr_remote(pc, ci), arg, ci); | |
368 | } | 366 | } | |
369 | percpu_traverse_exit(); | 367 | percpu_traverse_exit(); | |
370 | } | 368 | } |
--- src/sys/kern/subr_workqueue.c 2009/11/11 14:54:40 1.30
+++ src/sys/kern/subr_workqueue.c 2011/07/27 14:35:34 1.31
@@ -1,303 +1,301 @@ | @@ -1,303 +1,301 @@ | |||
1 | /* $NetBSD: subr_workqueue.c,v 1.30 2009/11/11 14:54:40 rmind Exp $ */ | 1 | /* $NetBSD: subr_workqueue.c,v 1.31 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c)2002, 2005, 2006, 2007 YAMAMOTO Takashi, | 4 | * Copyright (c)2002, 2005, 2006, 2007 YAMAMOTO Takashi, | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 9 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 10 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 11 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 14 | * documentation and/or other materials provided with the distribution. | |
15 | * | 15 | * | |
16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
26 | * SUCH DAMAGE. | 26 | * SUCH DAMAGE. | |
27 | */ | 27 | */ | |
28 | 28 | |||
29 | #include <sys/cdefs.h> | 29 | #include <sys/cdefs.h> | |
30 | __KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.30 2009/11/11 14:54:40 rmind Exp $"); | 30 | __KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.31 2011/07/27 14:35:34 uebayasi Exp $"); | |
31 | 31 | |||
32 | #include <sys/param.h> | 32 | #include <sys/param.h> | |
33 | #include <sys/cpu.h> | 33 | #include <sys/cpu.h> | |
34 | #include <sys/systm.h> | 34 | #include <sys/systm.h> | |
35 | #include <sys/kthread.h> | 35 | #include <sys/kthread.h> | |
36 | #include <sys/kmem.h> | 36 | #include <sys/kmem.h> | |
37 | #include <sys/proc.h> | 37 | #include <sys/proc.h> | |
38 | #include <sys/workqueue.h> | 38 | #include <sys/workqueue.h> | |
39 | #include <sys/mutex.h> | 39 | #include <sys/mutex.h> | |
40 | #include <sys/condvar.h> | 40 | #include <sys/condvar.h> | |
41 | #include <sys/queue.h> | 41 | #include <sys/queue.h> | |
42 | 42 | |||
43 | #include <uvm/uvm_extern.h> | |||
44 | ||||
45 | typedef struct work_impl { | 43 | typedef struct work_impl { | |
46 | SIMPLEQ_ENTRY(work_impl) wk_entry; | 44 | SIMPLEQ_ENTRY(work_impl) wk_entry; | |
47 | } work_impl_t; | 45 | } work_impl_t; | |
48 | 46 | |||
49 | SIMPLEQ_HEAD(workqhead, work_impl); | 47 | SIMPLEQ_HEAD(workqhead, work_impl); | |
50 | 48 | |||
51 | struct workqueue_queue { | 49 | struct workqueue_queue { | |
52 | kmutex_t q_mutex; | 50 | kmutex_t q_mutex; | |
53 | kcondvar_t q_cv; | 51 | kcondvar_t q_cv; | |
54 | struct workqhead q_queue; | 52 | struct workqhead q_queue; | |
55 | lwp_t *q_worker; | 53 | lwp_t *q_worker; | |
56 | }; | 54 | }; | |
57 | 55 | |||
58 | struct workqueue { | 56 | struct workqueue { | |
59 | void (*wq_func)(struct work *, void *); | 57 | void (*wq_func)(struct work *, void *); | |
60 | void *wq_arg; | 58 | void *wq_arg; | |
61 | int wq_flags; | 59 | int wq_flags; | |
62 | 60 | |||
63 | const char *wq_name; | 61 | const char *wq_name; | |
64 | pri_t wq_prio; | 62 | pri_t wq_prio; | |
65 | void *wq_ptr; | 63 | void *wq_ptr; | |
66 | }; | 64 | }; | |
67 | 65 | |||
68 | #define WQ_SIZE (roundup2(sizeof(struct workqueue), coherency_unit)) | 66 | #define WQ_SIZE (roundup2(sizeof(struct workqueue), coherency_unit)) | |
69 | #define WQ_QUEUE_SIZE (roundup2(sizeof(struct workqueue_queue), coherency_unit)) | 67 | #define WQ_QUEUE_SIZE (roundup2(sizeof(struct workqueue_queue), coherency_unit)) | |
70 | 68 | |||
71 | #define POISON 0xaabbccdd | 69 | #define POISON 0xaabbccdd | |
72 | 70 | |||
73 | static size_t | 71 | static size_t | |
74 | workqueue_size(int flags) | 72 | workqueue_size(int flags) | |
75 | { | 73 | { | |
76 | 74 | |||
77 | return WQ_SIZE | 75 | return WQ_SIZE | |
78 | + ((flags & WQ_PERCPU) != 0 ? ncpu : 1) * WQ_QUEUE_SIZE | 76 | + ((flags & WQ_PERCPU) != 0 ? ncpu : 1) * WQ_QUEUE_SIZE | |
79 | + coherency_unit; | 77 | + coherency_unit; | |
80 | } | 78 | } | |
81 | 79 | |||
82 | static struct workqueue_queue * | 80 | static struct workqueue_queue * | |
83 | workqueue_queue_lookup(struct workqueue *wq, struct cpu_info *ci) | 81 | workqueue_queue_lookup(struct workqueue *wq, struct cpu_info *ci) | |
84 | { | 82 | { | |
85 | u_int idx = 0; | 83 | u_int idx = 0; | |
86 | 84 | |||
87 | if (wq->wq_flags & WQ_PERCPU) { | 85 | if (wq->wq_flags & WQ_PERCPU) { | |
88 | idx = ci ? cpu_index(ci) : cpu_index(curcpu()); | 86 | idx = ci ? cpu_index(ci) : cpu_index(curcpu()); | |
89 | } | 87 | } | |
90 | 88 | |||
91 | return (void *)((uintptr_t)(wq) + WQ_SIZE + (idx * WQ_QUEUE_SIZE)); | 89 | return (void *)((uintptr_t)(wq) + WQ_SIZE + (idx * WQ_QUEUE_SIZE)); | |
92 | } | 90 | } | |
93 | 91 | |||
94 | static void | 92 | static void | |
95 | workqueue_runlist(struct workqueue *wq, struct workqhead *list) | 93 | workqueue_runlist(struct workqueue *wq, struct workqhead *list) | |
96 | { | 94 | { | |
97 | work_impl_t *wk; | 95 | work_impl_t *wk; | |
98 | work_impl_t *next; | 96 | work_impl_t *next; | |
99 | 97 | |||
100 | /* | 98 | /* | |
101 | * note that "list" is not a complete SIMPLEQ. | 99 | * note that "list" is not a complete SIMPLEQ. | |
102 | */ | 100 | */ | |
103 | 101 | |||
104 | for (wk = SIMPLEQ_FIRST(list); wk != NULL; wk = next) { | 102 | for (wk = SIMPLEQ_FIRST(list); wk != NULL; wk = next) { | |
105 | next = SIMPLEQ_NEXT(wk, wk_entry); | 103 | next = SIMPLEQ_NEXT(wk, wk_entry); | |
106 | (*wq->wq_func)((void *)wk, wq->wq_arg); | 104 | (*wq->wq_func)((void *)wk, wq->wq_arg); | |
107 | } | 105 | } | |
108 | } | 106 | } | |
109 | 107 | |||
110 | static void | 108 | static void | |
111 | workqueue_worker(void *cookie) | 109 | workqueue_worker(void *cookie) | |
112 | { | 110 | { | |
113 | struct workqueue *wq = cookie; | 111 | struct workqueue *wq = cookie; | |
114 | struct workqueue_queue *q; | 112 | struct workqueue_queue *q; | |
115 | 113 | |||
116 | /* find the workqueue of this kthread */ | 114 | /* find the workqueue of this kthread */ | |
117 | q = workqueue_queue_lookup(wq, curlwp->l_cpu); | 115 | q = workqueue_queue_lookup(wq, curlwp->l_cpu); | |
118 | 116 | |||
119 | for (;;) { | 117 | for (;;) { | |
120 | struct workqhead tmp; | 118 | struct workqhead tmp; | |
121 | 119 | |||
122 | /* | 120 | /* | |
123 | * we violate abstraction of SIMPLEQ. | 121 | * we violate abstraction of SIMPLEQ. | |
124 | */ | 122 | */ | |
125 | 123 | |||
126 | #if defined(DIAGNOSTIC) | 124 | #if defined(DIAGNOSTIC) | |
127 | tmp.sqh_last = (void *)POISON; | 125 | tmp.sqh_last = (void *)POISON; | |
128 | #endif /* defined(DIAGNOSTIC) */ | 126 | #endif /* defined(DIAGNOSTIC) */ | |
129 | 127 | |||
130 | mutex_enter(&q->q_mutex); | 128 | mutex_enter(&q->q_mutex); | |
131 | while (SIMPLEQ_EMPTY(&q->q_queue)) | 129 | while (SIMPLEQ_EMPTY(&q->q_queue)) | |
132 | cv_wait(&q->q_cv, &q->q_mutex); | 130 | cv_wait(&q->q_cv, &q->q_mutex); | |
133 | tmp.sqh_first = q->q_queue.sqh_first; /* XXX */ | 131 | tmp.sqh_first = q->q_queue.sqh_first; /* XXX */ | |
134 | SIMPLEQ_INIT(&q->q_queue); | 132 | SIMPLEQ_INIT(&q->q_queue); | |
135 | mutex_exit(&q->q_mutex); | 133 | mutex_exit(&q->q_mutex); | |
136 | 134 | |||
137 | workqueue_runlist(wq, &tmp); | 135 | workqueue_runlist(wq, &tmp); | |
138 | } | 136 | } | |
139 | } | 137 | } | |
140 | 138 | |||
141 | static void | 139 | static void | |
142 | workqueue_init(struct workqueue *wq, const char *name, | 140 | workqueue_init(struct workqueue *wq, const char *name, | |
143 | void (*callback_func)(struct work *, void *), void *callback_arg, | 141 | void (*callback_func)(struct work *, void *), void *callback_arg, | |
144 | pri_t prio, int ipl) | 142 | pri_t prio, int ipl) | |
145 | { | 143 | { | |
146 | 144 | |||
147 | wq->wq_prio = prio; | 145 | wq->wq_prio = prio; | |
148 | wq->wq_name = name; | 146 | wq->wq_name = name; | |
149 | wq->wq_func = callback_func; | 147 | wq->wq_func = callback_func; | |
150 | wq->wq_arg = callback_arg; | 148 | wq->wq_arg = callback_arg; | |
151 | } | 149 | } | |
152 | 150 | |||
153 | static int | 151 | static int | |
154 | workqueue_initqueue(struct workqueue *wq, struct workqueue_queue *q, | 152 | workqueue_initqueue(struct workqueue *wq, struct workqueue_queue *q, | |
155 | int ipl, struct cpu_info *ci) | 153 | int ipl, struct cpu_info *ci) | |
156 | { | 154 | { | |
157 | int error, ktf; | 155 | int error, ktf; | |
158 | 156 | |||
159 | KASSERT(q->q_worker == NULL); | 157 | KASSERT(q->q_worker == NULL); | |
160 | 158 | |||
161 | mutex_init(&q->q_mutex, MUTEX_DEFAULT, ipl); | 159 | mutex_init(&q->q_mutex, MUTEX_DEFAULT, ipl); | |
162 | cv_init(&q->q_cv, wq->wq_name); | 160 | cv_init(&q->q_cv, wq->wq_name); | |
163 | SIMPLEQ_INIT(&q->q_queue); | 161 | SIMPLEQ_INIT(&q->q_queue); | |
164 | ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0); | 162 | ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0); | |
165 | if (ci) { | 163 | if (ci) { | |
166 | error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker, | 164 | error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker, | |
167 | wq, &q->q_worker, "%s/%u", wq->wq_name, ci->ci_index); | 165 | wq, &q->q_worker, "%s/%u", wq->wq_name, ci->ci_index); | |
168 | } else { | 166 | } else { | |
169 | error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker, | 167 | error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker, | |
170 | wq, &q->q_worker, "%s", wq->wq_name); | 168 | wq, &q->q_worker, "%s", wq->wq_name); | |
171 | } | 169 | } | |
172 | if (error != 0) { | 170 | if (error != 0) { | |
173 | mutex_destroy(&q->q_mutex); | 171 | mutex_destroy(&q->q_mutex); | |
174 | cv_destroy(&q->q_cv); | 172 | cv_destroy(&q->q_cv); | |
175 | KASSERT(q->q_worker == NULL); | 173 | KASSERT(q->q_worker == NULL); | |
176 | } | 174 | } | |
177 | return error; | 175 | return error; | |
178 | } | 176 | } | |
179 | 177 | |||
180 | struct workqueue_exitargs { | 178 | struct workqueue_exitargs { | |
181 | work_impl_t wqe_wk; | 179 | work_impl_t wqe_wk; | |
182 | struct workqueue_queue *wqe_q; | 180 | struct workqueue_queue *wqe_q; | |
183 | }; | 181 | }; | |
184 | 182 | |||
185 | static void | 183 | static void | |
186 | workqueue_exit(struct work *wk, void *arg) | 184 | workqueue_exit(struct work *wk, void *arg) | |
187 | { | 185 | { | |
188 | struct workqueue_exitargs *wqe = (void *)wk; | 186 | struct workqueue_exitargs *wqe = (void *)wk; | |
189 | struct workqueue_queue *q = wqe->wqe_q; | 187 | struct workqueue_queue *q = wqe->wqe_q; | |
190 | 188 | |||
191 | /* | 189 | /* | |
192 | * only competition at this point is workqueue_finiqueue. | 190 | * only competition at this point is workqueue_finiqueue. | |
193 | */ | 191 | */ | |
194 | 192 | |||
195 | KASSERT(q->q_worker == curlwp); | 193 | KASSERT(q->q_worker == curlwp); | |
196 | KASSERT(SIMPLEQ_EMPTY(&q->q_queue)); | 194 | KASSERT(SIMPLEQ_EMPTY(&q->q_queue)); | |
197 | mutex_enter(&q->q_mutex); | 195 | mutex_enter(&q->q_mutex); | |
198 | q->q_worker = NULL; | 196 | q->q_worker = NULL; | |
199 | cv_signal(&q->q_cv); | 197 | cv_signal(&q->q_cv); | |
200 | mutex_exit(&q->q_mutex); | 198 | mutex_exit(&q->q_mutex); | |
201 | kthread_exit(0); | 199 | kthread_exit(0); | |
202 | } | 200 | } | |
203 | 201 | |||
204 | static void | 202 | static void | |
205 | workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q) | 203 | workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q) | |
206 | { | 204 | { | |
207 | struct workqueue_exitargs wqe; | 205 | struct workqueue_exitargs wqe; | |
208 | 206 | |||
209 | KASSERT(wq->wq_func == workqueue_exit); | 207 | KASSERT(wq->wq_func == workqueue_exit); | |
210 | 208 | |||
211 | wqe.wqe_q = q; | 209 | wqe.wqe_q = q; | |
212 | KASSERT(SIMPLEQ_EMPTY(&q->q_queue)); | 210 | KASSERT(SIMPLEQ_EMPTY(&q->q_queue)); | |
213 | KASSERT(q->q_worker != NULL); | 211 | KASSERT(q->q_worker != NULL); | |
214 | mutex_enter(&q->q_mutex); | 212 | mutex_enter(&q->q_mutex); | |
215 | SIMPLEQ_INSERT_TAIL(&q->q_queue, &wqe.wqe_wk, wk_entry); | 213 | SIMPLEQ_INSERT_TAIL(&q->q_queue, &wqe.wqe_wk, wk_entry); | |
216 | cv_signal(&q->q_cv); | 214 | cv_signal(&q->q_cv); | |
217 | while (q->q_worker != NULL) { | 215 | while (q->q_worker != NULL) { | |
218 | cv_wait(&q->q_cv, &q->q_mutex); | 216 | cv_wait(&q->q_cv, &q->q_mutex); | |
219 | } | 217 | } | |
220 | mutex_exit(&q->q_mutex); | 218 | mutex_exit(&q->q_mutex); | |
221 | mutex_destroy(&q->q_mutex); | 219 | mutex_destroy(&q->q_mutex); | |
222 | cv_destroy(&q->q_cv); | 220 | cv_destroy(&q->q_cv); | |
223 | } | 221 | } | |
224 | 222 | |||
225 | /* --- */ | 223 | /* --- */ | |
226 | 224 | |||
227 | int | 225 | int | |
228 | workqueue_create(struct workqueue **wqp, const char *name, | 226 | workqueue_create(struct workqueue **wqp, const char *name, | |
229 | void (*callback_func)(struct work *, void *), void *callback_arg, | 227 | void (*callback_func)(struct work *, void *), void *callback_arg, | |
230 | pri_t prio, int ipl, int flags) | 228 | pri_t prio, int ipl, int flags) | |
231 | { | 229 | { | |
232 | struct workqueue *wq; | 230 | struct workqueue *wq; | |
233 | struct workqueue_queue *q; | 231 | struct workqueue_queue *q; | |
234 | void *ptr; | 232 | void *ptr; | |
235 | int error = 0; | 233 | int error = 0; | |
236 | 234 | |||
237 | CTASSERT(sizeof(work_impl_t) <= sizeof(struct work)); | 235 | CTASSERT(sizeof(work_impl_t) <= sizeof(struct work)); | |
238 | 236 | |||
239 | ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP); | 237 | ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP); | |
240 | wq = (void *)roundup2((uintptr_t)ptr, coherency_unit); | 238 | wq = (void *)roundup2((uintptr_t)ptr, coherency_unit); | |
241 | wq->wq_ptr = ptr; | 239 | wq->wq_ptr = ptr; | |
242 | wq->wq_flags = flags; | 240 | wq->wq_flags = flags; | |
243 | 241 | |||
244 | workqueue_init(wq, name, callback_func, callback_arg, prio, ipl); | 242 | workqueue_init(wq, name, callback_func, callback_arg, prio, ipl); | |
245 | 243 | |||
246 | if (flags & WQ_PERCPU) { | 244 | if (flags & WQ_PERCPU) { | |
247 | struct cpu_info *ci; | 245 | struct cpu_info *ci; | |
248 | CPU_INFO_ITERATOR cii; | 246 | CPU_INFO_ITERATOR cii; | |
249 | 247 | |||
250 | /* create the work-queue for each CPU */ | 248 | /* create the work-queue for each CPU */ | |
251 | for (CPU_INFO_FOREACH(cii, ci)) { | 249 | for (CPU_INFO_FOREACH(cii, ci)) { | |
252 | q = workqueue_queue_lookup(wq, ci); | 250 | q = workqueue_queue_lookup(wq, ci); | |
253 | error = workqueue_initqueue(wq, q, ipl, ci); | 251 | error = workqueue_initqueue(wq, q, ipl, ci); | |
254 | if (error) { | 252 | if (error) { | |
255 | break; | 253 | break; | |
256 | } | 254 | } | |
257 | } | 255 | } | |
258 | } else { | 256 | } else { | |
259 | /* initialize a work-queue */ | 257 | /* initialize a work-queue */ | |
260 | q = workqueue_queue_lookup(wq, NULL); | 258 | q = workqueue_queue_lookup(wq, NULL); | |
261 | error = workqueue_initqueue(wq, q, ipl, NULL); | 259 | error = workqueue_initqueue(wq, q, ipl, NULL); | |
262 | } | 260 | } | |
263 | 261 | |||
264 | if (error != 0) { | 262 | if (error != 0) { | |
265 | workqueue_destroy(wq); | 263 | workqueue_destroy(wq); | |
266 | } else { | 264 | } else { | |
267 | *wqp = wq; | 265 | *wqp = wq; | |
268 | } | 266 | } | |
269 | 267 | |||
270 | return error; | 268 | return error; | |
271 | } | 269 | } | |
272 | 270 | |||
273 | void | 271 | void | |
274 | workqueue_destroy(struct workqueue *wq) | 272 | workqueue_destroy(struct workqueue *wq) | |
275 | { | 273 | { | |
276 | struct workqueue_queue *q; | 274 | struct workqueue_queue *q; | |
277 | struct cpu_info *ci; | 275 | struct cpu_info *ci; | |
278 | CPU_INFO_ITERATOR cii; | 276 | CPU_INFO_ITERATOR cii; | |
279 | 277 | |||
280 | wq->wq_func = workqueue_exit; | 278 | wq->wq_func = workqueue_exit; | |
281 | for (CPU_INFO_FOREACH(cii, ci)) { | 279 | for (CPU_INFO_FOREACH(cii, ci)) { | |
282 | q = workqueue_queue_lookup(wq, ci); | 280 | q = workqueue_queue_lookup(wq, ci); | |
283 | if (q->q_worker != NULL) { | 281 | if (q->q_worker != NULL) { | |
284 | workqueue_finiqueue(wq, q); | 282 | workqueue_finiqueue(wq, q); | |
285 | } | 283 | } | |
286 | } | 284 | } | |
287 | kmem_free(wq->wq_ptr, workqueue_size(wq->wq_flags)); | 285 | kmem_free(wq->wq_ptr, workqueue_size(wq->wq_flags)); | |
288 | } | 286 | } | |
289 | 287 | |||
290 | void | 288 | void | |
291 | workqueue_enqueue(struct workqueue *wq, struct work *wk0, struct cpu_info *ci) | 289 | workqueue_enqueue(struct workqueue *wq, struct work *wk0, struct cpu_info *ci) | |
292 | { | 290 | { | |
293 | struct workqueue_queue *q; | 291 | struct workqueue_queue *q; | |
294 | work_impl_t *wk = (void *)wk0; | 292 | work_impl_t *wk = (void *)wk0; | |
295 | 293 | |||
296 | KASSERT(wq->wq_flags & WQ_PERCPU || ci == NULL); | 294 | KASSERT(wq->wq_flags & WQ_PERCPU || ci == NULL); | |
297 | q = workqueue_queue_lookup(wq, ci); | 295 | q = workqueue_queue_lookup(wq, ci); | |
298 | 296 | |||
299 | mutex_enter(&q->q_mutex); | 297 | mutex_enter(&q->q_mutex); | |
300 | SIMPLEQ_INSERT_TAIL(&q->q_queue, wk, wk_entry); | 298 | SIMPLEQ_INSERT_TAIL(&q->q_queue, wk, wk_entry); | |
301 | cv_signal(&q->q_cv); | 299 | cv_signal(&q->q_cv); | |
302 | mutex_exit(&q->q_mutex); | 300 | mutex_exit(&q->q_mutex); | |
303 | } | 301 | } |
--- src/sys/kern/sys_generic.c 2011/04/10 15:45:33 1.126
+++ src/sys/kern/sys_generic.c 2011/07/27 14:35:34 1.127
@@ -1,677 +1,675 @@ | @@ -1,677 +1,675 @@ | |||
1 | /* $NetBSD: sys_generic.c,v 1.126 2011/04/10 15:45:33 christos Exp $ */ | 1 | /* $NetBSD: sys_generic.c,v 1.127 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 1982, 1986, 1989, 1993 | 33 | * Copyright (c) 1982, 1986, 1989, 1993 | |
34 | * The Regents of the University of California. All rights reserved. | 34 | * The Regents of the University of California. All rights reserved. | |
35 | * (c) UNIX System Laboratories, Inc. | 35 | * (c) UNIX System Laboratories, Inc. | |
36 | * All or some portions of this file are derived from material licensed | 36 | * All or some portions of this file are derived from material licensed | |
37 | * to the University of California by American Telephone and Telegraph | 37 | * to the University of California by American Telephone and Telegraph | |
38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
39 | * the permission of UNIX System Laboratories, Inc. | 39 | * the permission of UNIX System Laboratories, Inc. | |
40 | * | 40 | * | |
41 | * Redistribution and use in source and binary forms, with or without | 41 | * Redistribution and use in source and binary forms, with or without | |
42 | * modification, are permitted provided that the following conditions | 42 | * modification, are permitted provided that the following conditions | |
43 | * are met: | 43 | * are met: | |
44 | * 1. Redistributions of source code must retain the above copyright | 44 | * 1. Redistributions of source code must retain the above copyright | |
45 | * notice, this list of conditions and the following disclaimer. | 45 | * notice, this list of conditions and the following disclaimer. | |
46 | * 2. Redistributions in binary form must reproduce the above copyright | 46 | * 2. Redistributions in binary form must reproduce the above copyright | |
47 | * notice, this list of conditions and the following disclaimer in the | 47 | * notice, this list of conditions and the following disclaimer in the | |
48 | * documentation and/or other materials provided with the distribution. | 48 | * documentation and/or other materials provided with the distribution. | |
49 | * 3. Neither the name of the University nor the names of its contributors | 49 | * 3. Neither the name of the University nor the names of its contributors | |
50 | * may be used to endorse or promote products derived from this software | 50 | * may be used to endorse or promote products derived from this software | |
51 | * without specific prior written permission. | 51 | * without specific prior written permission. | |
52 | * | 52 | * | |
53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
63 | * SUCH DAMAGE. | 63 | * SUCH DAMAGE. | |
64 | * | 64 | * | |
65 | * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 | 65 | * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 | |
66 | */ | 66 | */ | |
67 | 67 | |||
68 | /* | 68 | /* | |
69 | * System calls relating to files. | 69 | * System calls relating to files. | |
70 | */ | 70 | */ | |
71 | 71 | |||
72 | #include <sys/cdefs.h> | 72 | #include <sys/cdefs.h> | |
73 | __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.126 2011/04/10 15:45:33 christos Exp $"); | 73 | __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.127 2011/07/27 14:35:34 uebayasi Exp $"); | |
74 | 74 | |||
75 | #include <sys/param.h> | 75 | #include <sys/param.h> | |
76 | #include <sys/systm.h> | 76 | #include <sys/systm.h> | |
77 | #include <sys/filedesc.h> | 77 | #include <sys/filedesc.h> | |
78 | #include <sys/ioctl.h> | 78 | #include <sys/ioctl.h> | |
79 | #include <sys/file.h> | 79 | #include <sys/file.h> | |
80 | #include <sys/proc.h> | 80 | #include <sys/proc.h> | |
81 | #include <sys/socketvar.h> | 81 | #include <sys/socketvar.h> | |
82 | #include <sys/signalvar.h> | 82 | #include <sys/signalvar.h> | |
83 | #include <sys/uio.h> | 83 | #include <sys/uio.h> | |
84 | #include <sys/kernel.h> | 84 | #include <sys/kernel.h> | |
85 | #include <sys/stat.h> | 85 | #include <sys/stat.h> | |
86 | #include <sys/kmem.h> | 86 | #include <sys/kmem.h> | |
87 | #include <sys/poll.h> | 87 | #include <sys/poll.h> | |
88 | #include <sys/vnode.h> | 88 | #include <sys/vnode.h> | |
89 | #include <sys/mount.h> | 89 | #include <sys/mount.h> | |
90 | #include <sys/syscallargs.h> | 90 | #include <sys/syscallargs.h> | |
91 | #include <sys/ktrace.h> | 91 | #include <sys/ktrace.h> | |
92 | #include <sys/atomic.h> | 92 | #include <sys/atomic.h> | |
93 | #include <sys/disklabel.h> | 93 | #include <sys/disklabel.h> | |
94 | 94 | |||
95 | #include <uvm/uvm_extern.h> | |||
96 | ||||
97 | /* | 95 | /* | |
98 | * Read system call. | 96 | * Read system call. | |
99 | */ | 97 | */ | |
100 | /* ARGSUSED */ | 98 | /* ARGSUSED */ | |
101 | int | 99 | int | |
102 | sys_read(struct lwp *l, const struct sys_read_args *uap, register_t *retval) | 100 | sys_read(struct lwp *l, const struct sys_read_args *uap, register_t *retval) | |
103 | { | 101 | { | |
104 | /* { | 102 | /* { | |
105 | syscallarg(int) fd; | 103 | syscallarg(int) fd; | |
106 | syscallarg(void *) buf; | 104 | syscallarg(void *) buf; | |
107 | syscallarg(size_t) nbyte; | 105 | syscallarg(size_t) nbyte; | |
108 | } */ | 106 | } */ | |
109 | file_t *fp; | 107 | file_t *fp; | |
110 | int fd; | 108 | int fd; | |
111 | 109 | |||
112 | fd = SCARG(uap, fd); | 110 | fd = SCARG(uap, fd); | |
113 | 111 | |||
114 | if ((fp = fd_getfile(fd)) == NULL) | 112 | if ((fp = fd_getfile(fd)) == NULL) | |
115 | return (EBADF); | 113 | return (EBADF); | |
116 | 114 | |||
117 | if ((fp->f_flag & FREAD) == 0) { | 115 | if ((fp->f_flag & FREAD) == 0) { | |
118 | fd_putfile(fd); | 116 | fd_putfile(fd); | |
119 | return (EBADF); | 117 | return (EBADF); | |
120 | } | 118 | } | |
121 | 119 | |||
122 | /* dofileread() will unuse the descriptor for us */ | 120 | /* dofileread() will unuse the descriptor for us */ | |
123 | return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), | 121 | return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), | |
124 | &fp->f_offset, FOF_UPDATE_OFFSET, retval)); | 122 | &fp->f_offset, FOF_UPDATE_OFFSET, retval)); | |
125 | } | 123 | } | |
126 | 124 | |||
127 | int | 125 | int | |
128 | dofileread(int fd, struct file *fp, void *buf, size_t nbyte, | 126 | dofileread(int fd, struct file *fp, void *buf, size_t nbyte, | |
129 | off_t *offset, int flags, register_t *retval) | 127 | off_t *offset, int flags, register_t *retval) | |
130 | { | 128 | { | |
131 | struct iovec aiov; | 129 | struct iovec aiov; | |
132 | struct uio auio; | 130 | struct uio auio; | |
133 | size_t cnt; | 131 | size_t cnt; | |
134 | int error; | 132 | int error; | |
135 | lwp_t *l; | 133 | lwp_t *l; | |
136 | 134 | |||
137 | l = curlwp; | 135 | l = curlwp; | |
138 | 136 | |||
139 | aiov.iov_base = (void *)buf; | 137 | aiov.iov_base = (void *)buf; | |
140 | aiov.iov_len = nbyte; | 138 | aiov.iov_len = nbyte; | |
141 | auio.uio_iov = &aiov; | 139 | auio.uio_iov = &aiov; | |
142 | auio.uio_iovcnt = 1; | 140 | auio.uio_iovcnt = 1; | |
143 | auio.uio_resid = nbyte; | 141 | auio.uio_resid = nbyte; | |
144 | auio.uio_rw = UIO_READ; | 142 | auio.uio_rw = UIO_READ; | |
145 | auio.uio_vmspace = l->l_proc->p_vmspace; | 143 | auio.uio_vmspace = l->l_proc->p_vmspace; | |
146 | 144 | |||
147 | /* | 145 | /* | |
148 | * Reads return ssize_t because -1 is returned on error. Therefore | 146 | * Reads return ssize_t because -1 is returned on error. Therefore | |
149 | * we must restrict the length to SSIZE_MAX to avoid garbage return | 147 | * we must restrict the length to SSIZE_MAX to avoid garbage return | |
150 | * values. | 148 | * values. | |
151 | */ | 149 | */ | |
152 | if (auio.uio_resid > SSIZE_MAX) { | 150 | if (auio.uio_resid > SSIZE_MAX) { | |
153 | error = EINVAL; | 151 | error = EINVAL; | |
154 | goto out; | 152 | goto out; | |
155 | } | 153 | } | |
156 | 154 | |||
157 | cnt = auio.uio_resid; | 155 | cnt = auio.uio_resid; | |
158 | error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); | 156 | error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); | |
159 | if (error) | 157 | if (error) | |
160 | if (auio.uio_resid != cnt && (error == ERESTART || | 158 | if (auio.uio_resid != cnt && (error == ERESTART || | |
161 | error == EINTR || error == EWOULDBLOCK)) | 159 | error == EINTR || error == EWOULDBLOCK)) | |
162 | error = 0; | 160 | error = 0; | |
163 | cnt -= auio.uio_resid; | 161 | cnt -= auio.uio_resid; | |
164 | ktrgenio(fd, UIO_READ, buf, cnt, error); | 162 | ktrgenio(fd, UIO_READ, buf, cnt, error); | |
165 | *retval = cnt; | 163 | *retval = cnt; | |
166 | out: | 164 | out: | |
167 | fd_putfile(fd); | 165 | fd_putfile(fd); | |
168 | return (error); | 166 | return (error); | |
169 | } | 167 | } | |
170 | 168 | |||
171 | /* | 169 | /* | |
172 | * Scatter read system call. | 170 | * Scatter read system call. | |
173 | */ | 171 | */ | |
174 | int | 172 | int | |
175 | sys_readv(struct lwp *l, const struct sys_readv_args *uap, register_t *retval) | 173 | sys_readv(struct lwp *l, const struct sys_readv_args *uap, register_t *retval) | |
176 | { | 174 | { | |
177 | /* { | 175 | /* { | |
178 | syscallarg(int) fd; | 176 | syscallarg(int) fd; | |
179 | syscallarg(const struct iovec *) iovp; | 177 | syscallarg(const struct iovec *) iovp; | |
180 | syscallarg(int) iovcnt; | 178 | syscallarg(int) iovcnt; | |
181 | } */ | 179 | } */ | |
182 | 180 | |||
183 | return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), | 181 | return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), | |
184 | SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); | 182 | SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); | |
185 | } | 183 | } | |
186 | 184 | |||
187 | int | 185 | int | |
188 | do_filereadv(int fd, const struct iovec *iovp, int iovcnt, | 186 | do_filereadv(int fd, const struct iovec *iovp, int iovcnt, | |
189 | off_t *offset, int flags, register_t *retval) | 187 | off_t *offset, int flags, register_t *retval) | |
190 | { | 188 | { | |
191 | struct uio auio; | 189 | struct uio auio; | |
192 | struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; | 190 | struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; | |
193 | int i, error; | 191 | int i, error; | |
194 | size_t cnt; | 192 | size_t cnt; | |
195 | u_int iovlen; | 193 | u_int iovlen; | |
196 | struct file *fp; | 194 | struct file *fp; | |
197 | struct iovec *ktriov = NULL; | 195 | struct iovec *ktriov = NULL; | |
198 | 196 | |||
199 | if (iovcnt == 0) | 197 | if (iovcnt == 0) | |
200 | return EINVAL; | 198 | return EINVAL; | |
201 | 199 | |||
202 | if ((fp = fd_getfile(fd)) == NULL) | 200 | if ((fp = fd_getfile(fd)) == NULL) | |
203 | return EBADF; | 201 | return EBADF; | |
204 | 202 | |||
205 | if ((fp->f_flag & FREAD) == 0) { | 203 | if ((fp->f_flag & FREAD) == 0) { | |
206 | fd_putfile(fd); | 204 | fd_putfile(fd); | |
207 | return EBADF; | 205 | return EBADF; | |
208 | } | 206 | } | |
209 | 207 | |||
210 | if (offset == NULL) | 208 | if (offset == NULL) | |
211 | offset = &fp->f_offset; | 209 | offset = &fp->f_offset; | |
212 | else { | 210 | else { | |
213 | struct vnode *vp = fp->f_data; | 211 | struct vnode *vp = fp->f_data; | |
214 | if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { | 212 | if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { | |
215 | error = ESPIPE; | 213 | error = ESPIPE; | |
216 | goto out; | 214 | goto out; | |
217 | } | 215 | } | |
218 | /* | 216 | /* | |
219 | * Test that the device is seekable ? | 217 | * Test that the device is seekable ? | |
220 | * XXX This works because no file systems actually | 218 | * XXX This works because no file systems actually | |
221 | * XXX take any action on the seek operation. | 219 | * XXX take any action on the seek operation. | |
222 | */ | 220 | */ | |
223 | error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); | 221 | error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); | |
224 | if (error != 0) | 222 | if (error != 0) | |
225 | goto out; | 223 | goto out; | |
226 | } | 224 | } | |
227 | 225 | |||
228 | iovlen = iovcnt * sizeof(struct iovec); | 226 | iovlen = iovcnt * sizeof(struct iovec); | |
229 | if (flags & FOF_IOV_SYSSPACE) | 227 | if (flags & FOF_IOV_SYSSPACE) | |
230 | iov = __UNCONST(iovp); | 228 | iov = __UNCONST(iovp); | |
231 | else { | 229 | else { | |
232 | iov = aiov; | 230 | iov = aiov; | |
233 | if ((u_int)iovcnt > UIO_SMALLIOV) { | 231 | if ((u_int)iovcnt > UIO_SMALLIOV) { | |
234 | if ((u_int)iovcnt > IOV_MAX) { | 232 | if ((u_int)iovcnt > IOV_MAX) { | |
235 | error = EINVAL; | 233 | error = EINVAL; | |
236 | goto out; | 234 | goto out; | |
237 | } | 235 | } | |
238 | iov = kmem_alloc(iovlen, KM_SLEEP); | 236 | iov = kmem_alloc(iovlen, KM_SLEEP); | |
239 | if (iov == NULL) { | 237 | if (iov == NULL) { | |
240 | error = ENOMEM; | 238 | error = ENOMEM; | |
241 | goto out; | 239 | goto out; | |
242 | } | 240 | } | |
243 | needfree = iov; | 241 | needfree = iov; | |
244 | } | 242 | } | |
245 | error = copyin(iovp, iov, iovlen); | 243 | error = copyin(iovp, iov, iovlen); | |
246 | if (error) | 244 | if (error) | |
247 | goto done; | 245 | goto done; | |
248 | } | 246 | } | |
249 | 247 | |||
250 | auio.uio_iov = iov; | 248 | auio.uio_iov = iov; | |
251 | auio.uio_iovcnt = iovcnt; | 249 | auio.uio_iovcnt = iovcnt; | |
252 | auio.uio_rw = UIO_READ; | 250 | auio.uio_rw = UIO_READ; | |
253 | auio.uio_vmspace = curproc->p_vmspace; | 251 | auio.uio_vmspace = curproc->p_vmspace; | |
254 | 252 | |||
255 | auio.uio_resid = 0; | 253 | auio.uio_resid = 0; | |
256 | for (i = 0; i < iovcnt; i++, iov++) { | 254 | for (i = 0; i < iovcnt; i++, iov++) { | |
257 | auio.uio_resid += iov->iov_len; | 255 | auio.uio_resid += iov->iov_len; | |
258 | /* | 256 | /* | |
259 | * Reads return ssize_t because -1 is returned on error. | 257 | * Reads return ssize_t because -1 is returned on error. | |
260 | * Therefore we must restrict the length to SSIZE_MAX to | 258 | * Therefore we must restrict the length to SSIZE_MAX to | |
261 | * avoid garbage return values. | 259 | * avoid garbage return values. | |
262 | */ | 260 | */ | |
263 | if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | 261 | if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | |
264 | error = EINVAL; | 262 | error = EINVAL; | |
265 | goto done; | 263 | goto done; | |
266 | } | 264 | } | |
267 | } | 265 | } | |
268 | 266 | |||
269 | /* | 267 | /* | |
270 | * if tracing, save a copy of iovec | 268 | * if tracing, save a copy of iovec | |
271 | */ | 269 | */ | |
272 | if (ktrpoint(KTR_GENIO)) { | 270 | if (ktrpoint(KTR_GENIO)) { | |
273 | ktriov = kmem_alloc(iovlen, KM_SLEEP); | 271 | ktriov = kmem_alloc(iovlen, KM_SLEEP); | |
274 | if (ktriov != NULL) | 272 | if (ktriov != NULL) | |
275 | memcpy(ktriov, auio.uio_iov, iovlen); | 273 | memcpy(ktriov, auio.uio_iov, iovlen); | |
276 | } | 274 | } | |
277 | 275 | |||
278 | cnt = auio.uio_resid; | 276 | cnt = auio.uio_resid; | |
279 | error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); | 277 | error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); | |
280 | if (error) | 278 | if (error) | |
281 | if (auio.uio_resid != cnt && (error == ERESTART || | 279 | if (auio.uio_resid != cnt && (error == ERESTART || | |
282 | error == EINTR || error == EWOULDBLOCK)) | 280 | error == EINTR || error == EWOULDBLOCK)) | |
283 | error = 0; | 281 | error = 0; | |
284 | cnt -= auio.uio_resid; | 282 | cnt -= auio.uio_resid; | |
285 | *retval = cnt; | 283 | *retval = cnt; | |
286 | 284 | |||
287 | if (ktriov != NULL) { | 285 | if (ktriov != NULL) { | |
288 | ktrgeniov(fd, UIO_READ, ktriov, cnt, error); | 286 | ktrgeniov(fd, UIO_READ, ktriov, cnt, error); | |
289 | kmem_free(ktriov, iovlen); | 287 | kmem_free(ktriov, iovlen); | |
290 | } | 288 | } | |
291 | 289 | |||
292 | done: | 290 | done: | |
293 | if (needfree) | 291 | if (needfree) | |
294 | kmem_free(needfree, iovlen); | 292 | kmem_free(needfree, iovlen); | |
295 | out: | 293 | out: | |
296 | fd_putfile(fd); | 294 | fd_putfile(fd); | |
297 | return (error); | 295 | return (error); | |
298 | } | 296 | } | |
299 | 297 | |||
300 | /* | 298 | /* | |
301 | * Write system call | 299 | * Write system call | |
302 | */ | 300 | */ | |
303 | int | 301 | int | |
304 | sys_write(struct lwp *l, const struct sys_write_args *uap, register_t *retval) | 302 | sys_write(struct lwp *l, const struct sys_write_args *uap, register_t *retval) | |
305 | { | 303 | { | |
306 | /* { | 304 | /* { | |
307 | syscallarg(int) fd; | 305 | syscallarg(int) fd; | |
308 | syscallarg(const void *) buf; | 306 | syscallarg(const void *) buf; | |
309 | syscallarg(size_t) nbyte; | 307 | syscallarg(size_t) nbyte; | |
310 | } */ | 308 | } */ | |
311 | file_t *fp; | 309 | file_t *fp; | |
312 | int fd; | 310 | int fd; | |
313 | 311 | |||
314 | fd = SCARG(uap, fd); | 312 | fd = SCARG(uap, fd); | |
315 | 313 | |||
316 | if ((fp = fd_getfile(fd)) == NULL) | 314 | if ((fp = fd_getfile(fd)) == NULL) | |
317 | return (EBADF); | 315 | return (EBADF); | |
318 | 316 | |||
319 | if ((fp->f_flag & FWRITE) == 0) { | 317 | if ((fp->f_flag & FWRITE) == 0) { | |
320 | fd_putfile(fd); | 318 | fd_putfile(fd); | |
321 | return (EBADF); | 319 | return (EBADF); | |
322 | } | 320 | } | |
323 | 321 | |||
324 | /* dofilewrite() will unuse the descriptor for us */ | 322 | /* dofilewrite() will unuse the descriptor for us */ | |
325 | return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), | 323 | return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), | |
326 | &fp->f_offset, FOF_UPDATE_OFFSET, retval)); | 324 | &fp->f_offset, FOF_UPDATE_OFFSET, retval)); | |
327 | } | 325 | } | |
328 | 326 | |||
329 | int | 327 | int | |
330 | dofilewrite(int fd, struct file *fp, const void *buf, | 328 | dofilewrite(int fd, struct file *fp, const void *buf, | |
331 | size_t nbyte, off_t *offset, int flags, register_t *retval) | 329 | size_t nbyte, off_t *offset, int flags, register_t *retval) | |
332 | { | 330 | { | |
333 | struct iovec aiov; | 331 | struct iovec aiov; | |
334 | struct uio auio; | 332 | struct uio auio; | |
335 | size_t cnt; | 333 | size_t cnt; | |
336 | int error; | 334 | int error; | |
337 | 335 | |||
338 | aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ | 336 | aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ | |
339 | aiov.iov_len = nbyte; | 337 | aiov.iov_len = nbyte; | |
340 | auio.uio_iov = &aiov; | 338 | auio.uio_iov = &aiov; | |
341 | auio.uio_iovcnt = 1; | 339 | auio.uio_iovcnt = 1; | |
342 | auio.uio_resid = nbyte; | 340 | auio.uio_resid = nbyte; | |
343 | auio.uio_rw = UIO_WRITE; | 341 | auio.uio_rw = UIO_WRITE; | |
344 | auio.uio_vmspace = curproc->p_vmspace; | 342 | auio.uio_vmspace = curproc->p_vmspace; | |
345 | 343 | |||
346 | /* | 344 | /* | |
347 | * Writes return ssize_t because -1 is returned on error. Therefore | 345 | * Writes return ssize_t because -1 is returned on error. Therefore | |
348 | * we must restrict the length to SSIZE_MAX to avoid garbage return | 346 | * we must restrict the length to SSIZE_MAX to avoid garbage return | |
349 | * values. | 347 | * values. | |
350 | */ | 348 | */ | |
351 | if (auio.uio_resid > SSIZE_MAX) { | 349 | if (auio.uio_resid > SSIZE_MAX) { | |
352 | error = EINVAL; | 350 | error = EINVAL; | |
353 | goto out; | 351 | goto out; | |
354 | } | 352 | } | |
355 | 353 | |||
356 | cnt = auio.uio_resid; | 354 | cnt = auio.uio_resid; | |
357 | error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); | 355 | error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); | |
358 | if (error) { | 356 | if (error) { | |
359 | if (auio.uio_resid != cnt && (error == ERESTART || | 357 | if (auio.uio_resid != cnt && (error == ERESTART || | |
360 | error == EINTR || error == EWOULDBLOCK)) | 358 | error == EINTR || error == EWOULDBLOCK)) | |
361 | error = 0; | 359 | error = 0; | |
362 | if (error == EPIPE) { | 360 | if (error == EPIPE) { | |
363 | mutex_enter(proc_lock); | 361 | mutex_enter(proc_lock); | |
364 | psignal(curproc, SIGPIPE); | 362 | psignal(curproc, SIGPIPE); | |
365 | mutex_exit(proc_lock); | 363 | mutex_exit(proc_lock); | |
366 | } | 364 | } | |
367 | } | 365 | } | |
368 | cnt -= auio.uio_resid; | 366 | cnt -= auio.uio_resid; | |
369 | ktrgenio(fd, UIO_WRITE, buf, cnt, error); | 367 | ktrgenio(fd, UIO_WRITE, buf, cnt, error); | |
370 | *retval = cnt; | 368 | *retval = cnt; | |
371 | out: | 369 | out: | |
372 | fd_putfile(fd); | 370 | fd_putfile(fd); | |
373 | return (error); | 371 | return (error); | |
374 | } | 372 | } | |
375 | 373 | |||
376 | /* | 374 | /* | |
377 | * Gather write system call | 375 | * Gather write system call | |
378 | */ | 376 | */ | |
379 | int | 377 | int | |
380 | sys_writev(struct lwp *l, const struct sys_writev_args *uap, register_t *retval) | 378 | sys_writev(struct lwp *l, const struct sys_writev_args *uap, register_t *retval) | |
381 | { | 379 | { | |
382 | /* { | 380 | /* { | |
383 | syscallarg(int) fd; | 381 | syscallarg(int) fd; | |
384 | syscallarg(const struct iovec *) iovp; | 382 | syscallarg(const struct iovec *) iovp; | |
385 | syscallarg(int) iovcnt; | 383 | syscallarg(int) iovcnt; | |
386 | } */ | 384 | } */ | |
387 | 385 | |||
388 | return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), | 386 | return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), | |
389 | SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); | 387 | SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); | |
390 | } | 388 | } | |
391 | 389 | |||
392 | int | 390 | int | |
393 | do_filewritev(int fd, const struct iovec *iovp, int iovcnt, | 391 | do_filewritev(int fd, const struct iovec *iovp, int iovcnt, | |
394 | off_t *offset, int flags, register_t *retval) | 392 | off_t *offset, int flags, register_t *retval) | |
395 | { | 393 | { | |
396 | struct uio auio; | 394 | struct uio auio; | |
397 | struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; | 395 | struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; | |
398 | int i, error; | 396 | int i, error; | |
399 | size_t cnt; | 397 | size_t cnt; | |
400 | u_int iovlen; | 398 | u_int iovlen; | |
401 | struct file *fp; | 399 | struct file *fp; | |
402 | struct iovec *ktriov = NULL; | 400 | struct iovec *ktriov = NULL; | |
403 | 401 | |||
404 | if (iovcnt == 0) | 402 | if (iovcnt == 0) | |
405 | return EINVAL; | 403 | return EINVAL; | |
406 | 404 | |||
407 | if ((fp = fd_getfile(fd)) == NULL) | 405 | if ((fp = fd_getfile(fd)) == NULL) | |
408 | return EBADF; | 406 | return EBADF; | |
409 | 407 | |||
410 | if ((fp->f_flag & FWRITE) == 0) { | 408 | if ((fp->f_flag & FWRITE) == 0) { | |
411 | fd_putfile(fd); | 409 | fd_putfile(fd); | |
412 | return EBADF; | 410 | return EBADF; | |
413 | } | 411 | } | |
414 | 412 | |||
415 | if (offset == NULL) | 413 | if (offset == NULL) | |
416 | offset = &fp->f_offset; | 414 | offset = &fp->f_offset; | |
417 | else { | 415 | else { | |
418 | struct vnode *vp = fp->f_data; | 416 | struct vnode *vp = fp->f_data; | |
419 | if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { | 417 | if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { | |
420 | error = ESPIPE; | 418 | error = ESPIPE; | |
421 | goto out; | 419 | goto out; | |
422 | } | 420 | } | |
423 | /* | 421 | /* | |
424 | * Test that the device is seekable ? | 422 | * Test that the device is seekable ? | |
425 | * XXX This works because no file systems actually | 423 | * XXX This works because no file systems actually | |
426 | * XXX take any action on the seek operation. | 424 | * XXX take any action on the seek operation. | |
427 | */ | 425 | */ | |
428 | error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); | 426 | error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); | |
429 | if (error != 0) | 427 | if (error != 0) | |
430 | goto out; | 428 | goto out; | |
431 | } | 429 | } | |
432 | 430 | |||
433 | iovlen = iovcnt * sizeof(struct iovec); | 431 | iovlen = iovcnt * sizeof(struct iovec); | |
434 | if (flags & FOF_IOV_SYSSPACE) | 432 | if (flags & FOF_IOV_SYSSPACE) | |
435 | iov = __UNCONST(iovp); | 433 | iov = __UNCONST(iovp); | |
436 | else { | 434 | else { | |
437 | iov = aiov; | 435 | iov = aiov; | |
438 | if ((u_int)iovcnt > UIO_SMALLIOV) { | 436 | if ((u_int)iovcnt > UIO_SMALLIOV) { | |
439 | if ((u_int)iovcnt > IOV_MAX) { | 437 | if ((u_int)iovcnt > IOV_MAX) { | |
440 | error = EINVAL; | 438 | error = EINVAL; | |
441 | goto out; | 439 | goto out; | |
442 | } | 440 | } | |
443 | iov = kmem_alloc(iovlen, KM_SLEEP); | 441 | iov = kmem_alloc(iovlen, KM_SLEEP); | |
444 | if (iov == NULL) { | 442 | if (iov == NULL) { | |
445 | error = ENOMEM; | 443 | error = ENOMEM; | |
446 | goto out; | 444 | goto out; | |
447 | } | 445 | } | |
448 | needfree = iov; | 446 | needfree = iov; | |
449 | } | 447 | } | |
450 | error = copyin(iovp, iov, iovlen); | 448 | error = copyin(iovp, iov, iovlen); | |
451 | if (error) | 449 | if (error) | |
452 | goto done; | 450 | goto done; | |
453 | } | 451 | } | |
454 | 452 | |||
455 | auio.uio_iov = iov; | 453 | auio.uio_iov = iov; | |
456 | auio.uio_iovcnt = iovcnt; | 454 | auio.uio_iovcnt = iovcnt; | |
457 | auio.uio_rw = UIO_WRITE; | 455 | auio.uio_rw = UIO_WRITE; | |
458 | auio.uio_vmspace = curproc->p_vmspace; | 456 | auio.uio_vmspace = curproc->p_vmspace; | |
459 | 457 | |||
460 | auio.uio_resid = 0; | 458 | auio.uio_resid = 0; | |
461 | for (i = 0; i < iovcnt; i++, iov++) { | 459 | for (i = 0; i < iovcnt; i++, iov++) { | |
462 | auio.uio_resid += iov->iov_len; | 460 | auio.uio_resid += iov->iov_len; | |
463 | /* | 461 | /* | |
464 | * Writes return ssize_t because -1 is returned on error. | 462 | * Writes return ssize_t because -1 is returned on error. | |
465 | * Therefore we must restrict the length to SSIZE_MAX to | 463 | * Therefore we must restrict the length to SSIZE_MAX to | |
466 | * avoid garbage return values. | 464 | * avoid garbage return values. | |
467 | */ | 465 | */ | |
468 | if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | 466 | if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | |
469 | error = EINVAL; | 467 | error = EINVAL; | |
470 | goto done; | 468 | goto done; | |
471 | } | 469 | } | |
472 | } | 470 | } | |
473 | 471 | |||
474 | /* | 472 | /* | |
475 | * if tracing, save a copy of iovec | 473 | * if tracing, save a copy of iovec | |
476 | */ | 474 | */ | |
477 | if (ktrpoint(KTR_GENIO)) { | 475 | if (ktrpoint(KTR_GENIO)) { | |
478 | ktriov = kmem_alloc(iovlen, KM_SLEEP); | 476 | ktriov = kmem_alloc(iovlen, KM_SLEEP); | |
479 | if (ktriov != NULL) | 477 | if (ktriov != NULL) | |
480 | memcpy(ktriov, auio.uio_iov, iovlen); | 478 | memcpy(ktriov, auio.uio_iov, iovlen); | |
481 | } | 479 | } | |
482 | 480 | |||
483 | cnt = auio.uio_resid; | 481 | cnt = auio.uio_resid; | |
484 | error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); | 482 | error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); | |
485 | if (error) { | 483 | if (error) { | |
486 | if (auio.uio_resid != cnt && (error == ERESTART || | 484 | if (auio.uio_resid != cnt && (error == ERESTART || | |
487 | error == EINTR || error == EWOULDBLOCK)) | 485 | error == EINTR || error == EWOULDBLOCK)) | |
488 | error = 0; | 486 | error = 0; | |
489 | if (error == EPIPE) { | 487 | if (error == EPIPE) { | |
490 | mutex_enter(proc_lock); | 488 | mutex_enter(proc_lock); | |
491 | psignal(curproc, SIGPIPE); | 489 | psignal(curproc, SIGPIPE); | |
492 | mutex_exit(proc_lock); | 490 | mutex_exit(proc_lock); | |
493 | } | 491 | } | |
494 | } | 492 | } | |
495 | cnt -= auio.uio_resid; | 493 | cnt -= auio.uio_resid; | |
496 | *retval = cnt; | 494 | *retval = cnt; | |
497 | 495 | |||
498 | if (ktriov != NULL) { | 496 | if (ktriov != NULL) { | |
499 | ktrgeniov(fd, UIO_WRITE, ktriov, cnt, error); | 497 | ktrgeniov(fd, UIO_WRITE, ktriov, cnt, error); | |
500 | kmem_free(ktriov, iovlen); | 498 | kmem_free(ktriov, iovlen); | |
501 | } | 499 | } | |
502 | 500 | |||
503 | done: | 501 | done: | |
504 | if (needfree) | 502 | if (needfree) | |
505 | kmem_free(needfree, iovlen); | 503 | kmem_free(needfree, iovlen); | |
506 | out: | 504 | out: | |
507 | fd_putfile(fd); | 505 | fd_putfile(fd); | |
508 | return (error); | 506 | return (error); | |
509 | } | 507 | } | |
510 | 508 | |||
511 | /* | 509 | /* | |
512 | * Ioctl system call | 510 | * Ioctl system call | |
513 | */ | 511 | */ | |
514 | /* ARGSUSED */ | 512 | /* ARGSUSED */ | |
515 | int | 513 | int | |
516 | sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, register_t *retval) | 514 | sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, register_t *retval) | |
517 | { | 515 | { | |
518 | /* { | 516 | /* { | |
519 | syscallarg(int) fd; | 517 | syscallarg(int) fd; | |
520 | syscallarg(u_long) com; | 518 | syscallarg(u_long) com; | |
521 | syscallarg(void *) data; | 519 | syscallarg(void *) data; | |
522 | } */ | 520 | } */ | |
523 | struct file *fp; | 521 | struct file *fp; | |
524 | proc_t *p; | 522 | proc_t *p; | |
525 | u_long com; | 523 | u_long com; | |
526 | int error; | 524 | int error; | |
527 | size_t size, alloc_size; | 525 | size_t size, alloc_size; | |
528 | void *data, *memp; | 526 | void *data, *memp; | |
529 | #define STK_PARAMS 128 | 527 | #define STK_PARAMS 128 | |
530 | u_long stkbuf[STK_PARAMS/sizeof(u_long)]; | 528 | u_long stkbuf[STK_PARAMS/sizeof(u_long)]; | |
531 | 529 | |||
532 | memp = NULL; | 530 | memp = NULL; | |
533 | alloc_size = 0; | 531 | alloc_size = 0; | |
534 | error = 0; | 532 | error = 0; | |
535 | p = l->l_proc; | 533 | p = l->l_proc; | |
536 | 534 | |||
537 | if ((fp = fd_getfile(SCARG(uap, fd))) == NULL) | 535 | if ((fp = fd_getfile(SCARG(uap, fd))) == NULL) | |
538 | return (EBADF); | 536 | return (EBADF); | |
539 | 537 | |||
540 | if ((fp->f_flag & (FREAD | FWRITE)) == 0) { | 538 | if ((fp->f_flag & (FREAD | FWRITE)) == 0) { | |
541 | error = EBADF; | 539 | error = EBADF; | |
542 | com = 0; | 540 | com = 0; | |
543 | goto out; | 541 | goto out; | |
544 | } | 542 | } | |
545 | 543 | |||
546 | switch (com = SCARG(uap, com)) { | 544 | switch (com = SCARG(uap, com)) { | |
547 | case FIONCLEX: | 545 | case FIONCLEX: | |
548 | case FIOCLEX: | 546 | case FIOCLEX: | |
549 | fd_set_exclose(l, SCARG(uap, fd), com == FIOCLEX); | 547 | fd_set_exclose(l, SCARG(uap, fd), com == FIOCLEX); | |
550 | goto out; | 548 | goto out; | |
551 | } | 549 | } | |
552 | 550 | |||
553 | /* | 551 | /* | |
554 | * Interpret high order word to find amount of data to be | 552 | * Interpret high order word to find amount of data to be | |
555 | * copied to/from the user's address space. | 553 | * copied to/from the user's address space. | |
556 | */ | 554 | */ | |
557 | size = IOCPARM_LEN(com); | 555 | size = IOCPARM_LEN(com); | |
558 | alloc_size = size; | 556 | alloc_size = size; | |
559 | 557 | |||
560 | /* | 558 | /* | |
561 | * The disklabel is now padded to a multiple of 8 bytes however the old | 559 | * The disklabel is now padded to a multiple of 8 bytes however the old | |
562 | * disklabel on 32bit platforms wasn't. This leaves a difference in | 560 | * disklabel on 32bit platforms wasn't. This leaves a difference in | |
563 | * size of 4 bytes between the two but are otherwise identical. | 561 | * size of 4 bytes between the two but are otherwise identical. | |
564 | * To deal with this, we allocate enough space for the new disklabel | 562 | * To deal with this, we allocate enough space for the new disklabel | |
565 | * but only copyin/out the smaller amount. | 563 | * but only copyin/out the smaller amount. | |
566 | */ | 564 | */ | |
567 | if (IOCGROUP(com) == 'd') { | 565 | if (IOCGROUP(com) == 'd') { | |
568 | u_long ncom = com ^ (DIOCGDINFO ^ DIOCGDINFO32); | 566 | u_long ncom = com ^ (DIOCGDINFO ^ DIOCGDINFO32); | |
569 | switch (ncom) { | 567 | switch (ncom) { | |
570 | case DIOCGDINFO: | 568 | case DIOCGDINFO: | |
571 | case DIOCWDINFO: | 569 | case DIOCWDINFO: | |
572 | case DIOCSDINFO: | 570 | case DIOCSDINFO: | |
573 | case DIOCGDEFLABEL: | 571 | case DIOCGDEFLABEL: | |
574 | com = ncom; | 572 | com = ncom; | |
575 | if (IOCPARM_LEN(DIOCGDINFO32) < IOCPARM_LEN(DIOCGDINFO)) | 573 | if (IOCPARM_LEN(DIOCGDINFO32) < IOCPARM_LEN(DIOCGDINFO)) | |
576 | alloc_size = IOCPARM_LEN(DIOCGDINFO); | 574 | alloc_size = IOCPARM_LEN(DIOCGDINFO); | |
577 | break; | 575 | break; | |
578 | } | 576 | } | |
579 | } | 577 | } | |
580 | if (size > IOCPARM_MAX) { | 578 | if (size > IOCPARM_MAX) { | |
581 | error = ENOTTY; | 579 | error = ENOTTY; | |
582 | goto out; | 580 | goto out; | |
583 | } | 581 | } | |
584 | memp = NULL; | 582 | memp = NULL; | |
585 | if ((com >> IOCPARM_SHIFT) == 0) { | 583 | if ((com >> IOCPARM_SHIFT) == 0) { | |
586 | /* UNIX-style ioctl. */ | 584 | /* UNIX-style ioctl. */ | |
587 | data = SCARG(uap, data); | 585 | data = SCARG(uap, data); | |
588 | } else { | 586 | } else { | |
589 | if (alloc_size > sizeof(stkbuf)) { | 587 | if (alloc_size > sizeof(stkbuf)) { | |
590 | memp = kmem_alloc(alloc_size, KM_SLEEP); | 588 | memp = kmem_alloc(alloc_size, KM_SLEEP); | |
591 | data = memp; | 589 | data = memp; | |
592 | } else { | 590 | } else { | |
593 | data = (void *)stkbuf; | 591 | data = (void *)stkbuf; | |
594 | } | 592 | } | |
595 | if (com&IOC_IN) { | 593 | if (com&IOC_IN) { | |
596 | if (size) { | 594 | if (size) { | |
597 | error = copyin(SCARG(uap, data), data, size); | 595 | error = copyin(SCARG(uap, data), data, size); | |
598 | if (error) { | 596 | if (error) { | |
599 | goto out; | 597 | goto out; | |
600 | } | 598 | } | |
601 | /* | 599 | /* | |
602 | * The data between size and alloc_size has | 600 | * The data between size and alloc_size has | |
603 | * not been overwritten. It shouldn't matter | 601 | * not been overwritten. It shouldn't matter | |
604 | * but let's clear that anyway. | 602 | * but let's clear that anyway. | |
605 | */ | 603 | */ | |
606 | if (__predict_false(size < alloc_size)) { | 604 | if (__predict_false(size < alloc_size)) { | |
607 | memset((char *)data+size, 0, | 605 | memset((char *)data+size, 0, | |
608 | alloc_size - size); | 606 | alloc_size - size); | |
609 | } | 607 | } | |
610 | ktrgenio(SCARG(uap, fd), UIO_WRITE, | 608 | ktrgenio(SCARG(uap, fd), UIO_WRITE, | |
611 | SCARG(uap, data), size, 0); | 609 | SCARG(uap, data), size, 0); | |
612 | } else { | 610 | } else { | |
613 | *(void **)data = SCARG(uap, data); | 611 | *(void **)data = SCARG(uap, data); | |
614 | } | 612 | } | |
615 | } else if ((com&IOC_OUT) && size) { | 613 | } else if ((com&IOC_OUT) && size) { | |
616 | /* | 614 | /* | |
617 | * Zero the buffer so the user always | 615 | * Zero the buffer so the user always | |
618 | * gets back something deterministic. | 616 | * gets back something deterministic. | |
619 | */ | 617 | */ | |
620 | memset(data, 0, size); | 618 | memset(data, 0, size); | |
621 | } else if (com&IOC_VOID) { | 619 | } else if (com&IOC_VOID) { | |
622 | *(void **)data = SCARG(uap, data); | 620 | *(void **)data = SCARG(uap, data); | |
623 | } | 621 | } | |
624 | } | 622 | } | |
625 | 623 | |||
626 | switch (com) { | 624 | switch (com) { | |
627 | 625 | |||
628 | case FIONBIO: | 626 | case FIONBIO: | |
629 | /* XXX Code block is not atomic */ | 627 | /* XXX Code block is not atomic */ | |
630 | if (*(int *)data != 0) | 628 | if (*(int *)data != 0) | |
631 | atomic_or_uint(&fp->f_flag, FNONBLOCK); | 629 | atomic_or_uint(&fp->f_flag, FNONBLOCK); | |
632 | else | 630 | else | |
633 | atomic_and_uint(&fp->f_flag, ~FNONBLOCK); | 631 | atomic_and_uint(&fp->f_flag, ~FNONBLOCK); | |
634 | error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data); | 632 | error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data); | |
635 | break; | 633 | break; | |
636 | 634 | |||
637 | case FIOASYNC: | 635 | case FIOASYNC: | |
638 | /* XXX Code block is not atomic */ | 636 | /* XXX Code block is not atomic */ | |
639 | if (*(int *)data != 0) | 637 | if (*(int *)data != 0) | |
640 | atomic_or_uint(&fp->f_flag, FASYNC); | 638 | atomic_or_uint(&fp->f_flag, FASYNC); | |
641 | else | 639 | else | |
642 | atomic_and_uint(&fp->f_flag, ~FASYNC); | 640 | atomic_and_uint(&fp->f_flag, ~FASYNC); | |
643 | error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data); | 641 | error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data); | |
644 | break; | 642 | break; | |
645 | 643 | |||
646 | default: | 644 | default: | |
647 | error = (*fp->f_ops->fo_ioctl)(fp, com, data); | 645 | error = (*fp->f_ops->fo_ioctl)(fp, com, data); | |
648 | /* | 646 | /* | |
649 | * Copy any data to user, size was | 647 | * Copy any data to user, size was | |
650 | * already set and checked above. | 648 | * already set and checked above. | |
651 | */ | 649 | */ | |
652 | if (error == 0 && (com&IOC_OUT) && size) { | 650 | if (error == 0 && (com&IOC_OUT) && size) { | |
653 | error = copyout(data, SCARG(uap, data), size); | 651 | error = copyout(data, SCARG(uap, data), size); | |
654 | ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, data), | 652 | ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, data), | |
655 | size, error); | 653 | size, error); | |
656 | } | 654 | } | |
657 | break; | 655 | break; | |
658 | } | 656 | } | |
659 | out: | 657 | out: | |
660 | if (memp) | 658 | if (memp) | |
661 | kmem_free(memp, alloc_size); | 659 | kmem_free(memp, alloc_size); | |
662 | fd_putfile(SCARG(uap, fd)); | 660 | fd_putfile(SCARG(uap, fd)); | |
663 | switch (error) { | 661 | switch (error) { | |
664 | case -1: | 662 | case -1: | |
665 | printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " | 663 | printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " | |
666 | "pid=%d comm=%s\n", | 664 | "pid=%d comm=%s\n", | |
667 | (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", | 665 | (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", | |
668 | (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), | 666 | (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), | |
669 | p->p_pid, p->p_comm); | 667 | p->p_pid, p->p_comm); | |
670 | /* FALLTHROUGH */ | 668 | /* FALLTHROUGH */ | |
671 | case EPASSTHROUGH: | 669 | case EPASSTHROUGH: | |
672 | error = ENOTTY; | 670 | error = ENOTTY; | |
673 | /* FALLTHROUGH */ | 671 | /* FALLTHROUGH */ | |
674 | default: | 672 | default: | |
675 | return (error); | 673 | return (error); | |
676 | } | 674 | } | |
677 | } | 675 | } |
--- src/sys/kern/uipc_mbuf.c 2011/04/24 18:46:23 1.140
+++ src/sys/kern/uipc_mbuf.c 2011/07/27 14:35:34 1.141
@@ -1,1089 +1,1087 @@ | @@ -1,1089 +1,1087 @@ | |||
1 | /* $NetBSD: uipc_mbuf.c,v 1.140 2011/04/24 18:46:23 rmind Exp $ */ | 1 | /* $NetBSD: uipc_mbuf.c,v 1.141 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | |
9 | * NASA Ames Research Center. | 9 | * NASA Ames Research Center. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | 15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | 16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | 17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | 18 | * documentation and/or other materials provided with the distribution. | |
19 | * | 19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
30 | * POSSIBILITY OF SUCH DAMAGE. | 30 | * POSSIBILITY OF SUCH DAMAGE. | |
31 | */ | 31 | */ | |
32 | 32 | |||
33 | /* | 33 | /* | |
34 | * Copyright (c) 1982, 1986, 1988, 1991, 1993 | 34 | * Copyright (c) 1982, 1986, 1988, 1991, 1993 | |
35 | * The Regents of the University of California. All rights reserved. | 35 | * The Regents of the University of California. All rights reserved. | |
36 | * | 36 | * | |
37 | * Redistribution and use in source and binary forms, with or without | 37 | * Redistribution and use in source and binary forms, with or without | |
38 | * modification, are permitted provided that the following conditions | 38 | * modification, are permitted provided that the following conditions | |
39 | * are met: | 39 | * are met: | |
40 | * 1. Redistributions of source code must retain the above copyright | 40 | * 1. Redistributions of source code must retain the above copyright | |
41 | * notice, this list of conditions and the following disclaimer. | 41 | * notice, this list of conditions and the following disclaimer. | |
42 | * 2. Redistributions in binary form must reproduce the above copyright | 42 | * 2. Redistributions in binary form must reproduce the above copyright | |
43 | * notice, this list of conditions and the following disclaimer in the | 43 | * notice, this list of conditions and the following disclaimer in the | |
44 | * documentation and/or other materials provided with the distribution. | 44 | * documentation and/or other materials provided with the distribution. | |
45 | * 3. Neither the name of the University nor the names of its contributors | 45 | * 3. Neither the name of the University nor the names of its contributors | |
46 | * may be used to endorse or promote products derived from this software | 46 | * may be used to endorse or promote products derived from this software | |
47 | * without specific prior written permission. | 47 | * without specific prior written permission. | |
48 | * | 48 | * | |
49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
59 | * SUCH DAMAGE. | 59 | * SUCH DAMAGE. | |
60 | * | 60 | * | |
61 | * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 | 61 | * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 | |
62 | */ | 62 | */ | |
63 | 63 | |||
64 | #include <sys/cdefs.h> | 64 | #include <sys/cdefs.h> | |
65 | __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.140 2011/04/24 18:46:23 rmind Exp $"); | 65 | __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.141 2011/07/27 14:35:34 uebayasi Exp $"); | |
66 | 66 | |||
67 | #include "opt_mbuftrace.h" | 67 | #include "opt_mbuftrace.h" | |
68 | #include "opt_nmbclusters.h" | 68 | #include "opt_nmbclusters.h" | |
69 | #include "opt_ddb.h" | 69 | #include "opt_ddb.h" | |
70 | 70 | |||
71 | #include <sys/param.h> | 71 | #include <sys/param.h> | |
72 | #include <sys/systm.h> | 72 | #include <sys/systm.h> | |
73 | #include <sys/atomic.h> | 73 | #include <sys/atomic.h> | |
74 | #include <sys/cpu.h> | 74 | #include <sys/cpu.h> | |
75 | #include <sys/proc.h> | 75 | #include <sys/proc.h> | |
76 | #define MBTYPES | 76 | #define MBTYPES | |
77 | #include <sys/mbuf.h> | 77 | #include <sys/mbuf.h> | |
78 | #include <sys/kernel.h> | 78 | #include <sys/kernel.h> | |
79 | #include <sys/syslog.h> | 79 | #include <sys/syslog.h> | |
80 | #include <sys/domain.h> | 80 | #include <sys/domain.h> | |
81 | #include <sys/protosw.h> | 81 | #include <sys/protosw.h> | |
82 | #include <sys/percpu.h> | 82 | #include <sys/percpu.h> | |
83 | #include <sys/pool.h> | 83 | #include <sys/pool.h> | |
84 | #include <sys/socket.h> | 84 | #include <sys/socket.h> | |
85 | #include <sys/sysctl.h> | 85 | #include <sys/sysctl.h> | |
86 | 86 | |||
87 | #include <net/if.h> | 87 | #include <net/if.h> | |
88 | 88 | |||
89 | #include <uvm/uvm_extern.h> | |||
90 | ||||
91 | pool_cache_t mb_cache; /* mbuf cache */ | 89 | pool_cache_t mb_cache; /* mbuf cache */ | |
92 | pool_cache_t mcl_cache; /* mbuf cluster cache */ | 90 | pool_cache_t mcl_cache; /* mbuf cluster cache */ | |
93 | 91 | |||
94 | struct mbstat mbstat; | 92 | struct mbstat mbstat; | |
95 | int max_linkhdr; | 93 | int max_linkhdr; | |
96 | int max_protohdr; | 94 | int max_protohdr; | |
97 | int max_hdr; | 95 | int max_hdr; | |
98 | int max_datalen; | 96 | int max_datalen; | |
99 | 97 | |||
100 | static int mb_ctor(void *, void *, int); | 98 | static int mb_ctor(void *, void *, int); | |
101 | 99 | |||
102 | static void sysctl_kern_mbuf_setup(void); | 100 | static void sysctl_kern_mbuf_setup(void); | |
103 | 101 | |||
104 | static struct sysctllog *mbuf_sysctllog; | 102 | static struct sysctllog *mbuf_sysctllog; | |
105 | 103 | |||
106 | static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); | 104 | static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); | |
107 | static struct mbuf *m_split0(struct mbuf *, int, int, int); | 105 | static struct mbuf *m_split0(struct mbuf *, int, int, int); | |
108 | static int m_copyback0(struct mbuf **, int, int, const void *, int, int); | 106 | static int m_copyback0(struct mbuf **, int, int, const void *, int, int); | |
109 | 107 | |||
110 | /* flags for m_copyback0 */ | 108 | /* flags for m_copyback0 */ | |
111 | #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ | 109 | #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ | |
112 | #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ | 110 | #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ | |
113 | #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ | 111 | #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ | |
114 | #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ | 112 | #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ | |
115 | 113 | |||
116 | static const char mclpool_warnmsg[] = | 114 | static const char mclpool_warnmsg[] = | |
117 | "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; | 115 | "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; | |
118 | 116 | |||
119 | MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); | 117 | MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); | |
120 | 118 | |||
121 | static percpu_t *mbstat_percpu; | 119 | static percpu_t *mbstat_percpu; | |
122 | 120 | |||
123 | #ifdef MBUFTRACE | 121 | #ifdef MBUFTRACE | |
124 | struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); | 122 | struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); | |
125 | struct mowner unknown_mowners[] = { | 123 | struct mowner unknown_mowners[] = { | |
126 | MOWNER_INIT("unknown", "free"), | 124 | MOWNER_INIT("unknown", "free"), | |
127 | MOWNER_INIT("unknown", "data"), | 125 | MOWNER_INIT("unknown", "data"), | |
128 | MOWNER_INIT("unknown", "header"), | 126 | MOWNER_INIT("unknown", "header"), | |
129 | MOWNER_INIT("unknown", "soname"), | 127 | MOWNER_INIT("unknown", "soname"), | |
130 | MOWNER_INIT("unknown", "soopts"), | 128 | MOWNER_INIT("unknown", "soopts"), | |
131 | MOWNER_INIT("unknown", "ftable"), | 129 | MOWNER_INIT("unknown", "ftable"), | |
132 | MOWNER_INIT("unknown", "control"), | 130 | MOWNER_INIT("unknown", "control"), | |
133 | MOWNER_INIT("unknown", "oobdata"), | 131 | MOWNER_INIT("unknown", "oobdata"), | |
134 | }; | 132 | }; | |
135 | struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); | 133 | struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); | |
136 | #endif | 134 | #endif | |
137 | 135 | |||
138 | #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) | 136 | #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) | |
139 | 137 | |||
140 | #define MCLADDREFERENCE(o, n) \ | 138 | #define MCLADDREFERENCE(o, n) \ | |
141 | do { \ | 139 | do { \ | |
142 | KASSERT(((o)->m_flags & M_EXT) != 0); \ | 140 | KASSERT(((o)->m_flags & M_EXT) != 0); \ | |
143 | KASSERT(((n)->m_flags & M_EXT) == 0); \ | 141 | KASSERT(((n)->m_flags & M_EXT) == 0); \ | |
144 | KASSERT((o)->m_ext.ext_refcnt >= 1); \ | 142 | KASSERT((o)->m_ext.ext_refcnt >= 1); \ | |
145 | (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ | 143 | (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ | |
146 | atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ | 144 | atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ | |
147 | (n)->m_ext_ref = (o)->m_ext_ref; \ | 145 | (n)->m_ext_ref = (o)->m_ext_ref; \ | |
148 | mowner_ref((n), (n)->m_flags); \ | 146 | mowner_ref((n), (n)->m_flags); \ | |
149 | MCLREFDEBUGN((n), __FILE__, __LINE__); \ | 147 | MCLREFDEBUGN((n), __FILE__, __LINE__); \ | |
150 | } while (/* CONSTCOND */ 0) | 148 | } while (/* CONSTCOND */ 0) | |
151 | 149 | |||
152 | static int | 150 | static int | |
153 | nmbclusters_limit(void) | 151 | nmbclusters_limit(void) | |
154 | { | 152 | { | |
155 | #if defined(PMAP_MAP_POOLPAGE) | 153 | #if defined(PMAP_MAP_POOLPAGE) | |
156 | /* direct mapping, doesn't use space in kmem_map */ | 154 | /* direct mapping, doesn't use space in kmem_map */ | |
157 | vsize_t max_size = physmem / 4; | 155 | vsize_t max_size = physmem / 4; | |
158 | #else | 156 | #else | |
159 | vsize_t max_size = MIN(physmem / 4, nkmempages / 2); | 157 | vsize_t max_size = MIN(physmem / 4, nkmempages / 2); | |
160 | #endif | 158 | #endif | |
161 | 159 | |||
162 | max_size = max_size * PAGE_SIZE / MCLBYTES; | 160 | max_size = max_size * PAGE_SIZE / MCLBYTES; | |
163 | #ifdef NMBCLUSTERS_MAX | 161 | #ifdef NMBCLUSTERS_MAX | |
164 | max_size = MIN(max_size, NMBCLUSTERS_MAX); | 162 | max_size = MIN(max_size, NMBCLUSTERS_MAX); | |
165 | #endif | 163 | #endif | |
166 | 164 | |||
167 | #ifdef NMBCLUSTERS | 165 | #ifdef NMBCLUSTERS | |
168 | return MIN(max_size, NMBCLUSTERS); | 166 | return MIN(max_size, NMBCLUSTERS); | |
169 | #else | 167 | #else | |
170 | return max_size; | 168 | return max_size; | |
171 | #endif | 169 | #endif | |
172 | } | 170 | } | |
173 | 171 | |||
174 | /* | 172 | /* | |
175 | * Initialize the mbuf allocator. | 173 | * Initialize the mbuf allocator. | |
176 | */ | 174 | */ | |
177 | void | 175 | void | |
178 | mbinit(void) | 176 | mbinit(void) | |
179 | { | 177 | { | |
180 | 178 | |||
181 | CTASSERT(sizeof(struct _m_ext) <= MHLEN); | 179 | CTASSERT(sizeof(struct _m_ext) <= MHLEN); | |
182 | CTASSERT(sizeof(struct mbuf) == MSIZE); | 180 | CTASSERT(sizeof(struct mbuf) == MSIZE); | |
183 | 181 | |||
184 | sysctl_kern_mbuf_setup(); | 182 | sysctl_kern_mbuf_setup(); | |
185 | 183 | |||
186 | mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", | 184 | mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", | |
187 | NULL, IPL_VM, mb_ctor, NULL, NULL); | 185 | NULL, IPL_VM, mb_ctor, NULL, NULL); | |
188 | KASSERT(mb_cache != NULL); | 186 | KASSERT(mb_cache != NULL); | |
189 | 187 | |||
190 | mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, | 188 | mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, | |
191 | IPL_VM, NULL, NULL, NULL); | 189 | IPL_VM, NULL, NULL, NULL); | |
192 | KASSERT(mcl_cache != NULL); | 190 | KASSERT(mcl_cache != NULL); | |
193 | 191 | |||
194 | pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); | 192 | pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); | |
195 | pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); | 193 | pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); | |
196 | 194 | |||
197 | /* | 195 | /* | |
198 | * Set an arbitrary default limit on the number of mbuf clusters. | 196 | * Set an arbitrary default limit on the number of mbuf clusters. | |
199 | */ | 197 | */ | |
200 | #ifdef NMBCLUSTERS | 198 | #ifdef NMBCLUSTERS | |
201 | nmbclusters = nmbclusters_limit(); | 199 | nmbclusters = nmbclusters_limit(); | |
202 | #else | 200 | #else | |
203 | nmbclusters = MAX(1024, | 201 | nmbclusters = MAX(1024, | |
204 | (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); | 202 | (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); | |
205 | nmbclusters = MIN(nmbclusters, nmbclusters_limit()); | 203 | nmbclusters = MIN(nmbclusters, nmbclusters_limit()); | |
206 | #endif | 204 | #endif | |
207 | 205 | |||
208 | /* | 206 | /* | |
209 | * Set the hard limit on the mclpool to the number of | 207 | * Set the hard limit on the mclpool to the number of | |
210 | * mbuf clusters the kernel is to support. Log the limit | 208 | * mbuf clusters the kernel is to support. Log the limit | |
211 | * reached message max once a minute. | 209 | * reached message max once a minute. | |
212 | */ | 210 | */ | |
213 | pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); | 211 | pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); | |
214 | 212 | |||
215 | mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); | 213 | mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); | |
216 | 214 | |||
217 | /* | 215 | /* | |
218 | * Set a low water mark for both mbufs and clusters. This should | 216 | * Set a low water mark for both mbufs and clusters. This should | |
219 | * help ensure that they can be allocated in a memory starvation | 217 | * help ensure that they can be allocated in a memory starvation | |
220 | * situation. This is important for e.g. diskless systems which | 218 | * situation. This is important for e.g. diskless systems which | |
221 | * must allocate mbufs in order for the pagedaemon to clean pages. | 219 | * must allocate mbufs in order for the pagedaemon to clean pages. | |
222 | */ | 220 | */ | |
223 | pool_cache_setlowat(mb_cache, mblowat); | 221 | pool_cache_setlowat(mb_cache, mblowat); | |
224 | pool_cache_setlowat(mcl_cache, mcllowat); | 222 | pool_cache_setlowat(mcl_cache, mcllowat); | |
225 | 223 | |||
226 | #ifdef MBUFTRACE | 224 | #ifdef MBUFTRACE | |
227 | { | 225 | { | |
228 | /* | 226 | /* | |
229 | * Attach the unknown mowners. | 227 | * Attach the unknown mowners. | |
230 | */ | 228 | */ | |
231 | int i; | 229 | int i; | |
232 | MOWNER_ATTACH(&revoked_mowner); | 230 | MOWNER_ATTACH(&revoked_mowner); | |
233 | for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); | 231 | for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); | |
234 | i-- > 0; ) | 232 | i-- > 0; ) | |
235 | MOWNER_ATTACH(&unknown_mowners[i]); | 233 | MOWNER_ATTACH(&unknown_mowners[i]); | |
236 | } | 234 | } | |
237 | #endif | 235 | #endif | |
238 | } | 236 | } | |
239 | 237 | |||
240 | /* | 238 | /* | |
241 | * sysctl helper routine for the kern.mbuf subtree. | 239 | * sysctl helper routine for the kern.mbuf subtree. | |
242 | * nmbclusters, mblowat and mcllowat need range | 240 | * nmbclusters, mblowat and mcllowat need range | |
243 | * checking and pool tweaking after being reset. | 241 | * checking and pool tweaking after being reset. | |
244 | */ | 242 | */ | |
245 | static int | 243 | static int | |
246 | sysctl_kern_mbuf(SYSCTLFN_ARGS) | 244 | sysctl_kern_mbuf(SYSCTLFN_ARGS) | |
247 | { | 245 | { | |
248 | int error, newval; | 246 | int error, newval; | |
249 | struct sysctlnode node; | 247 | struct sysctlnode node; | |
250 | 248 | |||
251 | node = *rnode; | 249 | node = *rnode; | |
252 | node.sysctl_data = &newval; | 250 | node.sysctl_data = &newval; | |
253 | switch (rnode->sysctl_num) { | 251 | switch (rnode->sysctl_num) { | |
254 | case MBUF_NMBCLUSTERS: | 252 | case MBUF_NMBCLUSTERS: | |
255 | case MBUF_MBLOWAT: | 253 | case MBUF_MBLOWAT: | |
256 | case MBUF_MCLLOWAT: | 254 | case MBUF_MCLLOWAT: | |
257 | newval = *(int*)rnode->sysctl_data; | 255 | newval = *(int*)rnode->sysctl_data; | |
258 | break; | 256 | break; | |
259 | default: | 257 | default: | |
260 | return (EOPNOTSUPP); | 258 | return (EOPNOTSUPP); | |
261 | } | 259 | } | |
262 | 260 | |||
263 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); | 261 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); | |
264 | if (error || newp == NULL) | 262 | if (error || newp == NULL) | |
265 | return (error); | 263 | return (error); | |
266 | if (newval < 0) | 264 | if (newval < 0) | |
267 | return (EINVAL); | 265 | return (EINVAL); | |
268 | 266 | |||
269 | switch (node.sysctl_num) { | 267 | switch (node.sysctl_num) { | |
270 | case MBUF_NMBCLUSTERS: | 268 | case MBUF_NMBCLUSTERS: | |
271 | if (newval < nmbclusters) | 269 | if (newval < nmbclusters) | |
272 | return (EINVAL); | 270 | return (EINVAL); | |
273 | if (newval > nmbclusters_limit()) | 271 | if (newval > nmbclusters_limit()) | |
274 | return (EINVAL); | 272 | return (EINVAL); | |
275 | nmbclusters = newval; | 273 | nmbclusters = newval; | |
276 | pool_cache_sethardlimit(mcl_cache, nmbclusters, | 274 | pool_cache_sethardlimit(mcl_cache, nmbclusters, | |
277 | mclpool_warnmsg, 60); | 275 | mclpool_warnmsg, 60); | |
278 | break; | 276 | break; | |
279 | case MBUF_MBLOWAT: | 277 | case MBUF_MBLOWAT: | |
280 | mblowat = newval; | 278 | mblowat = newval; | |
281 | pool_cache_setlowat(mb_cache, mblowat); | 279 | pool_cache_setlowat(mb_cache, mblowat); | |
282 | break; | 280 | break; | |
283 | case MBUF_MCLLOWAT: | 281 | case MBUF_MCLLOWAT: | |
284 | mcllowat = newval; | 282 | mcllowat = newval; | |
285 | pool_cache_setlowat(mcl_cache, mcllowat); | 283 | pool_cache_setlowat(mcl_cache, mcllowat); | |
286 | break; | 284 | break; | |
287 | } | 285 | } | |
288 | 286 | |||
289 | return (0); | 287 | return (0); | |
290 | } | 288 | } | |
291 | 289 | |||
292 | #ifdef MBUFTRACE | 290 | #ifdef MBUFTRACE | |
293 | static void | 291 | static void | |
294 | mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) | 292 | mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) | |
295 | { | 293 | { | |
296 | struct mowner_counter *mc = v1; | 294 | struct mowner_counter *mc = v1; | |
297 | struct mowner_user *mo_user = v2; | 295 | struct mowner_user *mo_user = v2; | |
298 | int i; | 296 | int i; | |
299 | 297 | |||
300 | for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { | 298 | for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { | |
301 | mo_user->mo_counter[i] += mc->mc_counter[i]; | 299 | mo_user->mo_counter[i] += mc->mc_counter[i]; | |
302 | } | 300 | } | |
303 | } | 301 | } | |
304 | 302 | |||
305 | static void | 303 | static void | |
306 | mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) | 304 | mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) | |
307 | { | 305 | { | |
308 | 306 | |||
309 | memset(mo_user, 0, sizeof(*mo_user)); | 307 | memset(mo_user, 0, sizeof(*mo_user)); | |
310 | CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); | 308 | CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); | |
311 | CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); | 309 | CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); | |
312 | memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); | 310 | memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); | |
313 | memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); | 311 | memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); | |
314 | percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); | 312 | percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); | |
315 | } | 313 | } | |
316 | 314 | |||
317 | static int | 315 | static int | |
318 | sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) | 316 | sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) | |
319 | { | 317 | { | |
320 | struct mowner *mo; | 318 | struct mowner *mo; | |
321 | size_t len = 0; | 319 | size_t len = 0; | |
322 | int error = 0; | 320 | int error = 0; | |
323 | 321 | |||
324 | if (namelen != 0) | 322 | if (namelen != 0) | |
325 | return (EINVAL); | 323 | return (EINVAL); | |
326 | if (newp != NULL) | 324 | if (newp != NULL) | |
327 | return (EPERM); | 325 | return (EPERM); | |
328 | 326 | |||
329 | LIST_FOREACH(mo, &mowners, mo_link) { | 327 | LIST_FOREACH(mo, &mowners, mo_link) { | |
330 | struct mowner_user mo_user; | 328 | struct mowner_user mo_user; | |
331 | 329 | |||
332 | mowner_convert_to_user(mo, &mo_user); | 330 | mowner_convert_to_user(mo, &mo_user); | |
333 | 331 | |||
334 | if (oldp != NULL) { | 332 | if (oldp != NULL) { | |
335 | if (*oldlenp - len < sizeof(mo_user)) { | 333 | if (*oldlenp - len < sizeof(mo_user)) { | |
336 | error = ENOMEM; | 334 | error = ENOMEM; | |
337 | break; | 335 | break; | |
338 | } | 336 | } | |
339 | error = copyout(&mo_user, (char *)oldp + len, | 337 | error = copyout(&mo_user, (char *)oldp + len, | |
340 | sizeof(mo_user)); | 338 | sizeof(mo_user)); | |
341 | if (error) | 339 | if (error) | |
342 | break; | 340 | break; | |
343 | } | 341 | } | |
344 | len += sizeof(mo_user); | 342 | len += sizeof(mo_user); | |
345 | } | 343 | } | |
346 | 344 | |||
347 | if (error == 0) | 345 | if (error == 0) | |
348 | *oldlenp = len; | 346 | *oldlenp = len; | |
349 | 347 | |||
350 | return (error); | 348 | return (error); | |
351 | } | 349 | } | |
352 | #endif /* MBUFTRACE */ | 350 | #endif /* MBUFTRACE */ | |
353 | 351 | |||
354 | static void | 352 | static void | |
355 | mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) | 353 | mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) | |
356 | { | 354 | { | |
357 | struct mbstat_cpu *mbsc = v1; | 355 | struct mbstat_cpu *mbsc = v1; | |
358 | struct mbstat *mbs = v2; | 356 | struct mbstat *mbs = v2; | |
359 | int i; | 357 | int i; | |
360 | 358 | |||
361 | for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { | 359 | for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { | |
362 | mbs->m_mtypes[i] += mbsc->m_mtypes[i]; | 360 | mbs->m_mtypes[i] += mbsc->m_mtypes[i]; | |
363 | } | 361 | } | |
364 | } | 362 | } | |
365 | 363 | |||
366 | static void | 364 | static void | |
367 | mbstat_convert_to_user(struct mbstat *mbs) | 365 | mbstat_convert_to_user(struct mbstat *mbs) | |
368 | { | 366 | { | |
369 | 367 | |||
370 | memset(mbs, 0, sizeof(*mbs)); | 368 | memset(mbs, 0, sizeof(*mbs)); | |
371 | mbs->m_drain = mbstat.m_drain; | 369 | mbs->m_drain = mbstat.m_drain; | |
372 | percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); | 370 | percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); | |
373 | } | 371 | } | |
374 | 372 | |||
375 | static int | 373 | static int | |
376 | sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) | 374 | sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) | |
377 | { | 375 | { | |
378 | struct sysctlnode node; | 376 | struct sysctlnode node; | |
379 | struct mbstat mbs; | 377 | struct mbstat mbs; | |
380 | 378 | |||
381 | mbstat_convert_to_user(&mbs); | 379 | mbstat_convert_to_user(&mbs); | |
382 | node = *rnode; | 380 | node = *rnode; | |
383 | node.sysctl_data = &mbs; | 381 | node.sysctl_data = &mbs; | |
384 | node.sysctl_size = sizeof(mbs); | 382 | node.sysctl_size = sizeof(mbs); | |
385 | return sysctl_lookup(SYSCTLFN_CALL(&node)); | 383 | return sysctl_lookup(SYSCTLFN_CALL(&node)); | |
386 | } | 384 | } | |
387 | 385 | |||
388 | static void | 386 | static void | |
389 | sysctl_kern_mbuf_setup(void) | 387 | sysctl_kern_mbuf_setup(void) | |
390 | { | 388 | { | |
391 | 389 | |||
392 | KASSERT(mbuf_sysctllog == NULL); | 390 | KASSERT(mbuf_sysctllog == NULL); | |
393 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 391 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
394 | CTLFLAG_PERMANENT, | 392 | CTLFLAG_PERMANENT, | |
395 | CTLTYPE_NODE, "kern", NULL, | 393 | CTLTYPE_NODE, "kern", NULL, | |
396 | NULL, 0, NULL, 0, | 394 | NULL, 0, NULL, 0, | |
397 | CTL_KERN, CTL_EOL); | 395 | CTL_KERN, CTL_EOL); | |
398 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 396 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
399 | CTLFLAG_PERMANENT, | 397 | CTLFLAG_PERMANENT, | |
400 | CTLTYPE_NODE, "mbuf", | 398 | CTLTYPE_NODE, "mbuf", | |
401 | SYSCTL_DESCR("mbuf control variables"), | 399 | SYSCTL_DESCR("mbuf control variables"), | |
402 | NULL, 0, NULL, 0, | 400 | NULL, 0, NULL, 0, | |
403 | CTL_KERN, KERN_MBUF, CTL_EOL); | 401 | CTL_KERN, KERN_MBUF, CTL_EOL); | |
404 | 402 | |||
405 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 403 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
406 | CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, | 404 | CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, | |
407 | CTLTYPE_INT, "msize", | 405 | CTLTYPE_INT, "msize", | |
408 | SYSCTL_DESCR("mbuf base size"), | 406 | SYSCTL_DESCR("mbuf base size"), | |
409 | NULL, msize, NULL, 0, | 407 | NULL, msize, NULL, 0, | |
410 | CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); | 408 | CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); | |
411 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 409 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
412 | CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, | 410 | CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, | |
413 | CTLTYPE_INT, "mclbytes", | 411 | CTLTYPE_INT, "mclbytes", | |
414 | SYSCTL_DESCR("mbuf cluster size"), | 412 | SYSCTL_DESCR("mbuf cluster size"), | |
415 | NULL, mclbytes, NULL, 0, | 413 | NULL, mclbytes, NULL, 0, | |
416 | CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); | 414 | CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); | |
417 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 415 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
418 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | 416 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | |
419 | CTLTYPE_INT, "nmbclusters", | 417 | CTLTYPE_INT, "nmbclusters", | |
420 | SYSCTL_DESCR("Limit on the number of mbuf clusters"), | 418 | SYSCTL_DESCR("Limit on the number of mbuf clusters"), | |
421 | sysctl_kern_mbuf, 0, &nmbclusters, 0, | 419 | sysctl_kern_mbuf, 0, &nmbclusters, 0, | |
422 | CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); | 420 | CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); | |
423 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 421 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
424 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | 422 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | |
425 | CTLTYPE_INT, "mblowat", | 423 | CTLTYPE_INT, "mblowat", | |
426 | SYSCTL_DESCR("mbuf low water mark"), | 424 | SYSCTL_DESCR("mbuf low water mark"), | |
427 | sysctl_kern_mbuf, 0, &mblowat, 0, | 425 | sysctl_kern_mbuf, 0, &mblowat, 0, | |
428 | CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); | 426 | CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); | |
429 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 427 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
430 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | 428 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | |
431 | CTLTYPE_INT, "mcllowat", | 429 | CTLTYPE_INT, "mcllowat", | |
432 | SYSCTL_DESCR("mbuf cluster low water mark"), | 430 | SYSCTL_DESCR("mbuf cluster low water mark"), | |
433 | sysctl_kern_mbuf, 0, &mcllowat, 0, | 431 | sysctl_kern_mbuf, 0, &mcllowat, 0, | |
434 | CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); | 432 | CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); | |
435 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 433 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
436 | CTLFLAG_PERMANENT, | 434 | CTLFLAG_PERMANENT, | |
437 | CTLTYPE_STRUCT, "stats", | 435 | CTLTYPE_STRUCT, "stats", | |
438 | SYSCTL_DESCR("mbuf allocation statistics"), | 436 | SYSCTL_DESCR("mbuf allocation statistics"), | |
439 | sysctl_kern_mbuf_stats, 0, NULL, 0, | 437 | sysctl_kern_mbuf_stats, 0, NULL, 0, | |
440 | CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); | 438 | CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); | |
441 | #ifdef MBUFTRACE | 439 | #ifdef MBUFTRACE | |
442 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | 440 | sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, | |
443 | CTLFLAG_PERMANENT, | 441 | CTLFLAG_PERMANENT, | |
444 | CTLTYPE_STRUCT, "mowners", | 442 | CTLTYPE_STRUCT, "mowners", | |
445 | SYSCTL_DESCR("Information about mbuf owners"), | 443 | SYSCTL_DESCR("Information about mbuf owners"), | |
446 | sysctl_kern_mbuf_mowners, 0, NULL, 0, | 444 | sysctl_kern_mbuf_mowners, 0, NULL, 0, | |
447 | CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); | 445 | CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); | |
448 | #endif /* MBUFTRACE */ | 446 | #endif /* MBUFTRACE */ | |
449 | } | 447 | } | |
450 | 448 | |||
451 | static int | 449 | static int | |
452 | mb_ctor(void *arg, void *object, int flags) | 450 | mb_ctor(void *arg, void *object, int flags) | |
453 | { | 451 | { | |
454 | struct mbuf *m = object; | 452 | struct mbuf *m = object; | |
455 | 453 | |||
456 | #ifdef POOL_VTOPHYS | 454 | #ifdef POOL_VTOPHYS | |
457 | m->m_paddr = POOL_VTOPHYS(m); | 455 | m->m_paddr = POOL_VTOPHYS(m); | |
458 | #else | 456 | #else | |
459 | m->m_paddr = M_PADDR_INVALID; | 457 | m->m_paddr = M_PADDR_INVALID; | |
460 | #endif | 458 | #endif | |
461 | return (0); | 459 | return (0); | |
462 | } | 460 | } | |
463 | 461 | |||
464 | void | 462 | void | |
465 | m_reclaim(void *arg, int flags) | 463 | m_reclaim(void *arg, int flags) | |
466 | { | 464 | { | |
467 | struct domain *dp; | 465 | struct domain *dp; | |
468 | const struct protosw *pr; | 466 | const struct protosw *pr; | |
469 | struct ifnet *ifp; | 467 | struct ifnet *ifp; | |
470 | int s; | 468 | int s; | |
471 | 469 | |||
472 | KERNEL_LOCK(1, NULL); | 470 | KERNEL_LOCK(1, NULL); | |
473 | s = splvm(); | 471 | s = splvm(); | |
474 | DOMAIN_FOREACH(dp) { | 472 | DOMAIN_FOREACH(dp) { | |
475 | for (pr = dp->dom_protosw; | 473 | for (pr = dp->dom_protosw; | |
476 | pr < dp->dom_protoswNPROTOSW; pr++) | 474 | pr < dp->dom_protoswNPROTOSW; pr++) | |
477 | if (pr->pr_drain) | 475 | if (pr->pr_drain) | |
478 | (*pr->pr_drain)(); | 476 | (*pr->pr_drain)(); | |
479 | } | 477 | } | |
480 | IFNET_FOREACH(ifp) { | 478 | IFNET_FOREACH(ifp) { | |
481 | if (ifp->if_drain) | 479 | if (ifp->if_drain) | |
482 | (*ifp->if_drain)(ifp); | 480 | (*ifp->if_drain)(ifp); | |
483 | } | 481 | } | |
484 | splx(s); | 482 | splx(s); | |
485 | mbstat.m_drain++; | 483 | mbstat.m_drain++; | |
486 | KERNEL_UNLOCK_ONE(NULL); | 484 | KERNEL_UNLOCK_ONE(NULL); | |
487 | } | 485 | } | |
488 | 486 | |||
489 | /* | 487 | /* | |
490 | * Space allocation routines. | 488 | * Space allocation routines. | |
491 | * These are also available as macros | 489 | * These are also available as macros | |
492 | * for critical paths. | 490 | * for critical paths. | |
493 | */ | 491 | */ | |
494 | struct mbuf * | 492 | struct mbuf * | |
495 | m_get(int nowait, int type) | 493 | m_get(int nowait, int type) | |
496 | { | 494 | { | |
497 | struct mbuf *m; | 495 | struct mbuf *m; | |
498 | 496 | |||
499 | m = pool_cache_get(mb_cache, | 497 | m = pool_cache_get(mb_cache, | |
500 | nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); | 498 | nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); | |
501 | if (m == NULL) | 499 | if (m == NULL) | |
502 | return NULL; | 500 | return NULL; | |
503 | 501 | |||
504 | mbstat_type_add(type, 1); | 502 | mbstat_type_add(type, 1); | |
505 | mowner_init(m, type); | 503 | mowner_init(m, type); | |
506 | m->m_ext_ref = m; | 504 | m->m_ext_ref = m; | |
507 | m->m_type = type; | 505 | m->m_type = type; | |
508 | m->m_next = NULL; | 506 | m->m_next = NULL; | |
509 | m->m_nextpkt = NULL; | 507 | m->m_nextpkt = NULL; | |
510 | m->m_data = m->m_dat; | 508 | m->m_data = m->m_dat; | |
511 | m->m_flags = 0; | 509 | m->m_flags = 0; | |
512 | 510 | |||
513 | return m; | 511 | return m; | |
514 | } | 512 | } | |
515 | 513 | |||
516 | struct mbuf * | 514 | struct mbuf * | |
517 | m_gethdr(int nowait, int type) | 515 | m_gethdr(int nowait, int type) | |
518 | { | 516 | { | |
519 | struct mbuf *m; | 517 | struct mbuf *m; | |
520 | 518 | |||
521 | m = m_get(nowait, type); | 519 | m = m_get(nowait, type); | |
522 | if (m == NULL) | 520 | if (m == NULL) | |
523 | return NULL; | 521 | return NULL; | |
524 | 522 | |||
525 | m->m_data = m->m_pktdat; | 523 | m->m_data = m->m_pktdat; | |
526 | m->m_flags = M_PKTHDR; | 524 | m->m_flags = M_PKTHDR; | |
527 | m->m_pkthdr.rcvif = NULL; | 525 | m->m_pkthdr.rcvif = NULL; | |
528 | m->m_pkthdr.csum_flags = 0; | 526 | m->m_pkthdr.csum_flags = 0; | |
529 | m->m_pkthdr.csum_data = 0; | 527 | m->m_pkthdr.csum_data = 0; | |
530 | SLIST_INIT(&m->m_pkthdr.tags); | 528 | SLIST_INIT(&m->m_pkthdr.tags); | |
531 | 529 | |||
532 | return m; | 530 | return m; | |
533 | } | 531 | } | |
534 | 532 | |||
535 | struct mbuf * | 533 | struct mbuf * | |
536 | m_getclr(int nowait, int type) | 534 | m_getclr(int nowait, int type) | |
537 | { | 535 | { | |
538 | struct mbuf *m; | 536 | struct mbuf *m; | |
539 | 537 | |||
540 | MGET(m, nowait, type); | 538 | MGET(m, nowait, type); | |
541 | if (m == 0) | 539 | if (m == 0) | |
542 | return (NULL); | 540 | return (NULL); | |
543 | memset(mtod(m, void *), 0, MLEN); | 541 | memset(mtod(m, void *), 0, MLEN); | |
544 | return (m); | 542 | return (m); | |
545 | } | 543 | } | |
546 | 544 | |||
547 | void | 545 | void | |
548 | m_clget(struct mbuf *m, int nowait) | 546 | m_clget(struct mbuf *m, int nowait) | |
549 | { | 547 | { | |
550 | 548 | |||
551 | MCLGET(m, nowait); | 549 | MCLGET(m, nowait); | |
552 | } | 550 | } | |
553 | 551 | |||
554 | struct mbuf * | 552 | struct mbuf * | |
555 | m_free(struct mbuf *m) | 553 | m_free(struct mbuf *m) | |
556 | { | 554 | { | |
557 | struct mbuf *n; | 555 | struct mbuf *n; | |
558 | 556 | |||
559 | MFREE(m, n); | 557 | MFREE(m, n); | |
560 | return (n); | 558 | return (n); | |
561 | } | 559 | } | |
562 | 560 | |||
563 | void | 561 | void | |
564 | m_freem(struct mbuf *m) | 562 | m_freem(struct mbuf *m) | |
565 | { | 563 | { | |
566 | struct mbuf *n; | 564 | struct mbuf *n; | |
567 | 565 | |||
568 | if (m == NULL) | 566 | if (m == NULL) | |
569 | return; | 567 | return; | |
570 | do { | 568 | do { | |
571 | MFREE(m, n); | 569 | MFREE(m, n); | |
572 | m = n; | 570 | m = n; | |
573 | } while (m); | 571 | } while (m); | |
574 | } | 572 | } | |
575 | 573 | |||
576 | #ifdef MBUFTRACE | 574 | #ifdef MBUFTRACE | |
577 | /* | 575 | /* | |
578 | * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. | 576 | * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. | |
579 | */ | 577 | */ | |
580 | void | 578 | void | |
581 | m_claimm(struct mbuf *m, struct mowner *mo) | 579 | m_claimm(struct mbuf *m, struct mowner *mo) | |
582 | { | 580 | { | |
583 | 581 | |||
584 | for (; m != NULL; m = m->m_next) | 582 | for (; m != NULL; m = m->m_next) | |
585 | MCLAIM(m, mo); | 583 | MCLAIM(m, mo); | |
586 | } | 584 | } | |
587 | #endif | 585 | #endif | |
588 | 586 | |||
589 | /* | 587 | /* | |
590 | * Mbuffer utility routines. | 588 | * Mbuffer utility routines. | |
591 | */ | 589 | */ | |
592 | 590 | |||
593 | /* | 591 | /* | |
594 | * Lesser-used path for M_PREPEND: | 592 | * Lesser-used path for M_PREPEND: | |
595 | * allocate new mbuf to prepend to chain, | 593 | * allocate new mbuf to prepend to chain, | |
596 | * copy junk along. | 594 | * copy junk along. | |
597 | */ | 595 | */ | |
598 | struct mbuf * | 596 | struct mbuf * | |
599 | m_prepend(struct mbuf *m, int len, int how) | 597 | m_prepend(struct mbuf *m, int len, int how) | |
600 | { | 598 | { | |
601 | struct mbuf *mn; | 599 | struct mbuf *mn; | |
602 | 600 | |||
603 | MGET(mn, how, m->m_type); | 601 | MGET(mn, how, m->m_type); | |
604 | if (mn == (struct mbuf *)NULL) { | 602 | if (mn == (struct mbuf *)NULL) { | |
605 | m_freem(m); | 603 | m_freem(m); | |
606 | return ((struct mbuf *)NULL); | 604 | return ((struct mbuf *)NULL); | |
607 | } | 605 | } | |
608 | if (m->m_flags & M_PKTHDR) { | 606 | if (m->m_flags & M_PKTHDR) { | |
609 | M_MOVE_PKTHDR(mn, m); | 607 | M_MOVE_PKTHDR(mn, m); | |
610 | } else { | 608 | } else { | |
611 | MCLAIM(mn, m->m_owner); | 609 | MCLAIM(mn, m->m_owner); | |
612 | } | 610 | } | |
613 | mn->m_next = m; | 611 | mn->m_next = m; | |
614 | m = mn; | 612 | m = mn; | |
615 | if (len < MHLEN) | 613 | if (len < MHLEN) | |
616 | MH_ALIGN(m, len); | 614 | MH_ALIGN(m, len); | |
617 | m->m_len = len; | 615 | m->m_len = len; | |
618 | return (m); | 616 | return (m); | |
619 | } | 617 | } | |
620 | 618 | |||
621 | /* | 619 | /* | |
622 | * Make a copy of an mbuf chain starting "off0" bytes from the beginning, | 620 | * Make a copy of an mbuf chain starting "off0" bytes from the beginning, | |
623 | * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. | 621 | * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. | |
624 | * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. | 622 | * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. | |
625 | */ | 623 | */ | |
626 | int MCFail; | 624 | int MCFail; | |
627 | 625 | |||
628 | struct mbuf * | 626 | struct mbuf * | |
629 | m_copym(struct mbuf *m, int off0, int len, int wait) | 627 | m_copym(struct mbuf *m, int off0, int len, int wait) | |
630 | { | 628 | { | |
631 | 629 | |||
632 | return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ | 630 | return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ | |
633 | } | 631 | } | |
634 | 632 | |||
635 | struct mbuf * | 633 | struct mbuf * | |
636 | m_dup(struct mbuf *m, int off0, int len, int wait) | 634 | m_dup(struct mbuf *m, int off0, int len, int wait) | |
637 | { | 635 | { | |
638 | 636 | |||
639 | return m_copym0(m, off0, len, wait, 1); /* deep copy */ | 637 | return m_copym0(m, off0, len, wait, 1); /* deep copy */ | |
640 | } | 638 | } | |
641 | 639 | |||
642 | static struct mbuf * | 640 | static struct mbuf * | |
643 | m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) | 641 | m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) | |
644 | { | 642 | { | |
645 | struct mbuf *n, **np; | 643 | struct mbuf *n, **np; | |
646 | int off = off0; | 644 | int off = off0; | |
647 | struct mbuf *top; | 645 | struct mbuf *top; | |
648 | int copyhdr = 0; | 646 | int copyhdr = 0; | |
649 | 647 | |||
650 | if (off < 0 || len < 0) | 648 | if (off < 0 || len < 0) | |
651 | panic("m_copym: off %d, len %d", off, len); | 649 | panic("m_copym: off %d, len %d", off, len); | |
652 | if (off == 0 && m->m_flags & M_PKTHDR) | 650 | if (off == 0 && m->m_flags & M_PKTHDR) | |
653 | copyhdr = 1; | 651 | copyhdr = 1; | |
654 | while (off > 0) { | 652 | while (off > 0) { | |
655 | if (m == 0) | 653 | if (m == 0) | |
656 | panic("m_copym: m == 0, off %d", off); | 654 | panic("m_copym: m == 0, off %d", off); | |
657 | if (off < m->m_len) | 655 | if (off < m->m_len) | |
658 | break; | 656 | break; | |
659 | off -= m->m_len; | 657 | off -= m->m_len; | |
660 | m = m->m_next; | 658 | m = m->m_next; | |
661 | } | 659 | } | |
662 | np = ⊤ | 660 | np = ⊤ | |
663 | top = 0; | 661 | top = 0; | |
664 | while (len > 0) { | 662 | while (len > 0) { | |
665 | if (m == 0) { | 663 | if (m == 0) { | |
666 | if (len != M_COPYALL) | 664 | if (len != M_COPYALL) | |
667 | panic("m_copym: m == 0, len %d [!COPYALL]", | 665 | panic("m_copym: m == 0, len %d [!COPYALL]", | |
668 | len); | 666 | len); | |
669 | break; | 667 | break; | |
670 | } | 668 | } | |
671 | MGET(n, wait, m->m_type); | 669 | MGET(n, wait, m->m_type); | |
672 | *np = n; | 670 | *np = n; | |
673 | if (n == 0) | 671 | if (n == 0) | |
674 | goto nospace; | 672 | goto nospace; | |
675 | MCLAIM(n, m->m_owner); | 673 | MCLAIM(n, m->m_owner); | |
676 | if (copyhdr) { | 674 | if (copyhdr) { | |
677 | M_COPY_PKTHDR(n, m); | 675 | M_COPY_PKTHDR(n, m); | |
678 | if (len == M_COPYALL) | 676 | if (len == M_COPYALL) | |
679 | n->m_pkthdr.len -= off0; | 677 | n->m_pkthdr.len -= off0; | |
680 | else | 678 | else | |
681 | n->m_pkthdr.len = len; | 679 | n->m_pkthdr.len = len; | |
682 | copyhdr = 0; | 680 | copyhdr = 0; | |
683 | } | 681 | } | |
684 | n->m_len = min(len, m->m_len - off); | 682 | n->m_len = min(len, m->m_len - off); | |
685 | if (m->m_flags & M_EXT) { | 683 | if (m->m_flags & M_EXT) { | |
686 | if (!deep) { | 684 | if (!deep) { | |
687 | n->m_data = m->m_data + off; | 685 | n->m_data = m->m_data + off; | |
688 | MCLADDREFERENCE(m, n); | 686 | MCLADDREFERENCE(m, n); | |
689 | } else { | 687 | } else { | |
690 | /* | 688 | /* | |
691 | * we are unsure about the way m was allocated. | 689 | * we are unsure about the way m was allocated. | |
692 | * copy into multiple MCLBYTES cluster mbufs. | 690 | * copy into multiple MCLBYTES cluster mbufs. | |
693 | */ | 691 | */ | |
694 | MCLGET(n, wait); | 692 | MCLGET(n, wait); | |
695 | n->m_len = 0; | 693 | n->m_len = 0; | |
696 | n->m_len = M_TRAILINGSPACE(n); | 694 | n->m_len = M_TRAILINGSPACE(n); | |
697 | n->m_len = min(n->m_len, len); | 695 | n->m_len = min(n->m_len, len); | |
698 | n->m_len = min(n->m_len, m->m_len - off); | 696 | n->m_len = min(n->m_len, m->m_len - off); | |
699 | memcpy(mtod(n, void *), mtod(m, char *) + off, | 697 | memcpy(mtod(n, void *), mtod(m, char *) + off, | |
700 | (unsigned)n->m_len); | 698 | (unsigned)n->m_len); | |
701 | } | 699 | } | |
702 | } else | 700 | } else | |
703 | memcpy(mtod(n, void *), mtod(m, char *) + off, | 701 | memcpy(mtod(n, void *), mtod(m, char *) + off, | |
704 | (unsigned)n->m_len); | 702 | (unsigned)n->m_len); | |
705 | if (len != M_COPYALL) | 703 | if (len != M_COPYALL) | |
706 | len -= n->m_len; | 704 | len -= n->m_len; | |
707 | off += n->m_len; | 705 | off += n->m_len; | |
708 | #ifdef DIAGNOSTIC | 706 | #ifdef DIAGNOSTIC | |
709 | if (off > m->m_len) | 707 | if (off > m->m_len) | |
710 | panic("m_copym0 overrun"); | 708 | panic("m_copym0 overrun"); | |
711 | #endif | 709 | #endif | |
712 | if (off == m->m_len) { | 710 | if (off == m->m_len) { | |
713 | m = m->m_next; | 711 | m = m->m_next; | |
714 | off = 0; | 712 | off = 0; | |
715 | } | 713 | } | |
716 | np = &n->m_next; | 714 | np = &n->m_next; | |
717 | } | 715 | } | |
718 | if (top == 0) | 716 | if (top == 0) | |
719 | MCFail++; | 717 | MCFail++; | |
720 | return (top); | 718 | return (top); | |
721 | nospace: | 719 | nospace: | |
722 | m_freem(top); | 720 | m_freem(top); | |
723 | MCFail++; | 721 | MCFail++; | |
724 | return (NULL); | 722 | return (NULL); | |
725 | } | 723 | } | |
726 | 724 | |||
727 | /* | 725 | /* | |
728 | * Copy an entire packet, including header (which must be present). | 726 | * Copy an entire packet, including header (which must be present). | |
729 | * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. | 727 | * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. | |
730 | */ | 728 | */ | |
731 | struct mbuf * | 729 | struct mbuf * | |
732 | m_copypacket(struct mbuf *m, int how) | 730 | m_copypacket(struct mbuf *m, int how) | |
733 | { | 731 | { | |
734 | struct mbuf *top, *n, *o; | 732 | struct mbuf *top, *n, *o; | |
735 | 733 | |||
736 | MGET(n, how, m->m_type); | 734 | MGET(n, how, m->m_type); | |
737 | top = n; | 735 | top = n; | |
738 | if (!n) | 736 | if (!n) | |
739 | goto nospace; | 737 | goto nospace; | |
740 | 738 | |||
741 | MCLAIM(n, m->m_owner); | 739 | MCLAIM(n, m->m_owner); | |
742 | M_COPY_PKTHDR(n, m); | 740 | M_COPY_PKTHDR(n, m); | |
743 | n->m_len = m->m_len; | 741 | n->m_len = m->m_len; | |
744 | if (m->m_flags & M_EXT) { | 742 | if (m->m_flags & M_EXT) { | |
745 | n->m_data = m->m_data; | 743 | n->m_data = m->m_data; | |
746 | MCLADDREFERENCE(m, n); | 744 | MCLADDREFERENCE(m, n); | |
747 | } else { | 745 | } else { | |
748 | memcpy(mtod(n, char *), mtod(m, char *), n->m_len); | 746 | memcpy(mtod(n, char *), mtod(m, char *), n->m_len); | |
749 | } | 747 | } | |
750 | 748 | |||
751 | m = m->m_next; | 749 | m = m->m_next; | |
752 | while (m) { | 750 | while (m) { | |
753 | MGET(o, how, m->m_type); | 751 | MGET(o, how, m->m_type); | |
754 | if (!o) | 752 | if (!o) | |
755 | goto nospace; | 753 | goto nospace; | |
756 | 754 | |||
757 | MCLAIM(o, m->m_owner); | 755 | MCLAIM(o, m->m_owner); | |
758 | n->m_next = o; | 756 | n->m_next = o; | |
759 | n = n->m_next; | 757 | n = n->m_next; | |
760 | 758 | |||
761 | n->m_len = m->m_len; | 759 | n->m_len = m->m_len; | |
762 | if (m->m_flags & M_EXT) { | 760 | if (m->m_flags & M_EXT) { | |
763 | n->m_data = m->m_data; | 761 | n->m_data = m->m_data; | |
764 | MCLADDREFERENCE(m, n); | 762 | MCLADDREFERENCE(m, n); | |
765 | } else { | 763 | } else { | |
766 | memcpy(mtod(n, char *), mtod(m, char *), n->m_len); | 764 | memcpy(mtod(n, char *), mtod(m, char *), n->m_len); | |
767 | } | 765 | } | |
768 | 766 | |||
769 | m = m->m_next; | 767 | m = m->m_next; | |
770 | } | 768 | } | |
771 | return top; | 769 | return top; | |
772 | nospace: | 770 | nospace: | |
773 | m_freem(top); | 771 | m_freem(top); | |
774 | MCFail++; | 772 | MCFail++; | |
775 | return NULL; | 773 | return NULL; | |
776 | } | 774 | } | |
777 | 775 | |||
778 | /* | 776 | /* | |
779 | * Copy data from an mbuf chain starting "off" bytes from the beginning, | 777 | * Copy data from an mbuf chain starting "off" bytes from the beginning, | |
780 | * continuing for "len" bytes, into the indicated buffer. | 778 | * continuing for "len" bytes, into the indicated buffer. | |
781 | */ | 779 | */ | |
782 | void | 780 | void | |
783 | m_copydata(struct mbuf *m, int off, int len, void *vp) | 781 | m_copydata(struct mbuf *m, int off, int len, void *vp) | |
784 | { | 782 | { | |
785 | unsigned count; | 783 | unsigned count; | |
786 | void * cp = vp; | 784 | void * cp = vp; | |
787 | 785 | |||
788 | if (off < 0 || len < 0) | 786 | if (off < 0 || len < 0) | |
789 | panic("m_copydata: off %d, len %d", off, len); | 787 | panic("m_copydata: off %d, len %d", off, len); | |
790 | while (off > 0) { | 788 | while (off > 0) { | |
791 | if (m == NULL) | 789 | if (m == NULL) | |
792 | panic("m_copydata: m == NULL, off %d", off); | 790 | panic("m_copydata: m == NULL, off %d", off); | |
793 | if (off < m->m_len) | 791 | if (off < m->m_len) | |
794 | break; | 792 | break; | |
795 | off -= m->m_len; | 793 | off -= m->m_len; | |
796 | m = m->m_next; | 794 | m = m->m_next; | |
797 | } | 795 | } | |
798 | while (len > 0) { | 796 | while (len > 0) { | |
799 | if (m == NULL) | 797 | if (m == NULL) | |
800 | panic("m_copydata: m == NULL, len %d", len); | 798 | panic("m_copydata: m == NULL, len %d", len); | |
801 | count = min(m->m_len - off, len); | 799 | count = min(m->m_len - off, len); | |
802 | memcpy(cp, mtod(m, char *) + off, count); | 800 | memcpy(cp, mtod(m, char *) + off, count); | |
803 | len -= count; | 801 | len -= count; | |
804 | cp = (char *)cp + count; | 802 | cp = (char *)cp + count; | |
805 | off = 0; | 803 | off = 0; | |
806 | m = m->m_next; | 804 | m = m->m_next; | |
807 | } | 805 | } | |
808 | } | 806 | } | |
809 | 807 | |||
810 | /* | 808 | /* | |
811 | * Concatenate mbuf chain n to m. | 809 | * Concatenate mbuf chain n to m. | |
812 | * n might be copied into m (when n->m_len is small), therefore data portion of | 810 | * n might be copied into m (when n->m_len is small), therefore data portion of | |
813 | * n could be copied into an mbuf of different mbuf type. | 811 | * n could be copied into an mbuf of different mbuf type. | |
814 | * Any m_pkthdr is not updated. | 812 | * Any m_pkthdr is not updated. | |
815 | */ | 813 | */ | |
816 | void | 814 | void | |
817 | m_cat(struct mbuf *m, struct mbuf *n) | 815 | m_cat(struct mbuf *m, struct mbuf *n) | |
818 | { | 816 | { | |
819 | 817 | |||
820 | while (m->m_next) | 818 | while (m->m_next) | |
821 | m = m->m_next; | 819 | m = m->m_next; | |
822 | while (n) { | 820 | while (n) { | |
823 | if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { | 821 | if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { | |
824 | /* just join the two chains */ | 822 | /* just join the two chains */ | |
825 | m->m_next = n; | 823 | m->m_next = n; | |
826 | return; | 824 | return; | |
827 | } | 825 | } | |
828 | /* splat the data from one into the other */ | 826 | /* splat the data from one into the other */ | |
829 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | 827 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | |
830 | (u_int)n->m_len); | 828 | (u_int)n->m_len); | |
831 | m->m_len += n->m_len; | 829 | m->m_len += n->m_len; | |
832 | n = m_free(n); | 830 | n = m_free(n); | |
833 | } | 831 | } | |
834 | } | 832 | } | |
835 | 833 | |||
836 | void | 834 | void | |
837 | m_adj(struct mbuf *mp, int req_len) | 835 | m_adj(struct mbuf *mp, int req_len) | |
838 | { | 836 | { | |
839 | int len = req_len; | 837 | int len = req_len; | |
840 | struct mbuf *m; | 838 | struct mbuf *m; | |
841 | int count; | 839 | int count; | |
842 | 840 | |||
843 | if ((m = mp) == NULL) | 841 | if ((m = mp) == NULL) | |
844 | return; | 842 | return; | |
845 | if (len >= 0) { | 843 | if (len >= 0) { | |
846 | /* | 844 | /* | |
847 | * Trim from head. | 845 | * Trim from head. | |
848 | */ | 846 | */ | |
849 | while (m != NULL && len > 0) { | 847 | while (m != NULL && len > 0) { | |
850 | if (m->m_len <= len) { | 848 | if (m->m_len <= len) { | |
851 | len -= m->m_len; | 849 | len -= m->m_len; | |
852 | m->m_len = 0; | 850 | m->m_len = 0; | |
853 | m = m->m_next; | 851 | m = m->m_next; | |
854 | } else { | 852 | } else { | |
855 | m->m_len -= len; | 853 | m->m_len -= len; | |
856 | m->m_data += len; | 854 | m->m_data += len; | |
857 | len = 0; | 855 | len = 0; | |
858 | } | 856 | } | |
859 | } | 857 | } | |
860 | m = mp; | 858 | m = mp; | |
861 | if (mp->m_flags & M_PKTHDR) | 859 | if (mp->m_flags & M_PKTHDR) | |
862 | m->m_pkthdr.len -= (req_len - len); | 860 | m->m_pkthdr.len -= (req_len - len); | |
863 | } else { | 861 | } else { | |
864 | /* | 862 | /* | |
865 | * Trim from tail. Scan the mbuf chain, | 863 | * Trim from tail. Scan the mbuf chain, | |
866 | * calculating its length and finding the last mbuf. | 864 | * calculating its length and finding the last mbuf. | |
867 | * If the adjustment only affects this mbuf, then just | 865 | * If the adjustment only affects this mbuf, then just | |
868 | * adjust and return. Otherwise, rescan and truncate | 866 | * adjust and return. Otherwise, rescan and truncate | |
869 | * after the remaining size. | 867 | * after the remaining size. | |
870 | */ | 868 | */ | |
871 | len = -len; | 869 | len = -len; | |
872 | count = 0; | 870 | count = 0; | |
873 | for (;;) { | 871 | for (;;) { | |
874 | count += m->m_len; | 872 | count += m->m_len; | |
875 | if (m->m_next == (struct mbuf *)0) | 873 | if (m->m_next == (struct mbuf *)0) | |
876 | break; | 874 | break; | |
877 | m = m->m_next; | 875 | m = m->m_next; | |
878 | } | 876 | } | |
879 | if (m->m_len >= len) { | 877 | if (m->m_len >= len) { | |
880 | m->m_len -= len; | 878 | m->m_len -= len; | |
881 | if (mp->m_flags & M_PKTHDR) | 879 | if (mp->m_flags & M_PKTHDR) | |
882 | mp->m_pkthdr.len -= len; | 880 | mp->m_pkthdr.len -= len; | |
883 | return; | 881 | return; | |
884 | } | 882 | } | |
885 | count -= len; | 883 | count -= len; | |
886 | if (count < 0) | 884 | if (count < 0) | |
887 | count = 0; | 885 | count = 0; | |
888 | /* | 886 | /* | |
889 | * Correct length for chain is "count". | 887 | * Correct length for chain is "count". | |
890 | * Find the mbuf with last data, adjust its length, | 888 | * Find the mbuf with last data, adjust its length, | |
891 | * and toss data from remaining mbufs on chain. | 889 | * and toss data from remaining mbufs on chain. | |
892 | */ | 890 | */ | |
893 | m = mp; | 891 | m = mp; | |
894 | if (m->m_flags & M_PKTHDR) | 892 | if (m->m_flags & M_PKTHDR) | |
895 | m->m_pkthdr.len = count; | 893 | m->m_pkthdr.len = count; | |
896 | for (; m; m = m->m_next) { | 894 | for (; m; m = m->m_next) { | |
897 | if (m->m_len >= count) { | 895 | if (m->m_len >= count) { | |
898 | m->m_len = count; | 896 | m->m_len = count; | |
899 | break; | 897 | break; | |
900 | } | 898 | } | |
901 | count -= m->m_len; | 899 | count -= m->m_len; | |
902 | } | 900 | } | |
903 | if (m) | 901 | if (m) | |
904 | while (m->m_next) | 902 | while (m->m_next) | |
905 | (m = m->m_next)->m_len = 0; | 903 | (m = m->m_next)->m_len = 0; | |
906 | } | 904 | } | |
907 | } | 905 | } | |
908 | 906 | |||
909 | /* | 907 | /* | |
910 | * Rearrange an mbuf chain so that len bytes are contiguous | 908 | * Rearrange an mbuf chain so that len bytes are contiguous | |
911 | * and in the data area of an mbuf (so that mtod and dtom | 909 | * and in the data area of an mbuf (so that mtod and dtom | |
912 | * will work for a structure of size len). Returns the resulting | 910 | * will work for a structure of size len). Returns the resulting | |
913 | * mbuf chain on success, frees it and returns null on failure. | 911 | * mbuf chain on success, frees it and returns null on failure. | |
914 | * If there is room, it will add up to max_protohdr-len extra bytes to the | 912 | * If there is room, it will add up to max_protohdr-len extra bytes to the | |
915 | * contiguous region in an attempt to avoid being called next time. | 913 | * contiguous region in an attempt to avoid being called next time. | |
916 | */ | 914 | */ | |
917 | int MPFail; | 915 | int MPFail; | |
918 | 916 | |||
919 | struct mbuf * | 917 | struct mbuf * | |
920 | m_pullup(struct mbuf *n, int len) | 918 | m_pullup(struct mbuf *n, int len) | |
921 | { | 919 | { | |
922 | struct mbuf *m; | 920 | struct mbuf *m; | |
923 | int count; | 921 | int count; | |
924 | int space; | 922 | int space; | |
925 | 923 | |||
926 | /* | 924 | /* | |
927 | * If first mbuf has no cluster, and has room for len bytes | 925 | * If first mbuf has no cluster, and has room for len bytes | |
928 | * without shifting current data, pullup into it, | 926 | * without shifting current data, pullup into it, | |
929 | * otherwise allocate a new mbuf to prepend to the chain. | 927 | * otherwise allocate a new mbuf to prepend to the chain. | |
930 | */ | 928 | */ | |
931 | if ((n->m_flags & M_EXT) == 0 && | 929 | if ((n->m_flags & M_EXT) == 0 && | |
932 | n->m_data + len < &n->m_dat[MLEN] && n->m_next) { | 930 | n->m_data + len < &n->m_dat[MLEN] && n->m_next) { | |
933 | if (n->m_len >= len) | 931 | if (n->m_len >= len) | |
934 | return (n); | 932 | return (n); | |
935 | m = n; | 933 | m = n; | |
936 | n = n->m_next; | 934 | n = n->m_next; | |
937 | len -= m->m_len; | 935 | len -= m->m_len; | |
938 | } else { | 936 | } else { | |
939 | if (len > MHLEN) | 937 | if (len > MHLEN) | |
940 | goto bad; | 938 | goto bad; | |
941 | MGET(m, M_DONTWAIT, n->m_type); | 939 | MGET(m, M_DONTWAIT, n->m_type); | |
942 | if (m == 0) | 940 | if (m == 0) | |
943 | goto bad; | 941 | goto bad; | |
944 | MCLAIM(m, n->m_owner); | 942 | MCLAIM(m, n->m_owner); | |
945 | m->m_len = 0; | 943 | m->m_len = 0; | |
946 | if (n->m_flags & M_PKTHDR) { | 944 | if (n->m_flags & M_PKTHDR) { | |
947 | M_MOVE_PKTHDR(m, n); | 945 | M_MOVE_PKTHDR(m, n); | |
948 | } | 946 | } | |
949 | } | 947 | } | |
950 | space = &m->m_dat[MLEN] - (m->m_data + m->m_len); | 948 | space = &m->m_dat[MLEN] - (m->m_data + m->m_len); | |
951 | do { | 949 | do { | |
952 | count = min(min(max(len, max_protohdr), space), n->m_len); | 950 | count = min(min(max(len, max_protohdr), space), n->m_len); | |
953 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | 951 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | |
954 | (unsigned)count); | 952 | (unsigned)count); | |
955 | len -= count; | 953 | len -= count; | |
956 | m->m_len += count; | 954 | m->m_len += count; | |
957 | n->m_len -= count; | 955 | n->m_len -= count; | |
958 | space -= count; | 956 | space -= count; | |
959 | if (n->m_len) | 957 | if (n->m_len) | |
960 | n->m_data += count; | 958 | n->m_data += count; | |
961 | else | 959 | else | |
962 | n = m_free(n); | 960 | n = m_free(n); | |
963 | } while (len > 0 && n); | 961 | } while (len > 0 && n); | |
964 | if (len > 0) { | 962 | if (len > 0) { | |
965 | (void) m_free(m); | 963 | (void) m_free(m); | |
966 | goto bad; | 964 | goto bad; | |
967 | } | 965 | } | |
968 | m->m_next = n; | 966 | m->m_next = n; | |
969 | return (m); | 967 | return (m); | |
970 | bad: | 968 | bad: | |
971 | m_freem(n); | 969 | m_freem(n); | |
972 | MPFail++; | 970 | MPFail++; | |
973 | return (NULL); | 971 | return (NULL); | |
974 | } | 972 | } | |
975 | 973 | |||
976 | /* | 974 | /* | |
977 | * Like m_pullup(), except a new mbuf is always allocated, and we allow | 975 | * Like m_pullup(), except a new mbuf is always allocated, and we allow | |
978 | * the amount of empty space before the data in the new mbuf to be specified | 976 | * the amount of empty space before the data in the new mbuf to be specified | |
979 | * (in the event that the caller expects to prepend later). | 977 | * (in the event that the caller expects to prepend later). | |
980 | */ | 978 | */ | |
981 | int MSFail; | 979 | int MSFail; | |
982 | 980 | |||
983 | struct mbuf * | 981 | struct mbuf * | |
984 | m_copyup(struct mbuf *n, int len, int dstoff) | 982 | m_copyup(struct mbuf *n, int len, int dstoff) | |
985 | { | 983 | { | |
986 | struct mbuf *m; | 984 | struct mbuf *m; | |
987 | int count, space; | 985 | int count, space; | |
988 | 986 | |||
989 | if (len > (MHLEN - dstoff)) | 987 | if (len > (MHLEN - dstoff)) | |
990 | goto bad; | 988 | goto bad; | |
991 | MGET(m, M_DONTWAIT, n->m_type); | 989 | MGET(m, M_DONTWAIT, n->m_type); | |
992 | if (m == NULL) | 990 | if (m == NULL) | |
993 | goto bad; | 991 | goto bad; | |
994 | MCLAIM(m, n->m_owner); | 992 | MCLAIM(m, n->m_owner); | |
995 | m->m_len = 0; | 993 | m->m_len = 0; | |
996 | if (n->m_flags & M_PKTHDR) { | 994 | if (n->m_flags & M_PKTHDR) { | |
997 | M_MOVE_PKTHDR(m, n); | 995 | M_MOVE_PKTHDR(m, n); | |
998 | } | 996 | } | |
999 | m->m_data += dstoff; | 997 | m->m_data += dstoff; | |
1000 | space = &m->m_dat[MLEN] - (m->m_data + m->m_len); | 998 | space = &m->m_dat[MLEN] - (m->m_data + m->m_len); | |
1001 | do { | 999 | do { | |
1002 | count = min(min(max(len, max_protohdr), space), n->m_len); | 1000 | count = min(min(max(len, max_protohdr), space), n->m_len); | |
1003 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | 1001 | memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), | |
1004 | (unsigned)count); | 1002 | (unsigned)count); | |
1005 | len -= count; | 1003 | len -= count; | |
1006 | m->m_len += count; | 1004 | m->m_len += count; | |
1007 | n->m_len -= count; | 1005 | n->m_len -= count; | |
1008 | space -= count; | 1006 | space -= count; | |
1009 | if (n->m_len) | 1007 | if (n->m_len) | |
1010 | n->m_data += count; | 1008 | n->m_data += count; | |
1011 | else | 1009 | else | |
1012 | n = m_free(n); | 1010 | n = m_free(n); | |
1013 | } while (len > 0 && n); | 1011 | } while (len > 0 && n); | |
1014 | if (len > 0) { | 1012 | if (len > 0) { | |
1015 | (void) m_free(m); | 1013 | (void) m_free(m); | |
1016 | goto bad; | 1014 | goto bad; | |
1017 | } | 1015 | } | |
1018 | m->m_next = n; | 1016 | m->m_next = n; | |
1019 | return (m); | 1017 | return (m); | |
1020 | bad: | 1018 | bad: | |
1021 | m_freem(n); | 1019 | m_freem(n); | |
1022 | MSFail++; | 1020 | MSFail++; | |
1023 | return (NULL); | 1021 | return (NULL); | |
1024 | } | 1022 | } | |
1025 | 1023 | |||
1026 | /* | 1024 | /* | |
1027 | * Partition an mbuf chain in two pieces, returning the tail -- | 1025 | * Partition an mbuf chain in two pieces, returning the tail -- | |
1028 | * all but the first len0 bytes. In case of failure, it returns NULL and | 1026 | * all but the first len0 bytes. In case of failure, it returns NULL and | |
1029 | * attempts to restore the chain to its original state. | 1027 | * attempts to restore the chain to its original state. | |
1030 | */ | 1028 | */ | |
1031 | struct mbuf * | 1029 | struct mbuf * | |
1032 | m_split(struct mbuf *m0, int len0, int wait) | 1030 | m_split(struct mbuf *m0, int len0, int wait) | |
1033 | { | 1031 | { | |
1034 | 1032 | |||
1035 | return m_split0(m0, len0, wait, 1); | 1033 | return m_split0(m0, len0, wait, 1); | |
1036 | } | 1034 | } | |
1037 | 1035 | |||
1038 | static struct mbuf * | 1036 | static struct mbuf * | |
1039 | m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) | 1037 | m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) | |
1040 | { | 1038 | { | |
1041 | struct mbuf *m, *n; | 1039 | struct mbuf *m, *n; | |
1042 | unsigned len = len0, remain, len_save; | 1040 | unsigned len = len0, remain, len_save; | |
1043 | 1041 | |||
1044 | for (m = m0; m && len > m->m_len; m = m->m_next) | 1042 | for (m = m0; m && len > m->m_len; m = m->m_next) | |
1045 | len -= m->m_len; | 1043 | len -= m->m_len; | |
1046 | if (m == 0) | 1044 | if (m == 0) | |
1047 | return (NULL); | 1045 | return (NULL); | |
1048 | remain = m->m_len - len; | 1046 | remain = m->m_len - len; | |
1049 | if (copyhdr && (m0->m_flags & M_PKTHDR)) { | 1047 | if (copyhdr && (m0->m_flags & M_PKTHDR)) { | |
1050 | MGETHDR(n, wait, m0->m_type); | 1048 | MGETHDR(n, wait, m0->m_type); | |
1051 | if (n == 0) | 1049 | if (n == 0) | |
1052 | return (NULL); | 1050 | return (NULL); | |
1053 | MCLAIM(n, m0->m_owner); | 1051 | MCLAIM(n, m0->m_owner); | |
1054 | n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; | 1052 | n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; | |
1055 | n->m_pkthdr.len = m0->m_pkthdr.len - len0; | 1053 | n->m_pkthdr.len = m0->m_pkthdr.len - len0; | |
1056 | len_save = m0->m_pkthdr.len; | 1054 | len_save = m0->m_pkthdr.len; | |
1057 | m0->m_pkthdr.len = len0; | 1055 | m0->m_pkthdr.len = len0; | |
1058 | if (m->m_flags & M_EXT) | 1056 | if (m->m_flags & M_EXT) | |
1059 | goto extpacket; | 1057 | goto extpacket; | |
1060 | if (remain > MHLEN) { | 1058 | if (remain > MHLEN) { | |
1061 | /* m can't be the lead packet */ | 1059 | /* m can't be the lead packet */ | |
1062 | MH_ALIGN(n, 0); | 1060 | MH_ALIGN(n, 0); | |
1063 | n->m_len = 0; | 1061 | n->m_len = 0; | |
1064 | n->m_next = m_split(m, len, wait); | 1062 | n->m_next = m_split(m, len, wait); | |
1065 | if (n->m_next == 0) { | 1063 | if (n->m_next == 0) { | |
1066 | (void) m_free(n); | 1064 | (void) m_free(n); | |
1067 | m0->m_pkthdr.len = len_save; | 1065 | m0->m_pkthdr.len = len_save; | |
1068 | return (NULL); | 1066 | return (NULL); | |
1069 | } else | 1067 | } else | |
1070 | return (n); | 1068 | return (n); | |
1071 | } else | 1069 | } else | |
1072 | MH_ALIGN(n, remain); | 1070 | MH_ALIGN(n, remain); | |
1073 | } else if (remain == 0) { | 1071 | } else if (remain == 0) { | |
1074 | n = m->m_next; | 1072 | n = m->m_next; | |
1075 | m->m_next = 0; | 1073 | m->m_next = 0; | |
1076 | return (n); | 1074 | return (n); | |
1077 | } else { | 1075 | } else { | |
1078 | MGET(n, wait, m->m_type); | 1076 | MGET(n, wait, m->m_type); | |
1079 | if (n == 0) | 1077 | if (n == 0) | |
1080 | return (NULL); | 1078 | return (NULL); | |
1081 | MCLAIM(n, m->m_owner); | 1079 | MCLAIM(n, m->m_owner); | |
1082 | M_ALIGN(n, remain); | 1080 | M_ALIGN(n, remain); | |
1083 | } | 1081 | } | |
1084 | extpacket: | 1082 | extpacket: | |
1085 | if (m->m_flags & M_EXT) { | 1083 | if (m->m_flags & M_EXT) { | |
1086 | n->m_data = m->m_data + len; | 1084 | n->m_data = m->m_data + len; | |
1087 | MCLADDREFERENCE(m, n); | 1085 | MCLADDREFERENCE(m, n); | |
1088 | } else { | 1086 | } else { | |
1089 | memcpy(mtod(n, void *), mtod(m, char *) + len, remain); | 1087 | memcpy(mtod(n, void *), mtod(m, char *) + len, remain); |
--- src/sys/kern/uipc_syscalls.c 2011/07/15 14:50:19 1.145
+++ src/sys/kern/uipc_syscalls.c 2011/07/27 14:35:34 1.146
@@ -1,1087 +1,1085 @@ | @@ -1,1087 +1,1085 @@ | |||
1 | /* $NetBSD: uipc_syscalls.c,v 1.145 2011/07/15 14:50:19 christos Exp $ */ | 1 | /* $NetBSD: uipc_syscalls.c,v 1.146 2011/07/27 14:35:34 uebayasi Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 1982, 1986, 1989, 1990, 1993 | 33 | * Copyright (c) 1982, 1986, 1989, 1990, 1993 | |
34 | * The Regents of the University of California. All rights reserved. | 34 | * The Regents of the University of California. All rights reserved. | |
35 | * | 35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | 36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | 37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | 38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | 39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | 40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | 41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | 42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | 43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. Neither the name of the University nor the names of its contributors | 44 | * 3. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | 45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | 46 | * without specific prior written permission. | |
47 | * | 47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | 58 | * SUCH DAMAGE. | |
59 | * | 59 | * | |
60 | * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95 | 60 | * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95 | |
61 | */ | 61 | */ | |
62 | 62 | |||
63 | #include <sys/cdefs.h> | 63 | #include <sys/cdefs.h> | |
64 | __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.145 2011/07/15 14:50:19 christos Exp $"); | 64 | __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.146 2011/07/27 14:35:34 uebayasi Exp $"); | |
65 | 65 | |||
66 | #include "opt_pipe.h" | 66 | #include "opt_pipe.h" | |
67 | 67 | |||
68 | #include <sys/param.h> | 68 | #include <sys/param.h> | |
69 | #include <sys/systm.h> | 69 | #include <sys/systm.h> | |
70 | #include <sys/filedesc.h> | 70 | #include <sys/filedesc.h> | |
71 | #include <sys/proc.h> | 71 | #include <sys/proc.h> | |
72 | #include <sys/file.h> | 72 | #include <sys/file.h> | |
73 | #include <sys/buf.h> | 73 | #include <sys/buf.h> | |
74 | #include <sys/mbuf.h> | 74 | #include <sys/mbuf.h> | |
75 | #include <sys/protosw.h> | 75 | #include <sys/protosw.h> | |
76 | #include <sys/socket.h> | 76 | #include <sys/socket.h> | |
77 | #include <sys/socketvar.h> | 77 | #include <sys/socketvar.h> | |
78 | #include <sys/signalvar.h> | 78 | #include <sys/signalvar.h> | |
79 | #include <sys/un.h> | 79 | #include <sys/un.h> | |
80 | #include <sys/ktrace.h> | 80 | #include <sys/ktrace.h> | |
81 | #include <sys/event.h> | 81 | #include <sys/event.h> | |
82 | #include <sys/kauth.h> | 82 | #include <sys/kauth.h> | |
83 | 83 | |||
84 | #include <sys/mount.h> | 84 | #include <sys/mount.h> | |
85 | #include <sys/syscallargs.h> | 85 | #include <sys/syscallargs.h> | |
86 | 86 | |||
87 | #include <uvm/uvm_extern.h> | |||
88 | ||||
89 | /* | 87 | /* | |
90 | * System call interface to the socket abstraction. | 88 | * System call interface to the socket abstraction. | |
91 | */ | 89 | */ | |
92 | extern const struct fileops socketops; | 90 | extern const struct fileops socketops; | |
93 | 91 | |||
94 | int | 92 | int | |
95 | sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval) | 93 | sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval) | |
96 | { | 94 | { | |
97 | /* { | 95 | /* { | |
98 | syscallarg(int) domain; | 96 | syscallarg(int) domain; | |
99 | syscallarg(int) type; | 97 | syscallarg(int) type; | |
100 | syscallarg(int) protocol; | 98 | syscallarg(int) protocol; | |
101 | } */ | 99 | } */ | |
102 | int fd, error; | 100 | int fd, error; | |
103 | 101 | |||
104 | error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type), | 102 | error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type), | |
105 | SCARG(uap, protocol), l, &fd); | 103 | SCARG(uap, protocol), l, &fd); | |
106 | if (error == 0) | 104 | if (error == 0) | |
107 | *retval = fd; | 105 | *retval = fd; | |
108 | return error; | 106 | return error; | |
109 | } | 107 | } | |
110 | 108 | |||
111 | /* ARGSUSED */ | 109 | /* ARGSUSED */ | |
112 | int | 110 | int | |
113 | sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval) | 111 | sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval) | |
114 | { | 112 | { | |
115 | /* { | 113 | /* { | |
116 | syscallarg(int) s; | 114 | syscallarg(int) s; | |
117 | syscallarg(const struct sockaddr *) name; | 115 | syscallarg(const struct sockaddr *) name; | |
118 | syscallarg(unsigned int) namelen; | 116 | syscallarg(unsigned int) namelen; | |
119 | } */ | 117 | } */ | |
120 | struct mbuf *nam; | 118 | struct mbuf *nam; | |
121 | int error; | 119 | int error; | |
122 | 120 | |||
123 | error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), | 121 | error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), | |
124 | MT_SONAME); | 122 | MT_SONAME); | |
125 | if (error) | 123 | if (error) | |
126 | return error; | 124 | return error; | |
127 | 125 | |||
128 | return do_sys_bind(l, SCARG(uap, s), nam); | 126 | return do_sys_bind(l, SCARG(uap, s), nam); | |
129 | } | 127 | } | |
130 | 128 | |||
131 | int | 129 | int | |
132 | do_sys_bind(struct lwp *l, int fd, struct mbuf *nam) | 130 | do_sys_bind(struct lwp *l, int fd, struct mbuf *nam) | |
133 | { | 131 | { | |
134 | struct socket *so; | 132 | struct socket *so; | |
135 | int error; | 133 | int error; | |
136 | 134 | |||
137 | if ((error = fd_getsock(fd, &so)) != 0) { | 135 | if ((error = fd_getsock(fd, &so)) != 0) { | |
138 | m_freem(nam); | 136 | m_freem(nam); | |
139 | return (error); | 137 | return (error); | |
140 | } | 138 | } | |
141 | MCLAIM(nam, so->so_mowner); | 139 | MCLAIM(nam, so->so_mowner); | |
142 | error = sobind(so, nam, l); | 140 | error = sobind(so, nam, l); | |
143 | m_freem(nam); | 141 | m_freem(nam); | |
144 | fd_putfile(fd); | 142 | fd_putfile(fd); | |
145 | return error; | 143 | return error; | |
146 | } | 144 | } | |
147 | 145 | |||
148 | /* ARGSUSED */ | 146 | /* ARGSUSED */ | |
149 | int | 147 | int | |
150 | sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval) | 148 | sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval) | |
151 | { | 149 | { | |
152 | /* { | 150 | /* { | |
153 | syscallarg(int) s; | 151 | syscallarg(int) s; | |
154 | syscallarg(int) backlog; | 152 | syscallarg(int) backlog; | |
155 | } */ | 153 | } */ | |
156 | struct socket *so; | 154 | struct socket *so; | |
157 | int error; | 155 | int error; | |
158 | 156 | |||
159 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | 157 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | |
160 | return (error); | 158 | return (error); | |
161 | error = solisten(so, SCARG(uap, backlog), l); | 159 | error = solisten(so, SCARG(uap, backlog), l); | |
162 | fd_putfile(SCARG(uap, s)); | 160 | fd_putfile(SCARG(uap, s)); | |
163 | return error; | 161 | return error; | |
164 | } | 162 | } | |
165 | 163 | |||
166 | int | 164 | int | |
167 | do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock, | 165 | do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock, | |
168 | const sigset_t *mask, int flags, int clrflags) | 166 | const sigset_t *mask, int flags, int clrflags) | |
169 | { | 167 | { | |
170 | file_t *fp, *fp2; | 168 | file_t *fp, *fp2; | |
171 | struct mbuf *nam; | 169 | struct mbuf *nam; | |
172 | int error, fd; | 170 | int error, fd; | |
173 | struct socket *so, *so2; | 171 | struct socket *so, *so2; | |
174 | short wakeup_state = 0; | 172 | short wakeup_state = 0; | |
175 | 173 | |||
176 | if ((fp = fd_getfile(sock)) == NULL) | 174 | if ((fp = fd_getfile(sock)) == NULL) | |
177 | return (EBADF); | 175 | return (EBADF); | |
178 | if (fp->f_type != DTYPE_SOCKET) { | 176 | if (fp->f_type != DTYPE_SOCKET) { | |
179 | fd_putfile(sock); | 177 | fd_putfile(sock); | |
180 | return (ENOTSOCK); | 178 | return (ENOTSOCK); | |
181 | } | 179 | } | |
182 | if ((error = fd_allocfile(&fp2, &fd)) != 0) { | 180 | if ((error = fd_allocfile(&fp2, &fd)) != 0) { | |
183 | fd_putfile(sock); | 181 | fd_putfile(sock); | |
184 | return (error); | 182 | return (error); | |
185 | } | 183 | } | |
186 | nam = m_get(M_WAIT, MT_SONAME); | 184 | nam = m_get(M_WAIT, MT_SONAME); | |
187 | *new_sock = fd; | 185 | *new_sock = fd; | |
188 | so = fp->f_data; | 186 | so = fp->f_data; | |
189 | solock(so); | 187 | solock(so); | |
190 | 188 | |||
191 | if (__predict_false(mask)) | 189 | if (__predict_false(mask)) | |
192 | sigsuspendsetup(l, mask); | 190 | sigsuspendsetup(l, mask); | |
193 | 191 | |||
194 | if (!(so->so_proto->pr_flags & PR_LISTEN)) { | 192 | if (!(so->so_proto->pr_flags & PR_LISTEN)) { | |
195 | error = EOPNOTSUPP; | 193 | error = EOPNOTSUPP; | |
196 | goto bad; | 194 | goto bad; | |
197 | } | 195 | } | |
198 | if ((so->so_options & SO_ACCEPTCONN) == 0) { | 196 | if ((so->so_options & SO_ACCEPTCONN) == 0) { | |
199 | error = EINVAL; | 197 | error = EINVAL; | |
200 | goto bad; | 198 | goto bad; | |
201 | } | 199 | } | |
202 | if (so->so_nbio && so->so_qlen == 0) { | 200 | if (so->so_nbio && so->so_qlen == 0) { | |
203 | error = EWOULDBLOCK; | 201 | error = EWOULDBLOCK; | |
204 | goto bad; | 202 | goto bad; | |
205 | } | 203 | } | |
206 | while (so->so_qlen == 0 && so->so_error == 0) { | 204 | while (so->so_qlen == 0 && so->so_error == 0) { | |
207 | if (so->so_state & SS_CANTRCVMORE) { | 205 | if (so->so_state & SS_CANTRCVMORE) { | |
208 | so->so_error = ECONNABORTED; | 206 | so->so_error = ECONNABORTED; | |
209 | break; | 207 | break; | |
210 | } | 208 | } | |
211 | if (wakeup_state & SS_RESTARTSYS) { | 209 | if (wakeup_state & SS_RESTARTSYS) { | |
212 | error = ERESTART; | 210 | error = ERESTART; | |
213 | goto bad; | 211 | goto bad; | |
214 | } | 212 | } | |
215 | error = sowait(so, true, 0); | 213 | error = sowait(so, true, 0); | |
216 | if (error) { | 214 | if (error) { | |
217 | goto bad; | 215 | goto bad; | |
218 | } | 216 | } | |
219 | wakeup_state = so->so_state; | 217 | wakeup_state = so->so_state; | |
220 | } | 218 | } | |
221 | if (so->so_error) { | 219 | if (so->so_error) { | |
222 | error = so->so_error; | 220 | error = so->so_error; | |
223 | so->so_error = 0; | 221 | so->so_error = 0; | |
224 | goto bad; | 222 | goto bad; | |
225 | } | 223 | } | |
226 | /* connection has been removed from the listen queue */ | 224 | /* connection has been removed from the listen queue */ | |
227 | KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT); | 225 | KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT); | |
228 | so2 = TAILQ_FIRST(&so->so_q); | 226 | so2 = TAILQ_FIRST(&so->so_q); | |
229 | if (soqremque(so2, 1) == 0) | 227 | if (soqremque(so2, 1) == 0) | |
230 | panic("accept"); | 228 | panic("accept"); | |
231 | fp2->f_type = DTYPE_SOCKET; | 229 | fp2->f_type = DTYPE_SOCKET; | |
232 | fp2->f_flag = (fp->f_flag & ~clrflags) | | 230 | fp2->f_flag = (fp->f_flag & ~clrflags) | | |
233 | ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0); | 231 | ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0); | |
234 | fp2->f_ops = &socketops; | 232 | fp2->f_ops = &socketops; | |
235 | fp2->f_data = so2; | 233 | fp2->f_data = so2; | |
236 | error = soaccept(so2, nam); | 234 | error = soaccept(so2, nam); | |
237 | so2->so_cred = kauth_cred_dup(so->so_cred); | 235 | so2->so_cred = kauth_cred_dup(so->so_cred); | |
238 | sounlock(so); | 236 | sounlock(so); | |
239 | if (error) { | 237 | if (error) { | |
240 | /* an error occurred, free the file descriptor and mbuf */ | 238 | /* an error occurred, free the file descriptor and mbuf */ | |
241 | m_freem(nam); | 239 | m_freem(nam); | |
242 | mutex_enter(&fp2->f_lock); | 240 | mutex_enter(&fp2->f_lock); | |
243 | fp2->f_count++; | 241 | fp2->f_count++; | |
244 | mutex_exit(&fp2->f_lock); | 242 | mutex_exit(&fp2->f_lock); | |
245 | closef(fp2); | 243 | closef(fp2); | |
246 | fd_abort(curproc, NULL, fd); | 244 | fd_abort(curproc, NULL, fd); | |
247 | } else { | 245 | } else { | |
248 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | 246 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | |
249 | fd_affix(curproc, fp2, fd); | 247 | fd_affix(curproc, fp2, fd); | |
250 | *name = nam; | 248 | *name = nam; | |
251 | } | 249 | } | |
252 | fd_putfile(sock); | 250 | fd_putfile(sock); | |
253 | if (__predict_false(mask)) | 251 | if (__predict_false(mask)) | |
254 | sigsuspendteardown(l); | 252 | sigsuspendteardown(l); | |
255 | return (error); | 253 | return (error); | |
256 | bad: | 254 | bad: | |
257 | sounlock(so); | 255 | sounlock(so); | |
258 | m_freem(nam); | 256 | m_freem(nam); | |
259 | fd_putfile(sock); | 257 | fd_putfile(sock); | |
260 | fd_abort(curproc, fp2, fd); | 258 | fd_abort(curproc, fp2, fd); | |
261 | if (__predict_false(mask)) | 259 | if (__predict_false(mask)) | |
262 | sigsuspendteardown(l); | 260 | sigsuspendteardown(l); | |
263 | return (error); | 261 | return (error); | |
264 | } | 262 | } | |
265 | 263 | |||
266 | int | 264 | int | |
267 | sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval) | 265 | sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval) | |
268 | { | 266 | { | |
269 | /* { | 267 | /* { | |
270 | syscallarg(int) s; | 268 | syscallarg(int) s; | |
271 | syscallarg(struct sockaddr *) name; | 269 | syscallarg(struct sockaddr *) name; | |
272 | syscallarg(unsigned int *) anamelen; | 270 | syscallarg(unsigned int *) anamelen; | |
273 | } */ | 271 | } */ | |
274 | int error, fd; | 272 | int error, fd; | |
275 | struct mbuf *name; | 273 | struct mbuf *name; | |
276 | 274 | |||
277 | error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0); | 275 | error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0); | |
278 | if (error != 0) | 276 | if (error != 0) | |
279 | return error; | 277 | return error; | |
280 | error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen), | 278 | error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen), | |
281 | MSG_LENUSRSPACE, name); | 279 | MSG_LENUSRSPACE, name); | |
282 | if (name != NULL) | 280 | if (name != NULL) | |
283 | m_free(name); | 281 | m_free(name); | |
284 | if (error != 0) { | 282 | if (error != 0) { | |
285 | fd = (int)*retval; | 283 | fd = (int)*retval; | |
286 | if (fd_getfile(fd) != NULL) | 284 | if (fd_getfile(fd) != NULL) | |
287 | (void)fd_close(fd); | 285 | (void)fd_close(fd); | |
288 | } | 286 | } | |
289 | return error; | 287 | return error; | |
290 | } | 288 | } | |
291 | 289 | |||
292 | int | 290 | int | |
293 | sys_paccept(struct lwp *l, const struct sys_paccept_args *uap, | 291 | sys_paccept(struct lwp *l, const struct sys_paccept_args *uap, | |
294 | register_t *retval) | 292 | register_t *retval) | |
295 | { | 293 | { | |
296 | /* { | 294 | /* { | |
297 | syscallarg(int) s; | 295 | syscallarg(int) s; | |
298 | syscallarg(struct sockaddr *) name; | 296 | syscallarg(struct sockaddr *) name; | |
299 | syscallarg(unsigned int *) anamelen; | 297 | syscallarg(unsigned int *) anamelen; | |
300 | syscallarg(const sigset_t *) mask; | 298 | syscallarg(const sigset_t *) mask; | |
301 | syscallarg(int) flags; | 299 | syscallarg(int) flags; | |
302 | } */ | 300 | } */ | |
303 | int error, fd; | 301 | int error, fd; | |
304 | struct mbuf *name; | 302 | struct mbuf *name; | |
305 | sigset_t *mask, amask; | 303 | sigset_t *mask, amask; | |
306 | 304 | |||
307 | if (SCARG(uap, mask) != NULL) { | 305 | if (SCARG(uap, mask) != NULL) { | |
308 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); | 306 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); | |
309 | if (error) | 307 | if (error) | |
310 | return error; | 308 | return error; | |
311 | mask = &amask; | 309 | mask = &amask; | |
312 | } else | 310 | } else | |
313 | mask = NULL; | 311 | mask = NULL; | |
314 | 312 | |||
315 | error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask, | 313 | error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask, | |
316 | SCARG(uap, flags), FNONBLOCK); | 314 | SCARG(uap, flags), FNONBLOCK); | |
317 | if (error != 0) | 315 | if (error != 0) | |
318 | return error; | 316 | return error; | |
319 | error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen), | 317 | error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen), | |
320 | MSG_LENUSRSPACE, name); | 318 | MSG_LENUSRSPACE, name); | |
321 | if (name != NULL) | 319 | if (name != NULL) | |
322 | m_free(name); | 320 | m_free(name); | |
323 | if (error != 0) { | 321 | if (error != 0) { | |
324 | fd = (int)*retval; | 322 | fd = (int)*retval; | |
325 | if (fd_getfile(fd) != NULL) | 323 | if (fd_getfile(fd) != NULL) | |
326 | (void)fd_close(fd); | 324 | (void)fd_close(fd); | |
327 | } | 325 | } | |
328 | return error; | 326 | return error; | |
329 | } | 327 | } | |
330 | 328 | |||
331 | /* ARGSUSED */ | 329 | /* ARGSUSED */ | |
332 | int | 330 | int | |
333 | sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval) | 331 | sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval) | |
334 | { | 332 | { | |
335 | /* { | 333 | /* { | |
336 | syscallarg(int) s; | 334 | syscallarg(int) s; | |
337 | syscallarg(const struct sockaddr *) name; | 335 | syscallarg(const struct sockaddr *) name; | |
338 | syscallarg(unsigned int) namelen; | 336 | syscallarg(unsigned int) namelen; | |
339 | } */ | 337 | } */ | |
340 | int error; | 338 | int error; | |
341 | struct mbuf *nam; | 339 | struct mbuf *nam; | |
342 | 340 | |||
343 | error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), | 341 | error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), | |
344 | MT_SONAME); | 342 | MT_SONAME); | |
345 | if (error) | 343 | if (error) | |
346 | return error; | 344 | return error; | |
347 | return do_sys_connect(l, SCARG(uap, s), nam); | 345 | return do_sys_connect(l, SCARG(uap, s), nam); | |
348 | } | 346 | } | |
349 | 347 | |||
350 | int | 348 | int | |
351 | do_sys_connect(struct lwp *l, int fd, struct mbuf *nam) | 349 | do_sys_connect(struct lwp *l, int fd, struct mbuf *nam) | |
352 | { | 350 | { | |
353 | struct socket *so; | 351 | struct socket *so; | |
354 | int error; | 352 | int error; | |
355 | int interrupted = 0; | 353 | int interrupted = 0; | |
356 | 354 | |||
357 | if ((error = fd_getsock(fd, &so)) != 0) { | 355 | if ((error = fd_getsock(fd, &so)) != 0) { | |
358 | m_freem(nam); | 356 | m_freem(nam); | |
359 | return (error); | 357 | return (error); | |
360 | } | 358 | } | |
361 | solock(so); | 359 | solock(so); | |
362 | MCLAIM(nam, so->so_mowner); | 360 | MCLAIM(nam, so->so_mowner); | |
363 | if ((so->so_state & SS_ISCONNECTING) != 0) { | 361 | if ((so->so_state & SS_ISCONNECTING) != 0) { | |
364 | error = EALREADY; | 362 | error = EALREADY; | |
365 | goto out; | 363 | goto out; | |
366 | } | 364 | } | |
367 | 365 | |||
368 | error = soconnect(so, nam, l); | 366 | error = soconnect(so, nam, l); | |
369 | if (error) | 367 | if (error) | |
370 | goto bad; | 368 | goto bad; | |
371 | if (so->so_nbio && (so->so_state & SS_ISCONNECTING) != 0) { | 369 | if (so->so_nbio && (so->so_state & SS_ISCONNECTING) != 0) { | |
372 | error = EINPROGRESS; | 370 | error = EINPROGRESS; | |
373 | goto out; | 371 | goto out; | |
374 | } | 372 | } | |
375 | while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) { | 373 | while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) { | |
376 | error = sowait(so, true, 0); | 374 | error = sowait(so, true, 0); | |
377 | if (__predict_false((so->so_state & SS_ISABORTING) != 0)) { | 375 | if (__predict_false((so->so_state & SS_ISABORTING) != 0)) { | |
378 | error = EPIPE; | 376 | error = EPIPE; | |
379 | interrupted = 1; | 377 | interrupted = 1; | |
380 | break; | 378 | break; | |
381 | } | 379 | } | |
382 | if (error) { | 380 | if (error) { | |
383 | if (error == EINTR || error == ERESTART) | 381 | if (error == EINTR || error == ERESTART) | |
384 | interrupted = 1; | 382 | interrupted = 1; | |
385 | break; | 383 | break; | |
386 | } | 384 | } | |
387 | } | 385 | } | |
388 | if (error == 0) { | 386 | if (error == 0) { | |
389 | error = so->so_error; | 387 | error = so->so_error; | |
390 | so->so_error = 0; | 388 | so->so_error = 0; | |
391 | } | 389 | } | |
392 | bad: | 390 | bad: | |
393 | if (!interrupted) | 391 | if (!interrupted) | |
394 | so->so_state &= ~SS_ISCONNECTING; | 392 | so->so_state &= ~SS_ISCONNECTING; | |
395 | if (error == ERESTART) | 393 | if (error == ERESTART) | |
396 | error = EINTR; | 394 | error = EINTR; | |
397 | out: | 395 | out: | |
398 | sounlock(so); | 396 | sounlock(so); | |
399 | fd_putfile(fd); | 397 | fd_putfile(fd); | |
400 | m_freem(nam); | 398 | m_freem(nam); | |
401 | return (error); | 399 | return (error); | |
402 | } | 400 | } | |
403 | 401 | |||
404 | int | 402 | int | |
405 | sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, register_t *retval) | 403 | sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, register_t *retval) | |
406 | { | 404 | { | |
407 | /* { | 405 | /* { | |
408 | syscallarg(int) domain; | 406 | syscallarg(int) domain; | |
409 | syscallarg(int) type; | 407 | syscallarg(int) type; | |
410 | syscallarg(int) protocol; | 408 | syscallarg(int) protocol; | |
411 | syscallarg(int *) rsv; | 409 | syscallarg(int *) rsv; | |
412 | } */ | 410 | } */ | |
413 | file_t *fp1, *fp2; | 411 | file_t *fp1, *fp2; | |
414 | struct socket *so1, *so2; | 412 | struct socket *so1, *so2; | |
415 | int fd, error, sv[2]; | 413 | int fd, error, sv[2]; | |
416 | proc_t *p; | 414 | proc_t *p; | |
417 | int flags = SCARG(uap, type) & SOCK_FLAGS_MASK; | 415 | int flags = SCARG(uap, type) & SOCK_FLAGS_MASK; | |
418 | int type = SCARG(uap, type) & ~SOCK_FLAGS_MASK; | 416 | int type = SCARG(uap, type) & ~SOCK_FLAGS_MASK; | |
419 | int fnonblock = (flags & SOCK_NONBLOCK) ? FNONBLOCK : 0; | 417 | int fnonblock = (flags & SOCK_NONBLOCK) ? FNONBLOCK : 0; | |
420 | 418 | |||
421 | p = curproc; | 419 | p = curproc; | |
422 | error = socreate(SCARG(uap, domain), &so1, type, | 420 | error = socreate(SCARG(uap, domain), &so1, type, | |
423 | SCARG(uap, protocol), l, NULL); | 421 | SCARG(uap, protocol), l, NULL); | |
424 | if (error) | 422 | if (error) | |
425 | return (error); | 423 | return (error); | |
426 | error = socreate(SCARG(uap, domain), &so2, type, | 424 | error = socreate(SCARG(uap, domain), &so2, type, | |
427 | SCARG(uap, protocol), l, so1); | 425 | SCARG(uap, protocol), l, so1); | |
428 | if (error) | 426 | if (error) | |
429 | goto free1; | 427 | goto free1; | |
430 | if ((error = fd_allocfile(&fp1, &fd)) != 0) | 428 | if ((error = fd_allocfile(&fp1, &fd)) != 0) | |
431 | goto free2; | 429 | goto free2; | |
432 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | 430 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | |
433 | sv[0] = fd; | 431 | sv[0] = fd; | |
434 | fp1->f_flag = FREAD|FWRITE|fnonblock; | 432 | fp1->f_flag = FREAD|FWRITE|fnonblock; | |
435 | fp1->f_type = DTYPE_SOCKET; | 433 | fp1->f_type = DTYPE_SOCKET; | |
436 | fp1->f_ops = &socketops; | 434 | fp1->f_ops = &socketops; | |
437 | fp1->f_data = so1; | 435 | fp1->f_data = so1; | |
438 | if ((error = fd_allocfile(&fp2, &fd)) != 0) | 436 | if ((error = fd_allocfile(&fp2, &fd)) != 0) | |
439 | goto free3; | 437 | goto free3; | |
440 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | 438 | fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); | |
441 | fp2->f_flag = FREAD|FWRITE|fnonblock; | 439 | fp2->f_flag = FREAD|FWRITE|fnonblock; | |
442 | fp2->f_type = DTYPE_SOCKET; | 440 | fp2->f_type = DTYPE_SOCKET; | |
443 | fp2->f_ops = &socketops; | 441 | fp2->f_ops = &socketops; | |
444 | fp2->f_data = so2; | 442 | fp2->f_data = so2; | |
445 | sv[1] = fd; | 443 | sv[1] = fd; | |
446 | solock(so1); | 444 | solock(so1); | |
447 | error = soconnect2(so1, so2); | 445 | error = soconnect2(so1, so2); | |
448 | if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) { | 446 | if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) { | |
449 | /* | 447 | /* | |
450 | * Datagram socket connection is asymmetric. | 448 | * Datagram socket connection is asymmetric. | |
451 | */ | 449 | */ | |
452 | error = soconnect2(so2, so1); | 450 | error = soconnect2(so2, so1); | |
453 | } | 451 | } | |
454 | sounlock(so1); | 452 | sounlock(so1); | |
455 | if (error == 0) | 453 | if (error == 0) | |
456 | error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int)); | 454 | error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int)); | |
457 | if (error == 0) { | 455 | if (error == 0) { | |
458 | fd_affix(p, fp2, sv[1]); | 456 | fd_affix(p, fp2, sv[1]); | |
459 | fd_affix(p, fp1, sv[0]); | 457 | fd_affix(p, fp1, sv[0]); | |
460 | return (0); | 458 | return (0); | |
461 | } | 459 | } | |
462 | fd_abort(p, fp2, sv[1]); | 460 | fd_abort(p, fp2, sv[1]); | |
463 | free3: | 461 | free3: | |
464 | fd_abort(p, fp1, sv[0]); | 462 | fd_abort(p, fp1, sv[0]); | |
465 | free2: | 463 | free2: | |
466 | (void)soclose(so2); | 464 | (void)soclose(so2); | |
467 | free1: | 465 | free1: | |
468 | (void)soclose(so1); | 466 | (void)soclose(so1); | |
469 | return (error); | 467 | return (error); | |
470 | } | 468 | } | |
471 | 469 | |||
472 | int | 470 | int | |
473 | sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval) | 471 | sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval) | |
474 | { | 472 | { | |
475 | /* { | 473 | /* { | |
476 | syscallarg(int) s; | 474 | syscallarg(int) s; | |
477 | syscallarg(const void *) buf; | 475 | syscallarg(const void *) buf; | |
478 | syscallarg(size_t) len; | 476 | syscallarg(size_t) len; | |
479 | syscallarg(int) flags; | 477 | syscallarg(int) flags; | |
480 | syscallarg(const struct sockaddr *) to; | 478 | syscallarg(const struct sockaddr *) to; | |
481 | syscallarg(unsigned int) tolen; | 479 | syscallarg(unsigned int) tolen; | |
482 | } */ | 480 | } */ | |
483 | struct msghdr msg; | 481 | struct msghdr msg; | |
484 | struct iovec aiov; | 482 | struct iovec aiov; | |
485 | 483 | |||
486 | msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */ | 484 | msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */ | |
487 | msg.msg_namelen = SCARG(uap, tolen); | 485 | msg.msg_namelen = SCARG(uap, tolen); | |
488 | msg.msg_iov = &aiov; | 486 | msg.msg_iov = &aiov; | |
489 | msg.msg_iovlen = 1; | 487 | msg.msg_iovlen = 1; | |
490 | msg.msg_control = NULL; | 488 | msg.msg_control = NULL; | |
491 | msg.msg_flags = 0; | 489 | msg.msg_flags = 0; | |
492 | aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */ | 490 | aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */ | |
493 | aiov.iov_len = SCARG(uap, len); | 491 | aiov.iov_len = SCARG(uap, len); | |
494 | return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval); | 492 | return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval); | |
495 | } | 493 | } | |
496 | 494 | |||
497 | int | 495 | int | |
498 | sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval) | 496 | sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval) | |
499 | { | 497 | { | |
500 | /* { | 498 | /* { | |
501 | syscallarg(int) s; | 499 | syscallarg(int) s; | |
502 | syscallarg(const struct msghdr *) msg; | 500 | syscallarg(const struct msghdr *) msg; | |
503 | syscallarg(int) flags; | 501 | syscallarg(int) flags; | |
504 | } */ | 502 | } */ | |
505 | struct msghdr msg; | 503 | struct msghdr msg; | |
506 | int error; | 504 | int error; | |
507 | 505 | |||
508 | error = copyin(SCARG(uap, msg), &msg, sizeof(msg)); | 506 | error = copyin(SCARG(uap, msg), &msg, sizeof(msg)); | |
509 | if (error) | 507 | if (error) | |
510 | return (error); | 508 | return (error); | |
511 | 509 | |||
512 | msg.msg_flags = MSG_IOVUSRSPACE; | 510 | msg.msg_flags = MSG_IOVUSRSPACE; | |
513 | return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval); | 511 | return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval); | |
514 | } | 512 | } | |
515 | 513 | |||
516 | int | 514 | int | |
517 | do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags, | 515 | do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags, | |
518 | register_t *retsize) | 516 | register_t *retsize) | |
519 | { | 517 | { | |
520 | struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL; | 518 | struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL; | |
521 | struct mbuf *to, *control; | 519 | struct mbuf *to, *control; | |
522 | struct socket *so; | 520 | struct socket *so; | |
523 | struct uio auio; | 521 | struct uio auio; | |
524 | size_t len, iovsz; | 522 | size_t len, iovsz; | |
525 | int i, error; | 523 | int i, error; | |
526 | 524 | |||
527 | ktrkuser("msghdr", mp, sizeof *mp); | 525 | ktrkuser("msghdr", mp, sizeof *mp); | |
528 | 526 | |||
529 | /* If the caller passed us stuff in mbufs, we must free them. */ | 527 | /* If the caller passed us stuff in mbufs, we must free them. */ | |
530 | to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL; | 528 | to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL; | |
531 | control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL; | 529 | control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL; | |
532 | iovsz = mp->msg_iovlen * sizeof(struct iovec); | 530 | iovsz = mp->msg_iovlen * sizeof(struct iovec); | |
533 | 531 | |||
534 | if (mp->msg_flags & MSG_IOVUSRSPACE) { | 532 | if (mp->msg_flags & MSG_IOVUSRSPACE) { | |
535 | if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) { | 533 | if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) { | |
536 | if ((unsigned int)mp->msg_iovlen > IOV_MAX) { | 534 | if ((unsigned int)mp->msg_iovlen > IOV_MAX) { | |
537 | error = EMSGSIZE; | 535 | error = EMSGSIZE; | |
538 | goto bad; | 536 | goto bad; | |
539 | } | 537 | } | |
540 | iov = kmem_alloc(iovsz, KM_SLEEP); | 538 | iov = kmem_alloc(iovsz, KM_SLEEP); | |
541 | } | 539 | } | |
542 | if (mp->msg_iovlen != 0) { | 540 | if (mp->msg_iovlen != 0) { | |
543 | error = copyin(mp->msg_iov, iov, iovsz); | 541 | error = copyin(mp->msg_iov, iov, iovsz); | |
544 | if (error) | 542 | if (error) | |
545 | goto bad; | 543 | goto bad; | |
546 | } | 544 | } | |
547 | mp->msg_iov = iov; | 545 | mp->msg_iov = iov; | |
548 | } | 546 | } | |
549 | 547 | |||
550 | auio.uio_iov = mp->msg_iov; | 548 | auio.uio_iov = mp->msg_iov; | |
551 | auio.uio_iovcnt = mp->msg_iovlen; | 549 | auio.uio_iovcnt = mp->msg_iovlen; | |
552 | auio.uio_rw = UIO_WRITE; | 550 | auio.uio_rw = UIO_WRITE; | |
553 | auio.uio_offset = 0; /* XXX */ | 551 | auio.uio_offset = 0; /* XXX */ | |
554 | auio.uio_resid = 0; | 552 | auio.uio_resid = 0; | |
555 | KASSERT(l == curlwp); | 553 | KASSERT(l == curlwp); | |
556 | auio.uio_vmspace = l->l_proc->p_vmspace; | 554 | auio.uio_vmspace = l->l_proc->p_vmspace; | |
557 | 555 | |||
558 | for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) { | 556 | for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) { | |
559 | /* | 557 | /* | |
560 | * Writes return ssize_t because -1 is returned on error. | 558 | * Writes return ssize_t because -1 is returned on error. | |
561 | * Therefore, we must restrict the length to SSIZE_MAX to | 559 | * Therefore, we must restrict the length to SSIZE_MAX to | |
562 | * avoid garbage return values. | 560 | * avoid garbage return values. | |
563 | */ | 561 | */ | |
564 | auio.uio_resid += tiov->iov_len; | 562 | auio.uio_resid += tiov->iov_len; | |
565 | if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | 563 | if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | |
566 | error = EINVAL; | 564 | error = EINVAL; | |
567 | goto bad; | 565 | goto bad; | |
568 | } | 566 | } | |
569 | } | 567 | } | |
570 | 568 | |||
571 | if (mp->msg_name && to == NULL) { | 569 | if (mp->msg_name && to == NULL) { | |
572 | error = sockargs(&to, mp->msg_name, mp->msg_namelen, | 570 | error = sockargs(&to, mp->msg_name, mp->msg_namelen, | |
573 | MT_SONAME); | 571 | MT_SONAME); | |
574 | if (error) | 572 | if (error) | |
575 | goto bad; | 573 | goto bad; | |
576 | } | 574 | } | |
577 | 575 | |||
578 | if (mp->msg_control) { | 576 | if (mp->msg_control) { | |
579 | if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) { | 577 | if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) { | |
580 | error = EINVAL; | 578 | error = EINVAL; | |
581 | goto bad; | 579 | goto bad; | |
582 | } | 580 | } | |
583 | if (control == NULL) { | 581 | if (control == NULL) { | |
584 | error = sockargs(&control, mp->msg_control, | 582 | error = sockargs(&control, mp->msg_control, | |
585 | mp->msg_controllen, MT_CONTROL); | 583 | mp->msg_controllen, MT_CONTROL); | |
586 | if (error) | 584 | if (error) | |
587 | goto bad; | 585 | goto bad; | |
588 | } | 586 | } | |
589 | } | 587 | } | |
590 | 588 | |||
591 | if (ktrpoint(KTR_GENIO)) { | 589 | if (ktrpoint(KTR_GENIO)) { | |
592 | ktriov = kmem_alloc(iovsz, KM_SLEEP); | 590 | ktriov = kmem_alloc(iovsz, KM_SLEEP); | |
593 | memcpy(ktriov, auio.uio_iov, iovsz); | 591 | memcpy(ktriov, auio.uio_iov, iovsz); | |
594 | } | 592 | } | |
595 | 593 | |||
596 | if ((error = fd_getsock(s, &so)) != 0) | 594 | if ((error = fd_getsock(s, &so)) != 0) | |
597 | goto bad; | 595 | goto bad; | |
598 | 596 | |||
599 | if (mp->msg_name) | 597 | if (mp->msg_name) | |
600 | MCLAIM(to, so->so_mowner); | 598 | MCLAIM(to, so->so_mowner); | |
601 | if (mp->msg_control) | 599 | if (mp->msg_control) | |
602 | MCLAIM(control, so->so_mowner); | 600 | MCLAIM(control, so->so_mowner); | |
603 | 601 | |||
604 | len = auio.uio_resid; | 602 | len = auio.uio_resid; | |
605 | error = (*so->so_send)(so, to, &auio, NULL, control, flags, l); | 603 | error = (*so->so_send)(so, to, &auio, NULL, control, flags, l); | |
606 | /* Protocol is responsible for freeing 'control' */ | 604 | /* Protocol is responsible for freeing 'control' */ | |
607 | control = NULL; | 605 | control = NULL; | |
608 | 606 | |||
609 | fd_putfile(s); | 607 | fd_putfile(s); | |
610 | 608 | |||
611 | if (error) { | 609 | if (error) { | |
612 | if (auio.uio_resid != len && (error == ERESTART || | 610 | if (auio.uio_resid != len && (error == ERESTART || | |
613 | error == EINTR || error == EWOULDBLOCK)) | 611 | error == EINTR || error == EWOULDBLOCK)) | |
614 | error = 0; | 612 | error = 0; | |
615 | if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) { | 613 | if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) { | |
616 | mutex_enter(proc_lock); | 614 | mutex_enter(proc_lock); | |
617 | psignal(l->l_proc, SIGPIPE); | 615 | psignal(l->l_proc, SIGPIPE); | |
618 | mutex_exit(proc_lock); | 616 | mutex_exit(proc_lock); | |
619 | } | 617 | } | |
620 | } | 618 | } | |
621 | if (error == 0) | 619 | if (error == 0) | |
622 | *retsize = len - auio.uio_resid; | 620 | *retsize = len - auio.uio_resid; | |
623 | 621 | |||
624 | bad: | 622 | bad: | |
625 | if (ktriov != NULL) { | 623 | if (ktriov != NULL) { | |
626 | ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error); | 624 | ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error); | |
627 | kmem_free(ktriov, iovsz); | 625 | kmem_free(ktriov, iovsz); | |
628 | } | 626 | } | |
629 | 627 | |||
630 | if (iov != aiov) | 628 | if (iov != aiov) | |
631 | kmem_free(iov, iovsz); | 629 | kmem_free(iov, iovsz); | |
632 | if (to) | 630 | if (to) | |
633 | m_freem(to); | 631 | m_freem(to); | |
634 | if (control) | 632 | if (control) | |
635 | m_freem(control); | 633 | m_freem(control); | |
636 | 634 | |||
637 | return (error); | 635 | return (error); | |
638 | } | 636 | } | |
639 | 637 | |||
640 | int | 638 | int | |
641 | sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval) | 639 | sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval) | |
642 | { | 640 | { | |
643 | /* { | 641 | /* { | |
644 | syscallarg(int) s; | 642 | syscallarg(int) s; | |
645 | syscallarg(void *) buf; | 643 | syscallarg(void *) buf; | |
646 | syscallarg(size_t) len; | 644 | syscallarg(size_t) len; | |
647 | syscallarg(int) flags; | 645 | syscallarg(int) flags; | |
648 | syscallarg(struct sockaddr *) from; | 646 | syscallarg(struct sockaddr *) from; | |
649 | syscallarg(unsigned int *) fromlenaddr; | 647 | syscallarg(unsigned int *) fromlenaddr; | |
650 | } */ | 648 | } */ | |
651 | struct msghdr msg; | 649 | struct msghdr msg; | |
652 | struct iovec aiov; | 650 | struct iovec aiov; | |
653 | int error; | 651 | int error; | |
654 | struct mbuf *from; | 652 | struct mbuf *from; | |
655 | 653 | |||
656 | msg.msg_name = NULL; | 654 | msg.msg_name = NULL; | |
657 | msg.msg_iov = &aiov; | 655 | msg.msg_iov = &aiov; | |
658 | msg.msg_iovlen = 1; | 656 | msg.msg_iovlen = 1; | |
659 | aiov.iov_base = SCARG(uap, buf); | 657 | aiov.iov_base = SCARG(uap, buf); | |
660 | aiov.iov_len = SCARG(uap, len); | 658 | aiov.iov_len = SCARG(uap, len); | |
661 | msg.msg_control = NULL; | 659 | msg.msg_control = NULL; | |
662 | msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS; | 660 | msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS; | |
663 | 661 | |||
664 | error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval); | 662 | error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval); | |
665 | if (error != 0) | 663 | if (error != 0) | |
666 | return error; | 664 | return error; | |
667 | 665 | |||
668 | error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr), | 666 | error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr), | |
669 | MSG_LENUSRSPACE, from); | 667 | MSG_LENUSRSPACE, from); | |
670 | if (from != NULL) | 668 | if (from != NULL) | |
671 | m_free(from); | 669 | m_free(from); | |
672 | return error; | 670 | return error; | |
673 | } | 671 | } | |
674 | 672 | |||
675 | int | 673 | int | |
676 | sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval) | 674 | sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval) | |
677 | { | 675 | { | |
678 | /* { | 676 | /* { | |
679 | syscallarg(int) s; | 677 | syscallarg(int) s; | |
680 | syscallarg(struct msghdr *) msg; | 678 | syscallarg(struct msghdr *) msg; | |
681 | syscallarg(int) flags; | 679 | syscallarg(int) flags; | |
682 | } */ | 680 | } */ | |
683 | struct msghdr msg; | 681 | struct msghdr msg; | |
684 | int error; | 682 | int error; | |
685 | struct mbuf *from, *control; | 683 | struct mbuf *from, *control; | |
686 | 684 | |||
687 | error = copyin(SCARG(uap, msg), &msg, sizeof(msg)); | 685 | error = copyin(SCARG(uap, msg), &msg, sizeof(msg)); | |
688 | if (error) | 686 | if (error) | |
689 | return (error); | 687 | return (error); | |
690 | 688 | |||
691 | msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE; | 689 | msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE; | |
692 | 690 | |||
693 | error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, | 691 | error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, | |
694 | msg.msg_control != NULL ? &control : NULL, retval); | 692 | msg.msg_control != NULL ? &control : NULL, retval); | |
695 | if (error != 0) | 693 | if (error != 0) | |
696 | return error; | 694 | return error; | |
697 | 695 | |||
698 | if (msg.msg_control != NULL) | 696 | if (msg.msg_control != NULL) | |
699 | error = copyout_msg_control(l, &msg, control); | 697 | error = copyout_msg_control(l, &msg, control); | |
700 | 698 | |||
701 | if (error == 0) | 699 | if (error == 0) | |
702 | error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0, | 700 | error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0, | |
703 | from); | 701 | from); | |
704 | if (from != NULL) | 702 | if (from != NULL) | |
705 | m_free(from); | 703 | m_free(from); | |
706 | if (error == 0) { | 704 | if (error == 0) { | |
707 | ktrkuser("msghdr", &msg, sizeof msg); | 705 | ktrkuser("msghdr", &msg, sizeof msg); | |
708 | error = copyout(&msg, SCARG(uap, msg), sizeof(msg)); | 706 | error = copyout(&msg, SCARG(uap, msg), sizeof(msg)); | |
709 | } | 707 | } | |
710 | 708 | |||
711 | return (error); | 709 | return (error); | |
712 | } | 710 | } | |
713 | 711 | |||
714 | /* | 712 | /* | |
715 | * Adjust for a truncated SCM_RIGHTS control message. | 713 | * Adjust for a truncated SCM_RIGHTS control message. | |
716 | * This means closing any file descriptors that aren't present | 714 | * This means closing any file descriptors that aren't present | |
717 | * in the returned buffer. | 715 | * in the returned buffer. | |
718 | * m is the mbuf holding the (already externalized) SCM_RIGHTS message. | 716 | * m is the mbuf holding the (already externalized) SCM_RIGHTS message. | |
719 | */ | 717 | */ | |
720 | static void | 718 | static void | |
721 | free_rights(struct mbuf *m) | 719 | free_rights(struct mbuf *m) | |
722 | { | 720 | { | |
723 | int nfd; | 721 | int nfd; | |
724 | int i; | 722 | int i; | |
725 | int *fdv; | 723 | int *fdv; | |
726 | 724 | |||
727 | nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0 | 725 | nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0 | |
728 | : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1; | 726 | : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1; | |
729 | fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *)); | 727 | fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *)); | |
730 | for (i = 0; i < nfd; i++) { | 728 | for (i = 0; i < nfd; i++) { | |
731 | if (fd_getfile(fdv[i]) != NULL) | 729 | if (fd_getfile(fdv[i]) != NULL) | |
732 | (void)fd_close(fdv[i]); | 730 | (void)fd_close(fdv[i]); | |
733 | } | 731 | } | |
734 | } | 732 | } | |
735 | 733 | |||
736 | void | 734 | void | |
737 | free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied) | 735 | free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied) | |
738 | { | 736 | { | |
739 | struct mbuf *next; | 737 | struct mbuf *next; | |
740 | struct cmsghdr *cmsg; | 738 | struct cmsghdr *cmsg; | |
741 | bool do_free_rights = false; | 739 | bool do_free_rights = false; | |
742 | 740 | |||
743 | while (control != NULL) { | 741 | while (control != NULL) { | |
744 | cmsg = mtod(control, struct cmsghdr *); | 742 | cmsg = mtod(control, struct cmsghdr *); | |
745 | if (control == uncopied) | 743 | if (control == uncopied) | |
746 | do_free_rights = true; | 744 | do_free_rights = true; | |
747 | if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET | 745 | if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET | |
748 | && cmsg->cmsg_type == SCM_RIGHTS) | 746 | && cmsg->cmsg_type == SCM_RIGHTS) | |
749 | free_rights(control); | 747 | free_rights(control); | |
750 | next = control->m_next; | 748 | next = control->m_next; | |
751 | m_free(control); | 749 | m_free(control); | |
752 | control = next; | 750 | control = next; | |
753 | } | 751 | } | |
754 | } | 752 | } | |
755 | 753 | |||
756 | /* Copy socket control/CMSG data to user buffer, frees the mbuf */ | 754 | /* Copy socket control/CMSG data to user buffer, frees the mbuf */ | |
757 | int | 755 | int | |
758 | copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control) | 756 | copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control) | |
759 | { | 757 | { | |
760 | int i, len, error = 0; | 758 | int i, len, error = 0; | |
761 | struct cmsghdr *cmsg; | 759 | struct cmsghdr *cmsg; | |
762 | struct mbuf *m; | 760 | struct mbuf *m; | |
763 | char *q; | 761 | char *q; | |
764 | 762 | |||
765 | len = mp->msg_controllen; | 763 | len = mp->msg_controllen; | |
766 | if (len <= 0 || control == 0) { | 764 | if (len <= 0 || control == 0) { | |
767 | mp->msg_controllen = 0; | 765 | mp->msg_controllen = 0; | |
768 | free_control_mbuf(l, control, control); | 766 | free_control_mbuf(l, control, control); | |
769 | return 0; | 767 | return 0; | |
770 | } | 768 | } | |
771 | 769 | |||
772 | q = (char *)mp->msg_control; | 770 | q = (char *)mp->msg_control; | |
773 | 771 | |||
774 | for (m = control; m != NULL; ) { | 772 | for (m = control; m != NULL; ) { | |
775 | cmsg = mtod(m, struct cmsghdr *); | 773 | cmsg = mtod(m, struct cmsghdr *); | |
776 | i = m->m_len; | 774 | i = m->m_len; | |
777 | if (len < i) { | 775 | if (len < i) { | |
778 | mp->msg_flags |= MSG_CTRUNC; | 776 | mp->msg_flags |= MSG_CTRUNC; | |
779 | if (cmsg->cmsg_level == SOL_SOCKET | 777 | if (cmsg->cmsg_level == SOL_SOCKET | |
780 | && cmsg->cmsg_type == SCM_RIGHTS) | 778 | && cmsg->cmsg_type == SCM_RIGHTS) | |
781 | /* Do not truncate me ... */ | 779 | /* Do not truncate me ... */ | |
782 | break; | 780 | break; | |
783 | i = len; | 781 | i = len; | |
784 | } | 782 | } | |
785 | error = copyout(mtod(m, void *), q, i); | 783 | error = copyout(mtod(m, void *), q, i); | |
786 | ktrkuser("msgcontrol", mtod(m, void *), i); | 784 | ktrkuser("msgcontrol", mtod(m, void *), i); | |
787 | if (error != 0) { | 785 | if (error != 0) { | |
788 | /* We must free all the SCM_RIGHTS */ | 786 | /* We must free all the SCM_RIGHTS */ | |
789 | m = control; | 787 | m = control; | |
790 | break; | 788 | break; | |
791 | } | 789 | } | |
792 | m = m->m_next; | 790 | m = m->m_next; | |
793 | if (m) | 791 | if (m) | |
794 | i = ALIGN(i); | 792 | i = ALIGN(i); | |
795 | q += i; | 793 | q += i; | |
796 | len -= i; | 794 | len -= i; | |
797 | if (len <= 0) | 795 | if (len <= 0) | |
798 | break; | 796 | break; | |
799 | } | 797 | } | |
800 | 798 | |||
801 | free_control_mbuf(l, control, m); | 799 | free_control_mbuf(l, control, m); | |
802 | 800 | |||
803 | mp->msg_controllen = q - (char *)mp->msg_control; | 801 | mp->msg_controllen = q - (char *)mp->msg_control; | |
804 | return error; | 802 | return error; | |
805 | } | 803 | } | |
806 | 804 | |||
807 | int | 805 | int | |
808 | do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from, | 806 | do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from, | |
809 | struct mbuf **control, register_t *retsize) | 807 | struct mbuf **control, register_t *retsize) | |
810 | { | 808 | { | |
811 | struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov; | 809 | struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov; | |
812 | struct socket *so; | 810 | struct socket *so; | |
813 | struct uio auio; | 811 | struct uio auio; | |
814 | size_t len, iovsz; | 812 | size_t len, iovsz; | |
815 | int i, error; | 813 | int i, error; | |
816 | 814 | |||
817 | ktrkuser("msghdr", mp, sizeof *mp); | 815 | ktrkuser("msghdr", mp, sizeof *mp); | |
818 | 816 | |||
819 | *from = NULL; | 817 | *from = NULL; | |
820 | if (control != NULL) | 818 | if (control != NULL) | |
821 | *control = NULL; | 819 | *control = NULL; | |
822 | 820 | |||
823 | if ((error = fd_getsock(s, &so)) != 0) | 821 | if ((error = fd_getsock(s, &so)) != 0) | |
824 | return (error); | 822 | return (error); | |
825 | 823 | |||
826 | iovsz = mp->msg_iovlen * sizeof(struct iovec); | 824 | iovsz = mp->msg_iovlen * sizeof(struct iovec); | |
827 | 825 | |||
828 | if (mp->msg_flags & MSG_IOVUSRSPACE) { | 826 | if (mp->msg_flags & MSG_IOVUSRSPACE) { | |
829 | if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) { | 827 | if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) { | |
830 | if ((unsigned int)mp->msg_iovlen > IOV_MAX) { | 828 | if ((unsigned int)mp->msg_iovlen > IOV_MAX) { | |
831 | error = EMSGSIZE; | 829 | error = EMSGSIZE; | |
832 | goto out; | 830 | goto out; | |
833 | } | 831 | } | |
834 | iov = kmem_alloc(iovsz, KM_SLEEP); | 832 | iov = kmem_alloc(iovsz, KM_SLEEP); | |
835 | } | 833 | } | |
836 | if (mp->msg_iovlen != 0) { | 834 | if (mp->msg_iovlen != 0) { | |
837 | error = copyin(mp->msg_iov, iov, iovsz); | 835 | error = copyin(mp->msg_iov, iov, iovsz); | |
838 | if (error) | 836 | if (error) | |
839 | goto out; | 837 | goto out; | |
840 | } | 838 | } | |
841 | auio.uio_iov = iov; | 839 | auio.uio_iov = iov; | |
842 | } else | 840 | } else | |
843 | auio.uio_iov = mp->msg_iov; | 841 | auio.uio_iov = mp->msg_iov; | |
844 | auio.uio_iovcnt = mp->msg_iovlen; | 842 | auio.uio_iovcnt = mp->msg_iovlen; | |
845 | auio.uio_rw = UIO_READ; | 843 | auio.uio_rw = UIO_READ; | |
846 | auio.uio_offset = 0; /* XXX */ | 844 | auio.uio_offset = 0; /* XXX */ | |
847 | auio.uio_resid = 0; | 845 | auio.uio_resid = 0; | |
848 | KASSERT(l == curlwp); | 846 | KASSERT(l == curlwp); | |
849 | auio.uio_vmspace = l->l_proc->p_vmspace; | 847 | auio.uio_vmspace = l->l_proc->p_vmspace; | |
850 | 848 | |||
851 | tiov = auio.uio_iov; | 849 | tiov = auio.uio_iov; | |
852 | for (i = 0; i < mp->msg_iovlen; i++, tiov++) { | 850 | for (i = 0; i < mp->msg_iovlen; i++, tiov++) { | |
853 | /* | 851 | /* | |
854 | * Reads return ssize_t because -1 is returned on error. | 852 | * Reads return ssize_t because -1 is returned on error. | |
855 | * Therefore we must restrict the length to SSIZE_MAX to | 853 | * Therefore we must restrict the length to SSIZE_MAX to | |
856 | * avoid garbage return values. | 854 | * avoid garbage return values. | |
857 | */ | 855 | */ | |
858 | auio.uio_resid += tiov->iov_len; | 856 | auio.uio_resid += tiov->iov_len; | |
859 | if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | 857 | if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { | |
860 | error = EINVAL; | 858 | error = EINVAL; | |
861 | goto out; | 859 | goto out; | |
862 | } | 860 | } | |
863 | } | 861 | } | |
864 | 862 | |||
865 | ktriov = NULL; | 863 | ktriov = NULL; | |
866 | if (ktrpoint(KTR_GENIO)) { | 864 | if (ktrpoint(KTR_GENIO)) { | |
867 | ktriov = kmem_alloc(iovsz, KM_SLEEP); | 865 | ktriov = kmem_alloc(iovsz, KM_SLEEP); | |
868 | memcpy(ktriov, auio.uio_iov, iovsz); | 866 | memcpy(ktriov, auio.uio_iov, iovsz); | |
869 | } | 867 | } | |
870 | 868 | |||
871 | len = auio.uio_resid; | 869 | len = auio.uio_resid; | |
872 | mp->msg_flags &= MSG_USERFLAGS; | 870 | mp->msg_flags &= MSG_USERFLAGS; | |
873 | error = (*so->so_receive)(so, from, &auio, NULL, control, | 871 | error = (*so->so_receive)(so, from, &auio, NULL, control, | |
874 | &mp->msg_flags); | 872 | &mp->msg_flags); | |
875 | len -= auio.uio_resid; | 873 | len -= auio.uio_resid; | |
876 | *retsize = len; | 874 | *retsize = len; | |
877 | if (error != 0 && len != 0 | 875 | if (error != 0 && len != 0 | |
878 | && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) | 876 | && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) | |
879 | /* Some data transferred */ | 877 | /* Some data transferred */ | |
880 | error = 0; | 878 | error = 0; | |
881 | 879 | |||
882 | if (ktriov != NULL) { | 880 | if (ktriov != NULL) { | |
883 | ktrgeniov(s, UIO_READ, ktriov, len, error); | 881 | ktrgeniov(s, UIO_READ, ktriov, len, error); | |
884 | kmem_free(ktriov, iovsz); | 882 | kmem_free(ktriov, iovsz); | |
885 | } | 883 | } | |
886 | 884 | |||
887 | if (error != 0) { | 885 | if (error != 0) { | |
888 | m_freem(*from); | 886 | m_freem(*from); | |
889 | *from = NULL; | 887 | *from = NULL; | |
890 | if (control != NULL) { | 888 | if (control != NULL) { | |
891 | free_control_mbuf(l, *control, *control); | 889 | free_control_mbuf(l, *control, *control); | |
892 | *control = NULL; | 890 | *control = NULL; | |
893 | } | 891 | } | |
894 | } | 892 | } | |
895 | out: | 893 | out: | |
896 | if (iov != aiov) | 894 | if (iov != aiov) | |
897 | kmem_free(iov, iovsz); | 895 | kmem_free(iov, iovsz); | |
898 | fd_putfile(s); | 896 | fd_putfile(s); | |
899 | return (error); | 897 | return (error); | |
900 | } | 898 | } | |
901 | 899 | |||
902 | 900 | |||
903 | /* ARGSUSED */ | 901 | /* ARGSUSED */ | |
904 | int | 902 | int | |
905 | sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval) | 903 | sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval) | |
906 | { | 904 | { | |
907 | /* { | 905 | /* { | |
908 | syscallarg(int) s; | 906 | syscallarg(int) s; | |
909 | syscallarg(int) how; | 907 | syscallarg(int) how; | |
910 | } */ | 908 | } */ | |
911 | struct socket *so; | 909 | struct socket *so; | |
912 | int error; | 910 | int error; | |
913 | 911 | |||
914 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | 912 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | |
915 | return (error); | 913 | return (error); | |
916 | solock(so); | 914 | solock(so); | |
917 | error = soshutdown(so, SCARG(uap, how)); | 915 | error = soshutdown(so, SCARG(uap, how)); | |
918 | sounlock(so); | 916 | sounlock(so); | |
919 | fd_putfile(SCARG(uap, s)); | 917 | fd_putfile(SCARG(uap, s)); | |
920 | return (error); | 918 | return (error); | |
921 | } | 919 | } | |
922 | 920 | |||
923 | /* ARGSUSED */ | 921 | /* ARGSUSED */ | |
924 | int | 922 | int | |
925 | sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval) | 923 | sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval) | |
926 | { | 924 | { | |
927 | /* { | 925 | /* { | |
928 | syscallarg(int) s; | 926 | syscallarg(int) s; | |
929 | syscallarg(int) level; | 927 | syscallarg(int) level; | |
930 | syscallarg(int) name; | 928 | syscallarg(int) name; | |
931 | syscallarg(const void *) val; | 929 | syscallarg(const void *) val; | |
932 | syscallarg(unsigned int) valsize; | 930 | syscallarg(unsigned int) valsize; | |
933 | } */ | 931 | } */ | |
934 | struct sockopt sopt; | 932 | struct sockopt sopt; | |
935 | struct socket *so; | 933 | struct socket *so; | |
936 | int error; | 934 | int error; | |
937 | unsigned int len; | 935 | unsigned int len; | |
938 | 936 | |||
939 | len = SCARG(uap, valsize); | 937 | len = SCARG(uap, valsize); | |
940 | if (len > 0 && SCARG(uap, val) == NULL) | 938 | if (len > 0 && SCARG(uap, val) == NULL) | |
941 | return (EINVAL); | 939 | return (EINVAL); | |
942 | 940 | |||
943 | if (len > MCLBYTES) | 941 | if (len > MCLBYTES) | |
944 | return (EINVAL); | 942 | return (EINVAL); | |
945 | 943 | |||
946 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | 944 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | |
947 | return (error); | 945 | return (error); | |
948 | 946 | |||
949 | sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len); | 947 | sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len); | |
950 | 948 | |||
951 | if (len > 0) { | 949 | if (len > 0) { | |
952 | error = copyin(SCARG(uap, val), sopt.sopt_data, len); | 950 | error = copyin(SCARG(uap, val), sopt.sopt_data, len); | |
953 | if (error) | 951 | if (error) | |
954 | goto out; | 952 | goto out; | |
955 | } | 953 | } | |
956 | 954 | |||
957 | error = sosetopt(so, &sopt); | 955 | error = sosetopt(so, &sopt); | |
958 | 956 | |||
959 | out: | 957 | out: | |
960 | sockopt_destroy(&sopt); | 958 | sockopt_destroy(&sopt); | |
961 | fd_putfile(SCARG(uap, s)); | 959 | fd_putfile(SCARG(uap, s)); | |
962 | return (error); | 960 | return (error); | |
963 | } | 961 | } | |
964 | 962 | |||
965 | /* ARGSUSED */ | 963 | /* ARGSUSED */ | |
966 | int | 964 | int | |
967 | sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval) | 965 | sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval) | |
968 | { | 966 | { | |
969 | /* { | 967 | /* { | |
970 | syscallarg(int) s; | 968 | syscallarg(int) s; | |
971 | syscallarg(int) level; | 969 | syscallarg(int) level; | |
972 | syscallarg(int) name; | 970 | syscallarg(int) name; | |
973 | syscallarg(void *) val; | 971 | syscallarg(void *) val; | |
974 | syscallarg(unsigned int *) avalsize; | 972 | syscallarg(unsigned int *) avalsize; | |
975 | } */ | 973 | } */ | |
976 | struct sockopt sopt; | 974 | struct sockopt sopt; | |
977 | struct socket *so; | 975 | struct socket *so; | |
978 | unsigned int valsize, len; | 976 | unsigned int valsize, len; | |
979 | int error; | 977 | int error; | |
980 | 978 | |||
981 | if (SCARG(uap, val) != NULL) { | 979 | if (SCARG(uap, val) != NULL) { | |
982 | error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize)); | 980 | error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize)); | |
983 | if (error) | 981 | if (error) | |
984 | return (error); | 982 | return (error); | |
985 | } else | 983 | } else | |
986 | valsize = 0; | 984 | valsize = 0; | |
987 | 985 | |||
988 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | 986 | if ((error = fd_getsock(SCARG(uap, s), &so)) != 0) | |
989 | return (error); | 987 | return (error); | |
990 | 988 | |||
991 | sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0); | 989 | sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0); | |
992 | 990 | |||
993 | error = sogetopt(so, &sopt); | 991 | error = sogetopt(so, &sopt); | |
994 | if (error) | 992 | if (error) | |
995 | goto out; | 993 | goto out; | |
996 | 994 | |||
997 | if (valsize > 0) { | 995 | if (valsize > 0) { | |
998 | len = min(valsize, sopt.sopt_size); | 996 | len = min(valsize, sopt.sopt_size); | |
999 | error = copyout(sopt.sopt_data, SCARG(uap, val), len); | 997 | error = copyout(sopt.sopt_data, SCARG(uap, val), len); | |
1000 | if (error) | 998 | if (error) | |
1001 | goto out; | 999 | goto out; | |
1002 | 1000 | |||
1003 | error = copyout(&len, SCARG(uap, avalsize), sizeof(len)); | 1001 | error = copyout(&len, SCARG(uap, avalsize), sizeof(len)); | |
1004 | if (error) | 1002 | if (error) | |
1005 | goto out; | 1003 | goto out; | |
1006 | } | 1004 | } | |
1007 | 1005 | |||
1008 | out: | 1006 | out: | |
1009 | sockopt_destroy(&sopt); | 1007 | sockopt_destroy(&sopt); | |
1010 | fd_putfile(SCARG(uap, s)); | 1008 | fd_putfile(SCARG(uap, s)); | |
1011 | return (error); | 1009 | return (error); | |
1012 | } | 1010 | } | |
1013 | 1011 | |||
1014 | #ifdef PIPE_SOCKETPAIR | 1012 | #ifdef PIPE_SOCKETPAIR | |
1015 | /* ARGSUSED */ | 1013 | /* ARGSUSED */ | |
1016 | int | 1014 | int | |
1017 | pipe1(struct lwp *l, register_t *retval, int flags) | 1015 | pipe1(struct lwp *l, register_t *retval, int flags) | |
1018 | { | 1016 | { | |
1019 | file_t *rf, *wf; | 1017 | file_t *rf, *wf; | |
1020 | struct socket *rso, *wso; | 1018 | struct socket *rso, *wso; | |
1021 | int fd, error; | 1019 | int fd, error; | |
1022 | proc_t *p; | 1020 | proc_t *p; | |
1023 | 1021 | |||
1024 | if (flags & ~(O_CLOEXEC|O_NONBLOCK)) | 1022 | if (flags & ~(O_CLOEXEC|O_NONBLOCK)) | |
1025 | return EINVAL; | 1023 | return EINVAL; | |
1026 | p = curproc; | 1024 | p = curproc; | |
1027 | if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0) | 1025 | if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0) | |
1028 | return (error); | 1026 | return (error); | |
1029 | if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0) | 1027 | if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0) | |
1030 | goto free1; | 1028 | goto free1; | |
1031 | /* remember this socket pair implements a pipe */ | 1029 | /* remember this socket pair implements a pipe */ | |
1032 | wso->so_state |= SS_ISAPIPE; | 1030 | wso->so_state |= SS_ISAPIPE; | |
1033 | rso->so_state |= SS_ISAPIPE; | 1031 | rso->so_state |= SS_ISAPIPE; | |
1034 | if ((error = fd_allocfile(&rf, &fd)) != 0) | 1032 | if ((error = fd_allocfile(&rf, &fd)) != 0) | |
1035 | goto free2; | 1033 | goto free2; | |
1036 | retval[0] = fd; | 1034 | retval[0] = fd; | |
1037 | rf->f_flag = FREAD | flags; | 1035 | rf->f_flag = FREAD | flags; | |
1038 | rf->f_type = DTYPE_SOCKET; | 1036 | rf->f_type = DTYPE_SOCKET; | |
1039 | rf->f_ops = &socketops; | 1037 | rf->f_ops = &socketops; | |
1040 | rf->f_data = rso; | 1038 | rf->f_data = rso; | |
1041 | if ((error = fd_allocfile(&wf, &fd)) != 0) | 1039 | if ((error = fd_allocfile(&wf, &fd)) != 0) | |
1042 | goto free3; | 1040 | goto free3; | |
1043 | wf->f_flag = FWRITE | flags; | 1041 | wf->f_flag = FWRITE | flags; | |
1044 | wf->f_type = DTYPE_SOCKET; | 1042 | wf->f_type = DTYPE_SOCKET; | |
1045 | wf->f_ops = &socketops; | 1043 | wf->f_ops = &socketops; | |
1046 | wf->f_data = wso; | 1044 | wf->f_data = wso; | |
1047 | retval[1] = fd; | 1045 | retval[1] = fd; | |
1048 | solock(wso); | 1046 | solock(wso); | |
1049 | error = unp_connect2(wso, rso, PRU_CONNECT2); | 1047 | error = unp_connect2(wso, rso, PRU_CONNECT2); | |
1050 | sounlock(wso); | 1048 | sounlock(wso); | |
1051 | if (error != 0) | 1049 | if (error != 0) | |
1052 | goto free4; | 1050 | goto free4; | |
1053 | fd_affix(p, wf, (int)retval[1]); | 1051 | fd_affix(p, wf, (int)retval[1]); | |
1054 | fd_affix(p, rf, (int)retval[0]); | 1052 | fd_affix(p, rf, (int)retval[0]); | |
1055 | return (0); | 1053 | return (0); | |
1056 | free4: | 1054 | free4: | |
1057 | fd_abort(p, wf, (int)retval[1]); | 1055 | fd_abort(p, wf, (int)retval[1]); | |
1058 | free3: | 1056 | free3: | |
1059 | fd_abort(p, rf, (int)retval[0]); | 1057 | fd_abort(p, rf, (int)retval[0]); | |
1060 | free2: | 1058 | free2: | |
1061 | (void)soclose(wso); | 1059 | (void)soclose(wso); | |
1062 | free1: | 1060 | free1: | |
1063 | (void)soclose(rso); | 1061 | (void)soclose(rso); | |
1064 | return (error); | 1062 | return (error); | |
1065 | } | 1063 | } | |
1066 | #endif /* PIPE_SOCKETPAIR */ | 1064 | #endif /* PIPE_SOCKETPAIR */ | |
1067 | 1065 | |||
1068 | /* | 1066 | /* | |
1069 | * Get socket name. | 1067 | * Get socket name. | |
1070 | */ | 1068 | */ | |
1071 | /* ARGSUSED */ | 1069 | /* ARGSUSED */ | |
1072 | int | 1070 | int | |
1073 | do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam) | 1071 | do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam) | |
1074 | { | 1072 | { | |
1075 | struct socket *so; | 1073 | struct socket *so; | |
1076 | struct mbuf *m; | 1074 | struct mbuf *m; | |
1077 | int error; | 1075 | int error; | |
1078 | 1076 | |||
1079 | if ((error = fd_getsock(fd, &so)) != 0) | 1077 | if ((error = fd_getsock(fd, &so)) != 0) | |
1080 | return error; | 1078 | return error; | |
1081 | 1079 | |||
1082 | m = m_getclr(M_WAIT, MT_SONAME); | 1080 | m = m_getclr(M_WAIT, MT_SONAME); | |
1083 | MCLAIM(m, so->so_mowner); | 1081 | MCLAIM(m, so->so_mowner); | |
1084 | 1082 | |||
1085 | solock(so); | 1083 | solock(so); | |
1086 | if (which == PRU_PEERADDR | 1084 | if (which == PRU_PEERADDR | |
1087 | && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) { | 1085 | && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) { |