- Amend fd_hold() to take an argument and add assert (reflects two cases, fork1() and the rest, e.g. kthread_create(), when creating from lwp0).
- lwp_create(): do not touch filedesc internals, use fd_hold().

diff -r1.199 -r1.200 src/sys/kern/kern_descrip.c
(rmind)
--- src/sys/kern/kern_descrip.c 2009/08/16 11:00:20 1.199
+++ src/sys/kern/kern_descrip.c 2009/10/27 02:58:28 1.200
@@ -1,1828 +1,1830 @@ | @@ -1,1828 +1,1830 @@ | |||
1 | /* $NetBSD: kern_descrip.c,v 1.199 2009/08/16 11:00:20 yamt Exp $ */ | 1 | /* $NetBSD: kern_descrip.c,v 1.200 2009/10/27 02:58:28 rmind Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | 33 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | |
34 | * The Regents of the University of California. All rights reserved. | 34 | * The Regents of the University of California. All rights reserved. | |
35 | * (c) UNIX System Laboratories, Inc. | 35 | * (c) UNIX System Laboratories, Inc. | |
36 | * All or some portions of this file are derived from material licensed | 36 | * All or some portions of this file are derived from material licensed | |
37 | * to the University of California by American Telephone and Telegraph | 37 | * to the University of California by American Telephone and Telegraph | |
38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
39 | * the permission of UNIX System Laboratories, Inc. | 39 | * the permission of UNIX System Laboratories, Inc. | |
40 | * | 40 | * | |
41 | * Redistribution and use in source and binary forms, with or without | 41 | * Redistribution and use in source and binary forms, with or without | |
42 | * modification, are permitted provided that the following conditions | 42 | * modification, are permitted provided that the following conditions | |
43 | * are met: | 43 | * are met: | |
44 | * 1. Redistributions of source code must retain the above copyright | 44 | * 1. Redistributions of source code must retain the above copyright | |
45 | * notice, this list of conditions and the following disclaimer. | 45 | * notice, this list of conditions and the following disclaimer. | |
46 | * 2. Redistributions in binary form must reproduce the above copyright | 46 | * 2. Redistributions in binary form must reproduce the above copyright | |
47 | * notice, this list of conditions and the following disclaimer in the | 47 | * notice, this list of conditions and the following disclaimer in the | |
48 | * documentation and/or other materials provided with the distribution. | 48 | * documentation and/or other materials provided with the distribution. | |
49 | * 3. Neither the name of the University nor the names of its contributors | 49 | * 3. Neither the name of the University nor the names of its contributors | |
50 | * may be used to endorse or promote products derived from this software | 50 | * may be used to endorse or promote products derived from this software | |
51 | * without specific prior written permission. | 51 | * without specific prior written permission. | |
52 | * | 52 | * | |
53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
63 | * SUCH DAMAGE. | 63 | * SUCH DAMAGE. | |
64 | * | 64 | * | |
65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | 65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | |
66 | */ | 66 | */ | |
67 | 67 | |||
68 | /* | 68 | /* | |
69 | * File descriptor management. | 69 | * File descriptor management. | |
70 | */ | 70 | */ | |
71 | 71 | |||
72 | #include <sys/cdefs.h> | 72 | #include <sys/cdefs.h> | |
73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.199 2009/08/16 11:00:20 yamt Exp $"); | 73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.200 2009/10/27 02:58:28 rmind Exp $"); | |
74 | 74 | |||
75 | #include <sys/param.h> | 75 | #include <sys/param.h> | |
76 | #include <sys/systm.h> | 76 | #include <sys/systm.h> | |
77 | #include <sys/filedesc.h> | 77 | #include <sys/filedesc.h> | |
78 | #include <sys/kernel.h> | 78 | #include <sys/kernel.h> | |
79 | #include <sys/proc.h> | 79 | #include <sys/proc.h> | |
80 | #include <sys/file.h> | 80 | #include <sys/file.h> | |
81 | #include <sys/socket.h> | 81 | #include <sys/socket.h> | |
82 | #include <sys/socketvar.h> | 82 | #include <sys/socketvar.h> | |
83 | #include <sys/stat.h> | 83 | #include <sys/stat.h> | |
84 | #include <sys/ioctl.h> | 84 | #include <sys/ioctl.h> | |
85 | #include <sys/fcntl.h> | 85 | #include <sys/fcntl.h> | |
86 | #include <sys/pool.h> | 86 | #include <sys/pool.h> | |
87 | #include <sys/unistd.h> | 87 | #include <sys/unistd.h> | |
88 | #include <sys/resourcevar.h> | 88 | #include <sys/resourcevar.h> | |
89 | #include <sys/conf.h> | 89 | #include <sys/conf.h> | |
90 | #include <sys/event.h> | 90 | #include <sys/event.h> | |
91 | #include <sys/kauth.h> | 91 | #include <sys/kauth.h> | |
92 | #include <sys/atomic.h> | 92 | #include <sys/atomic.h> | |
93 | #include <sys/syscallargs.h> | 93 | #include <sys/syscallargs.h> | |
94 | #include <sys/cpu.h> | 94 | #include <sys/cpu.h> | |
95 | #include <sys/kmem.h> | 95 | #include <sys/kmem.h> | |
96 | #include <sys/vnode.h> | 96 | #include <sys/vnode.h> | |
97 | 97 | |||
98 | static int file_ctor(void *, void *, int); | 98 | static int file_ctor(void *, void *, int); | |
99 | static void file_dtor(void *, void *); | 99 | static void file_dtor(void *, void *); | |
100 | static int fdfile_ctor(void *, void *, int); | 100 | static int fdfile_ctor(void *, void *, int); | |
101 | static void fdfile_dtor(void *, void *); | 101 | static void fdfile_dtor(void *, void *); | |
102 | static int filedesc_ctor(void *, void *, int); | 102 | static int filedesc_ctor(void *, void *, int); | |
103 | static void filedesc_dtor(void *, void *); | 103 | static void filedesc_dtor(void *, void *); | |
104 | static int filedescopen(dev_t, int, int, lwp_t *); | 104 | static int filedescopen(dev_t, int, int, lwp_t *); | |
105 | 105 | |||
106 | kmutex_t filelist_lock; /* lock on filehead */ | 106 | kmutex_t filelist_lock; /* lock on filehead */ | |
107 | struct filelist filehead; /* head of list of open files */ | 107 | struct filelist filehead; /* head of list of open files */ | |
108 | u_int nfiles; /* actual number of open files */ | 108 | u_int nfiles; /* actual number of open files */ | |
109 | 109 | |||
110 | static pool_cache_t filedesc_cache; | 110 | static pool_cache_t filedesc_cache; | |
111 | static pool_cache_t file_cache; | 111 | static pool_cache_t file_cache; | |
112 | static pool_cache_t fdfile_cache; | 112 | static pool_cache_t fdfile_cache; | |
113 | 113 | |||
114 | const struct cdevsw filedesc_cdevsw = { | 114 | const struct cdevsw filedesc_cdevsw = { | |
115 | filedescopen, noclose, noread, nowrite, noioctl, | 115 | filedescopen, noclose, noread, nowrite, noioctl, | |
116 | nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, | 116 | nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, | |
117 | }; | 117 | }; | |
118 | 118 | |||
119 | /* For ease of reading. */ | 119 | /* For ease of reading. */ | |
120 | __strong_alias(fd_putvnode,fd_putfile) | 120 | __strong_alias(fd_putvnode,fd_putfile) | |
121 | __strong_alias(fd_putsock,fd_putfile) | 121 | __strong_alias(fd_putsock,fd_putfile) | |
122 | 122 | |||
123 | /* | 123 | /* | |
124 | * Initialize the descriptor system. | 124 | * Initialize the descriptor system. | |
125 | */ | 125 | */ | |
126 | void | 126 | void | |
127 | fd_sys_init(void) | 127 | fd_sys_init(void) | |
128 | { | 128 | { | |
129 | 129 | |||
130 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); | 130 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); | |
131 | 131 | |||
132 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, | 132 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, | |
133 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); | 133 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); | |
134 | KASSERT(file_cache != NULL); | 134 | KASSERT(file_cache != NULL); | |
135 | 135 | |||
136 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, | 136 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, | |
137 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, | 137 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, | |
138 | NULL); | 138 | NULL); | |
139 | KASSERT(fdfile_cache != NULL); | 139 | KASSERT(fdfile_cache != NULL); | |
140 | 140 | |||
141 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, | 141 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, | |
142 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, | 142 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, | |
143 | NULL); | 143 | NULL); | |
144 | KASSERT(filedesc_cache != NULL); | 144 | KASSERT(filedesc_cache != NULL); | |
145 | } | 145 | } | |
146 | 146 | |||
147 | static bool | 147 | static bool | |
148 | fd_isused(filedesc_t *fdp, unsigned fd) | 148 | fd_isused(filedesc_t *fdp, unsigned fd) | |
149 | { | 149 | { | |
150 | u_int off = fd >> NDENTRYSHIFT; | 150 | u_int off = fd >> NDENTRYSHIFT; | |
151 | 151 | |||
152 | KASSERT(fd < fdp->fd_dt->dt_nfiles); | 152 | KASSERT(fd < fdp->fd_dt->dt_nfiles); | |
153 | 153 | |||
154 | return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; | 154 | return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; | |
155 | } | 155 | } | |
156 | 156 | |||
157 | /* | 157 | /* | |
158 | * Verify that the bitmaps match the descriptor table. | 158 | * Verify that the bitmaps match the descriptor table. | |
159 | */ | 159 | */ | |
160 | static inline void | 160 | static inline void | |
161 | fd_checkmaps(filedesc_t *fdp) | 161 | fd_checkmaps(filedesc_t *fdp) | |
162 | { | 162 | { | |
163 | #ifdef DEBUG | 163 | #ifdef DEBUG | |
164 | fdtab_t *dt; | 164 | fdtab_t *dt; | |
165 | u_int fd; | 165 | u_int fd; | |
166 | 166 | |||
167 | dt = fdp->fd_dt; | 167 | dt = fdp->fd_dt; | |
168 | if (fdp->fd_refcnt == -1) { | 168 | if (fdp->fd_refcnt == -1) { | |
169 | /* | 169 | /* | |
170 | * fd_free tears down the table without maintaining its bitmap. | 170 | * fd_free tears down the table without maintaining its bitmap. | |
171 | */ | 171 | */ | |
172 | return; | 172 | return; | |
173 | } | 173 | } | |
174 | for (fd = 0; fd < dt->dt_nfiles; fd++) { | 174 | for (fd = 0; fd < dt->dt_nfiles; fd++) { | |
175 | if (fd < NDFDFILE) { | 175 | if (fd < NDFDFILE) { | |
176 | KASSERT(dt->dt_ff[fd] == | 176 | KASSERT(dt->dt_ff[fd] == | |
177 | (fdfile_t *)fdp->fd_dfdfile[fd]); | 177 | (fdfile_t *)fdp->fd_dfdfile[fd]); | |
178 | } | 178 | } | |
179 | if (dt->dt_ff[fd] == NULL) { | 179 | if (dt->dt_ff[fd] == NULL) { | |
180 | KASSERT(!fd_isused(fdp, fd)); | 180 | KASSERT(!fd_isused(fdp, fd)); | |
181 | } else if (dt->dt_ff[fd]->ff_file != NULL) { | 181 | } else if (dt->dt_ff[fd]->ff_file != NULL) { | |
182 | KASSERT(fd_isused(fdp, fd)); | 182 | KASSERT(fd_isused(fdp, fd)); | |
183 | } | 183 | } | |
184 | } | 184 | } | |
185 | #else /* DEBUG */ | 185 | #else /* DEBUG */ | |
186 | /* nothing */ | 186 | /* nothing */ | |
187 | #endif /* DEBUG */ | 187 | #endif /* DEBUG */ | |
188 | } | 188 | } | |
189 | 189 | |||
190 | static int | 190 | static int | |
191 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) | 191 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) | |
192 | { | 192 | { | |
193 | int i, off, maxoff; | 193 | int i, off, maxoff; | |
194 | uint32_t sub; | 194 | uint32_t sub; | |
195 | 195 | |||
196 | KASSERT(mutex_owned(&fdp->fd_lock)); | 196 | KASSERT(mutex_owned(&fdp->fd_lock)); | |
197 | 197 | |||
198 | fd_checkmaps(fdp); | 198 | fd_checkmaps(fdp); | |
199 | 199 | |||
200 | if (want > bits) | 200 | if (want > bits) | |
201 | return -1; | 201 | return -1; | |
202 | 202 | |||
203 | off = want >> NDENTRYSHIFT; | 203 | off = want >> NDENTRYSHIFT; | |
204 | i = want & NDENTRYMASK; | 204 | i = want & NDENTRYMASK; | |
205 | if (i) { | 205 | if (i) { | |
206 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); | 206 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); | |
207 | if (sub != ~0) | 207 | if (sub != ~0) | |
208 | goto found; | 208 | goto found; | |
209 | off++; | 209 | off++; | |
210 | } | 210 | } | |
211 | 211 | |||
212 | maxoff = NDLOSLOTS(bits); | 212 | maxoff = NDLOSLOTS(bits); | |
213 | while (off < maxoff) { | 213 | while (off < maxoff) { | |
214 | if ((sub = bitmap[off]) != ~0) | 214 | if ((sub = bitmap[off]) != ~0) | |
215 | goto found; | 215 | goto found; | |
216 | off++; | 216 | off++; | |
217 | } | 217 | } | |
218 | 218 | |||
219 | return (-1); | 219 | return (-1); | |
220 | 220 | |||
221 | found: | 221 | found: | |
222 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; | 222 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; | |
223 | } | 223 | } | |
224 | 224 | |||
225 | static int | 225 | static int | |
226 | fd_last_set(filedesc_t *fd, int last) | 226 | fd_last_set(filedesc_t *fd, int last) | |
227 | { | 227 | { | |
228 | int off, i; | 228 | int off, i; | |
229 | fdfile_t **ff = fd->fd_dt->dt_ff; | 229 | fdfile_t **ff = fd->fd_dt->dt_ff; | |
230 | uint32_t *bitmap = fd->fd_lomap; | 230 | uint32_t *bitmap = fd->fd_lomap; | |
231 | 231 | |||
232 | KASSERT(mutex_owned(&fd->fd_lock)); | 232 | KASSERT(mutex_owned(&fd->fd_lock)); | |
233 | 233 | |||
234 | fd_checkmaps(fd); | 234 | fd_checkmaps(fd); | |
235 | 235 | |||
236 | off = (last - 1) >> NDENTRYSHIFT; | 236 | off = (last - 1) >> NDENTRYSHIFT; | |
237 | 237 | |||
238 | while (off >= 0 && !bitmap[off]) | 238 | while (off >= 0 && !bitmap[off]) | |
239 | off--; | 239 | off--; | |
240 | 240 | |||
241 | if (off < 0) | 241 | if (off < 0) | |
242 | return (-1); | 242 | return (-1); | |
243 | 243 | |||
244 | i = ((off + 1) << NDENTRYSHIFT) - 1; | 244 | i = ((off + 1) << NDENTRYSHIFT) - 1; | |
245 | if (i >= last) | 245 | if (i >= last) | |
246 | i = last - 1; | 246 | i = last - 1; | |
247 | 247 | |||
248 | /* XXX should use bitmap */ | 248 | /* XXX should use bitmap */ | |
249 | while (i > 0 && (ff[i] == NULL || !ff[i]->ff_allocated)) | 249 | while (i > 0 && (ff[i] == NULL || !ff[i]->ff_allocated)) | |
250 | i--; | 250 | i--; | |
251 | 251 | |||
252 | return (i); | 252 | return (i); | |
253 | } | 253 | } | |
254 | 254 | |||
255 | static inline void | 255 | static inline void | |
256 | fd_used(filedesc_t *fdp, unsigned fd) | 256 | fd_used(filedesc_t *fdp, unsigned fd) | |
257 | { | 257 | { | |
258 | u_int off = fd >> NDENTRYSHIFT; | 258 | u_int off = fd >> NDENTRYSHIFT; | |
259 | fdfile_t *ff; | 259 | fdfile_t *ff; | |
260 | 260 | |||
261 | ff = fdp->fd_dt->dt_ff[fd]; | 261 | ff = fdp->fd_dt->dt_ff[fd]; | |
262 | 262 | |||
263 | KASSERT(mutex_owned(&fdp->fd_lock)); | 263 | KASSERT(mutex_owned(&fdp->fd_lock)); | |
264 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); | 264 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); | |
265 | KASSERT(ff != NULL); | 265 | KASSERT(ff != NULL); | |
266 | KASSERT(ff->ff_file == NULL); | 266 | KASSERT(ff->ff_file == NULL); | |
267 | KASSERT(!ff->ff_allocated); | 267 | KASSERT(!ff->ff_allocated); | |
268 | 268 | |||
269 | ff->ff_allocated = 1; | 269 | ff->ff_allocated = 1; | |
270 | fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); | 270 | fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); | |
271 | if (__predict_false(fdp->fd_lomap[off] == ~0)) { | 271 | if (__predict_false(fdp->fd_lomap[off] == ~0)) { | |
272 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | 272 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | |
273 | (1 << (off & NDENTRYMASK))) == 0); | 273 | (1 << (off & NDENTRYMASK))) == 0); | |
274 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); | 274 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); | |
275 | } | 275 | } | |
276 | 276 | |||
277 | if ((int)fd > fdp->fd_lastfile) { | 277 | if ((int)fd > fdp->fd_lastfile) { | |
278 | fdp->fd_lastfile = fd; | 278 | fdp->fd_lastfile = fd; | |
279 | } | 279 | } | |
280 | 280 | |||
281 | fd_checkmaps(fdp); | 281 | fd_checkmaps(fdp); | |
282 | } | 282 | } | |
283 | 283 | |||
284 | static inline void | 284 | static inline void | |
285 | fd_unused(filedesc_t *fdp, unsigned fd) | 285 | fd_unused(filedesc_t *fdp, unsigned fd) | |
286 | { | 286 | { | |
287 | u_int off = fd >> NDENTRYSHIFT; | 287 | u_int off = fd >> NDENTRYSHIFT; | |
288 | fdfile_t *ff; | 288 | fdfile_t *ff; | |
289 | 289 | |||
290 | ff = fdp->fd_dt->dt_ff[fd]; | 290 | ff = fdp->fd_dt->dt_ff[fd]; | |
291 | 291 | |||
292 | /* | 292 | /* | |
293 | * Don't assert the lock is held here, as we may be copying | 293 | * Don't assert the lock is held here, as we may be copying | |
294 | * the table during exec() and it is not needed there. | 294 | * the table during exec() and it is not needed there. | |
295 | * procfs and sysctl are locked out by proc::p_reflock. | 295 | * procfs and sysctl are locked out by proc::p_reflock. | |
296 | * | 296 | * | |
297 | * KASSERT(mutex_owned(&fdp->fd_lock)); | 297 | * KASSERT(mutex_owned(&fdp->fd_lock)); | |
298 | */ | 298 | */ | |
299 | KASSERT(ff != NULL); | 299 | KASSERT(ff != NULL); | |
300 | KASSERT(ff->ff_file == NULL); | 300 | KASSERT(ff->ff_file == NULL); | |
301 | KASSERT(ff->ff_allocated); | 301 | KASSERT(ff->ff_allocated); | |
302 | 302 | |||
303 | if (fd < fdp->fd_freefile) { | 303 | if (fd < fdp->fd_freefile) { | |
304 | fdp->fd_freefile = fd; | 304 | fdp->fd_freefile = fd; | |
305 | } | 305 | } | |
306 | 306 | |||
307 | if (fdp->fd_lomap[off] == ~0) { | 307 | if (fdp->fd_lomap[off] == ~0) { | |
308 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | 308 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | |
309 | (1 << (off & NDENTRYMASK))) != 0); | 309 | (1 << (off & NDENTRYMASK))) != 0); | |
310 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | 310 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | |
311 | ~(1 << (off & NDENTRYMASK)); | 311 | ~(1 << (off & NDENTRYMASK)); | |
312 | } | 312 | } | |
313 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | 313 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | |
314 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | 314 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | |
315 | ff->ff_allocated = 0; | 315 | ff->ff_allocated = 0; | |
316 | 316 | |||
317 | KASSERT(fd <= fdp->fd_lastfile); | 317 | KASSERT(fd <= fdp->fd_lastfile); | |
318 | if (fd == fdp->fd_lastfile) { | 318 | if (fd == fdp->fd_lastfile) { | |
319 | fdp->fd_lastfile = fd_last_set(fdp, fd); | 319 | fdp->fd_lastfile = fd_last_set(fdp, fd); | |
320 | } | 320 | } | |
321 | fd_checkmaps(fdp); | 321 | fd_checkmaps(fdp); | |
322 | } | 322 | } | |
323 | 323 | |||
324 | /* | 324 | /* | |
325 | * Look up the file structure corresponding to a file descriptor | 325 | * Look up the file structure corresponding to a file descriptor | |
326 | * and return the file, holding a reference on the descriptor. | 326 | * and return the file, holding a reference on the descriptor. | |
327 | */ | 327 | */ | |
328 | inline file_t * | 328 | inline file_t * | |
329 | fd_getfile(unsigned fd) | 329 | fd_getfile(unsigned fd) | |
330 | { | 330 | { | |
331 | filedesc_t *fdp; | 331 | filedesc_t *fdp; | |
332 | fdfile_t *ff; | 332 | fdfile_t *ff; | |
333 | file_t *fp; | 333 | file_t *fp; | |
334 | fdtab_t *dt; | 334 | fdtab_t *dt; | |
335 | 335 | |||
336 | /* | 336 | /* | |
337 | * Look up the fdfile structure representing this descriptor. | 337 | * Look up the fdfile structure representing this descriptor. | |
338 | * We are doing this unlocked. See fd_tryexpand(). | 338 | * We are doing this unlocked. See fd_tryexpand(). | |
339 | */ | 339 | */ | |
340 | fdp = curlwp->l_fd; | 340 | fdp = curlwp->l_fd; | |
341 | dt = fdp->fd_dt; | 341 | dt = fdp->fd_dt; | |
342 | if (__predict_false(fd >= dt->dt_nfiles)) { | 342 | if (__predict_false(fd >= dt->dt_nfiles)) { | |
343 | return NULL; | 343 | return NULL; | |
344 | } | 344 | } | |
345 | ff = dt->dt_ff[fd]; | 345 | ff = dt->dt_ff[fd]; | |
346 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 346 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
347 | if (__predict_false(ff == NULL)) { | 347 | if (__predict_false(ff == NULL)) { | |
348 | return NULL; | 348 | return NULL; | |
349 | } | 349 | } | |
350 | 350 | |||
351 | /* Now get a reference to the descriptor. */ | 351 | /* Now get a reference to the descriptor. */ | |
352 | if (fdp->fd_refcnt == 1) { | 352 | if (fdp->fd_refcnt == 1) { | |
353 | /* | 353 | /* | |
354 | * Single threaded: don't need to worry about concurrent | 354 | * Single threaded: don't need to worry about concurrent | |
355 | * access (other than earlier calls to kqueue, which may | 355 | * access (other than earlier calls to kqueue, which may | |
356 | * hold a reference to the descriptor). | 356 | * hold a reference to the descriptor). | |
357 | */ | 357 | */ | |
358 | ff->ff_refcnt++; | 358 | ff->ff_refcnt++; | |
359 | } else { | 359 | } else { | |
360 | /* | 360 | /* | |
361 | * Multi threaded: issue a memory barrier to ensure that we | 361 | * Multi threaded: issue a memory barrier to ensure that we | |
362 | * acquire the file pointer _after_ adding a reference. If | 362 | * acquire the file pointer _after_ adding a reference. If | |
363 | * no memory barrier, we could fetch a stale pointer. | 363 | * no memory barrier, we could fetch a stale pointer. | |
364 | */ | 364 | */ | |
365 | atomic_inc_uint(&ff->ff_refcnt); | 365 | atomic_inc_uint(&ff->ff_refcnt); | |
366 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | 366 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | |
367 | membar_enter(); | 367 | membar_enter(); | |
368 | #endif | 368 | #endif | |
369 | } | 369 | } | |
370 | 370 | |||
371 | /* | 371 | /* | |
372 | * If the file is not open or is being closed then put the | 372 | * If the file is not open or is being closed then put the | |
373 | * reference back. | 373 | * reference back. | |
374 | */ | 374 | */ | |
375 | fp = ff->ff_file; | 375 | fp = ff->ff_file; | |
376 | if (__predict_true(fp != NULL)) { | 376 | if (__predict_true(fp != NULL)) { | |
377 | return fp; | 377 | return fp; | |
378 | } | 378 | } | |
379 | fd_putfile(fd); | 379 | fd_putfile(fd); | |
380 | return NULL; | 380 | return NULL; | |
381 | } | 381 | } | |
382 | 382 | |||
383 | /* | 383 | /* | |
384 | * Release a reference to a file descriptor acquired with fd_getfile(). | 384 | * Release a reference to a file descriptor acquired with fd_getfile(). | |
385 | */ | 385 | */ | |
386 | void | 386 | void | |
387 | fd_putfile(unsigned fd) | 387 | fd_putfile(unsigned fd) | |
388 | { | 388 | { | |
389 | filedesc_t *fdp; | 389 | filedesc_t *fdp; | |
390 | fdfile_t *ff; | 390 | fdfile_t *ff; | |
391 | u_int u, v; | 391 | u_int u, v; | |
392 | 392 | |||
393 | fdp = curlwp->l_fd; | 393 | fdp = curlwp->l_fd; | |
394 | ff = fdp->fd_dt->dt_ff[fd]; | 394 | ff = fdp->fd_dt->dt_ff[fd]; | |
395 | 395 | |||
396 | KASSERT(fd < fdp->fd_dt->dt_nfiles); | 396 | KASSERT(fd < fdp->fd_dt->dt_nfiles); | |
397 | KASSERT(ff != NULL); | 397 | KASSERT(ff != NULL); | |
398 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | 398 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | |
399 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 399 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
400 | 400 | |||
401 | if (fdp->fd_refcnt == 1) { | 401 | if (fdp->fd_refcnt == 1) { | |
402 | /* | 402 | /* | |
403 | * Single threaded: don't need to worry about concurrent | 403 | * Single threaded: don't need to worry about concurrent | |
404 | * access (other than earlier calls to kqueue, which may | 404 | * access (other than earlier calls to kqueue, which may | |
405 | * hold a reference to the descriptor). | 405 | * hold a reference to the descriptor). | |
406 | */ | 406 | */ | |
407 | if (__predict_false((ff->ff_refcnt & FR_CLOSING) != 0)) { | 407 | if (__predict_false((ff->ff_refcnt & FR_CLOSING) != 0)) { | |
408 | fd_close(fd); | 408 | fd_close(fd); | |
409 | return; | 409 | return; | |
410 | } | 410 | } | |
411 | ff->ff_refcnt--; | 411 | ff->ff_refcnt--; | |
412 | return; | 412 | return; | |
413 | } | 413 | } | |
414 | 414 | |||
415 | /* | 415 | /* | |
416 | * Ensure that any use of the file is complete and globally | 416 | * Ensure that any use of the file is complete and globally | |
417 | * visible before dropping the final reference. If no membar, | 417 | * visible before dropping the final reference. If no membar, | |
418 | * the current CPU could still access memory associated with | 418 | * the current CPU could still access memory associated with | |
419 | * the file after it has been freed or recycled by another | 419 | * the file after it has been freed or recycled by another | |
420 | * CPU. | 420 | * CPU. | |
421 | */ | 421 | */ | |
422 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | 422 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | |
423 | membar_exit(); | 423 | membar_exit(); | |
424 | #endif | 424 | #endif | |
425 | 425 | |||
426 | /* | 426 | /* | |
427 | * Be optimistic and start out with the assumption that no other | 427 | * Be optimistic and start out with the assumption that no other | |
428 | * threads are trying to close the descriptor. If the CAS fails, | 428 | * threads are trying to close the descriptor. If the CAS fails, | |
429 | * we lost a race and/or it's being closed. | 429 | * we lost a race and/or it's being closed. | |
430 | */ | 430 | */ | |
431 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | 431 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | |
432 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | 432 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | |
433 | if (__predict_true(u == v)) { | 433 | if (__predict_true(u == v)) { | |
434 | return; | 434 | return; | |
435 | } | 435 | } | |
436 | if (__predict_false((v & FR_CLOSING) != 0)) { | 436 | if (__predict_false((v & FR_CLOSING) != 0)) { | |
437 | break; | 437 | break; | |
438 | } | 438 | } | |
439 | } | 439 | } | |
440 | 440 | |||
441 | /* Another thread is waiting to close the file: join it. */ | 441 | /* Another thread is waiting to close the file: join it. */ | |
442 | (void)fd_close(fd); | 442 | (void)fd_close(fd); | |
443 | } | 443 | } | |
444 | 444 | |||
445 | /* | 445 | /* | |
446 | * Convenience wrapper around fd_getfile() that returns reference | 446 | * Convenience wrapper around fd_getfile() that returns reference | |
447 | * to a vnode. | 447 | * to a vnode. | |
448 | */ | 448 | */ | |
449 | int | 449 | int | |
450 | fd_getvnode(unsigned fd, file_t **fpp) | 450 | fd_getvnode(unsigned fd, file_t **fpp) | |
451 | { | 451 | { | |
452 | vnode_t *vp; | 452 | vnode_t *vp; | |
453 | file_t *fp; | 453 | file_t *fp; | |
454 | 454 | |||
455 | fp = fd_getfile(fd); | 455 | fp = fd_getfile(fd); | |
456 | if (__predict_false(fp == NULL)) { | 456 | if (__predict_false(fp == NULL)) { | |
457 | return EBADF; | 457 | return EBADF; | |
458 | } | 458 | } | |
459 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { | 459 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { | |
460 | fd_putfile(fd); | 460 | fd_putfile(fd); | |
461 | return EINVAL; | 461 | return EINVAL; | |
462 | } | 462 | } | |
463 | vp = fp->f_data; | 463 | vp = fp->f_data; | |
464 | if (__predict_false(vp->v_type == VBAD)) { | 464 | if (__predict_false(vp->v_type == VBAD)) { | |
465 | /* XXX Is this case really necessary? */ | 465 | /* XXX Is this case really necessary? */ | |
466 | fd_putfile(fd); | 466 | fd_putfile(fd); | |
467 | return EBADF; | 467 | return EBADF; | |
468 | } | 468 | } | |
469 | *fpp = fp; | 469 | *fpp = fp; | |
470 | return 0; | 470 | return 0; | |
471 | } | 471 | } | |
472 | 472 | |||
473 | /* | 473 | /* | |
474 | * Convenience wrapper around fd_getfile() that returns reference | 474 | * Convenience wrapper around fd_getfile() that returns reference | |
475 | * to a socket. | 475 | * to a socket. | |
476 | */ | 476 | */ | |
477 | int | 477 | int | |
478 | fd_getsock(unsigned fd, struct socket **sop) | 478 | fd_getsock(unsigned fd, struct socket **sop) | |
479 | { | 479 | { | |
480 | file_t *fp; | 480 | file_t *fp; | |
481 | 481 | |||
482 | fp = fd_getfile(fd); | 482 | fp = fd_getfile(fd); | |
483 | if (__predict_false(fp == NULL)) { | 483 | if (__predict_false(fp == NULL)) { | |
484 | return EBADF; | 484 | return EBADF; | |
485 | } | 485 | } | |
486 | if (__predict_false(fp->f_type != DTYPE_SOCKET)) { | 486 | if (__predict_false(fp->f_type != DTYPE_SOCKET)) { | |
487 | fd_putfile(fd); | 487 | fd_putfile(fd); | |
488 | return ENOTSOCK; | 488 | return ENOTSOCK; | |
489 | } | 489 | } | |
490 | *sop = fp->f_data; | 490 | *sop = fp->f_data; | |
491 | return 0; | 491 | return 0; | |
492 | } | 492 | } | |
493 | 493 | |||
/*
 * Look up the file structure corresponding to a file descriptor
 * and return it with a reference held on the file, not the
 * descriptor.
 *
 * This is heavyweight and only used when accessing descriptors
 * from a foreign process.  The caller must ensure that `p' does
 * not exit or fork across this call.
 *
 * To release the file (not descriptor) reference, use closef().
 */
file_t *
fd_getfile2(proc_t *p, unsigned fd)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;
	fdtab_t *dt;

	fdp = p->p_fd;
	mutex_enter(&fdp->fd_lock);
	dt = fdp->fd_dt;
	/* Out of range, unallocated slot, or half-open descriptor: fail. */
	if (fd >= dt->dt_nfiles) {
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	if ((ff = dt->dt_ff[fd]) == NULL) {
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	if ((fp = ff->ff_file) == NULL) {
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	/*
	 * Bump the file's own reference count (f_count) while fd_lock
	 * still pins the descriptor table, so the file cannot be freed
	 * between lookup and reference acquisition.
	 */
	mutex_enter(&fp->f_lock);
	fp->f_count++;
	mutex_exit(&fp->f_lock);
	mutex_exit(&fdp->fd_lock);

	return fp;
}
535 | 535 | |||
/*
 * Internal form of close.  Must be called with a reference to the
 * descriptor, and will drop the reference.  When all descriptor
 * references are dropped, releases the descriptor slot and a single
 * reference to the file structure.
 *
 * Returns 0 on success, or EBADF if another thread was already in
 * the process of closing this descriptor.
 */
int
fd_close(unsigned fd)
{
	struct flock lf;
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;
	proc_t *p;
	lwp_t *l;
	u_int refcnt;

	l = curlwp;
	p = l->l_proc;
	fdp = l->l_fd;
	ff = fdp->fd_dt->dt_ff[fd];

	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);

	mutex_enter(&fdp->fd_lock);
	KASSERT((ff->ff_refcnt & FR_MASK) > 0);
	if (__predict_false(ff->ff_file == NULL)) {
		/*
		 * Another user of the file is already closing, and is
		 * waiting for other users of the file to drain.  Release
		 * our reference, and wake up the closer.
		 */
		atomic_dec_uint(&ff->ff_refcnt);
		cv_broadcast(&ff->ff_closing);
		mutex_exit(&fdp->fd_lock);

		/*
		 * An application error, so pretend that the descriptor
		 * was already closed.  We can't safely wait for it to
		 * be closed without potentially deadlocking.
		 */
		return (EBADF);
	}
	KASSERT((ff->ff_refcnt & FR_CLOSING) == 0);

	/*
	 * There may be multiple users of this file within the process.
	 * Notify existing and new users that the file is closing.  This
	 * will prevent them from adding additional uses to this file
	 * while we are closing it.  Clearing ff_file is the "closing"
	 * signal that fd_getfile() checks for.
	 */
	fp = ff->ff_file;
	ff->ff_file = NULL;
	ff->ff_exclose = false;

	/*
	 * We expect the caller to hold a descriptor reference - drop it.
	 * The reference count may increase beyond zero at this point due
	 * to an erroneous descriptor reference by an application, but
	 * fd_getfile() will notice that the file is being closed and drop
	 * the reference again.
	 */
	if (fdp->fd_refcnt == 1) {
		/* Single threaded: no atomics needed. */
		refcnt = --(ff->ff_refcnt);
	} else {
		/* Multi threaded. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		/* Order the ff_file = NULL store before the decrement. */
		membar_producer();
#endif
		refcnt = atomic_dec_uint_nv(&ff->ff_refcnt);
	}
	if (__predict_false(refcnt != 0)) {
		/*
		 * Wait for other references to drain.  This is typically
		 * an application error - the descriptor is being closed
		 * while still in use.
		 *
		 */
		atomic_or_uint(&ff->ff_refcnt, FR_CLOSING);

		/*
		 * Remove any knotes attached to the file.  A knote
		 * attached to the descriptor can hold references on it.
		 */
		mutex_exit(&fdp->fd_lock);
		if (!SLIST_EMPTY(&ff->ff_knlist)) {
			knote_fdclose(fd);
		}

		/* Try to drain out descriptor references. */
		(*fp->f_ops->fo_drain)(fp);
		mutex_enter(&fdp->fd_lock);

		/*
		 * We need to see the count drop to zero at least once,
		 * in order to ensure that all pre-existing references
		 * have been drained.  New references past this point are
		 * of no interest.
		 */
		while ((ff->ff_refcnt & FR_MASK) != 0) {
			cv_wait(&ff->ff_closing, &fdp->fd_lock);
		}
		atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING);
	} else {
		/* If no references, there must be no knotes. */
		KASSERT(SLIST_EMPTY(&ff->ff_knlist));
	}

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (__predict_false((p->p_flag & PK_ADVLOCK) != 0 &&
	    fp->f_type == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		/* Drop fd_lock across VOP_ADVLOCK: it may sleep. */
		mutex_exit(&fdp->fd_lock);
		(void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX);
		mutex_enter(&fdp->fd_lock);
	}

	/* Free descriptor slot. */
	fd_unused(fdp, fd);
	mutex_exit(&fdp->fd_lock);

	/* Now drop reference to the file itself. */
	return closef(fp);
}
671 | 671 | |||
672 | /* | 672 | /* | |
673 | * Duplicate a file descriptor. | 673 | * Duplicate a file descriptor. | |
674 | */ | 674 | */ | |
675 | int | 675 | int | |
676 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) | 676 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) | |
677 | { | 677 | { | |
678 | proc_t *p; | 678 | proc_t *p; | |
679 | int error; | 679 | int error; | |
680 | 680 | |||
681 | p = curproc; | 681 | p = curproc; | |
682 | 682 | |||
683 | while ((error = fd_alloc(p, minfd, newp)) != 0) { | 683 | while ((error = fd_alloc(p, minfd, newp)) != 0) { | |
684 | if (error != ENOSPC) { | 684 | if (error != ENOSPC) { | |
685 | return error; | 685 | return error; | |
686 | } | 686 | } | |
687 | fd_tryexpand(p); | 687 | fd_tryexpand(p); | |
688 | } | 688 | } | |
689 | 689 | |||
690 | curlwp->l_fd->fd_dt->dt_ff[*newp]->ff_exclose = exclose; | 690 | curlwp->l_fd->fd_dt->dt_ff[*newp]->ff_exclose = exclose; | |
691 | fd_affix(p, fp, *newp); | 691 | fd_affix(p, fp, *newp); | |
692 | return 0; | 692 | return 0; | |
693 | } | 693 | } | |
694 | 694 | |||
/*
 * dup2 operation: install a copy of `fp' at the exact descriptor
 * number `new', closing whatever is currently open there.
 */
int
fd_dup2(file_t *fp, unsigned new)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	fdtab_t *dt;

	fdp = curlwp->l_fd;

	/*
	 * Ensure there are enough slots in the descriptor table,
	 * and allocate an fdfile_t up front in case we need it.
	 * (Pre-allocating avoids sleeping for memory while holding
	 * fd_lock below.)
	 */
	while (new >= fdp->fd_dt->dt_nfiles) {
		fd_tryexpand(curproc);
	}
	ff = pool_cache_get(fdfile_cache, PR_WAITOK);

	/*
	 * If there is already a file open, close it.  If the file is
	 * half open, wait for it to be constructed before closing it.
	 * XXX Potential for deadlock here?
	 */
	mutex_enter(&fdp->fd_lock);
	while (fd_isused(fdp, new)) {
		/* fd_getfile()/fd_close() must run without fd_lock held. */
		mutex_exit(&fdp->fd_lock);
		if (fd_getfile(new) != NULL) {
			(void)fd_close(new);
		} else {
			/*
			 * Crummy, but unlikely to happen.
			 * Can occur if we interrupt another
			 * thread while it is opening a file.
			 */
			kpause("dup2", false, 1, NULL);
		}
		mutex_enter(&fdp->fd_lock);
	}
	dt = fdp->fd_dt;
	if (dt->dt_ff[new] == NULL) {
		/* Slot had no fdfile_t yet: consume the pre-allocated one. */
		KASSERT(new >= NDFDFILE);
		dt->dt_ff[new] = ff;
		ff = NULL;
	}
	fd_used(fdp, new);
	mutex_exit(&fdp->fd_lock);

	/* Slot is now allocated.  Insert copy of the file. */
	fd_affix(curproc, fp, new);
	if (ff != NULL) {
		/* Pre-allocation was not needed: return it to the cache. */
		pool_cache_put(fdfile_cache, ff);
	}
	return 0;
}
752 | 752 | |||
753 | /* | 753 | /* | |
754 | * Drop reference to a file structure. | 754 | * Drop reference to a file structure. | |
755 | */ | 755 | */ | |
756 | int | 756 | int | |
757 | closef(file_t *fp) | 757 | closef(file_t *fp) | |
758 | { | 758 | { | |
759 | struct flock lf; | 759 | struct flock lf; | |
760 | int error; | 760 | int error; | |
761 | 761 | |||
762 | /* | 762 | /* | |
763 | * Drop reference. If referenced elsewhere it's still open | 763 | * Drop reference. If referenced elsewhere it's still open | |
764 | * and we have nothing more to do. | 764 | * and we have nothing more to do. | |
765 | */ | 765 | */ | |
766 | mutex_enter(&fp->f_lock); | 766 | mutex_enter(&fp->f_lock); | |
767 | KASSERT(fp->f_count > 0); | 767 | KASSERT(fp->f_count > 0); | |
768 | if (--fp->f_count > 0) { | 768 | if (--fp->f_count > 0) { | |
769 | mutex_exit(&fp->f_lock); | 769 | mutex_exit(&fp->f_lock); | |
770 | return 0; | 770 | return 0; | |
771 | } | 771 | } | |
772 | KASSERT(fp->f_count == 0); | 772 | KASSERT(fp->f_count == 0); | |
773 | mutex_exit(&fp->f_lock); | 773 | mutex_exit(&fp->f_lock); | |
774 | 774 | |||
775 | /* We held the last reference - release locks, close and free. */ | 775 | /* We held the last reference - release locks, close and free. */ | |
776 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { | 776 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { | |
777 | lf.l_whence = SEEK_SET; | 777 | lf.l_whence = SEEK_SET; | |
778 | lf.l_start = 0; | 778 | lf.l_start = 0; | |
779 | lf.l_len = 0; | 779 | lf.l_len = 0; | |
780 | lf.l_type = F_UNLCK; | 780 | lf.l_type = F_UNLCK; | |
781 | (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); | 781 | (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); | |
782 | } | 782 | } | |
783 | if (fp->f_ops != NULL) { | 783 | if (fp->f_ops != NULL) { | |
784 | error = (*fp->f_ops->fo_close)(fp); | 784 | error = (*fp->f_ops->fo_close)(fp); | |
785 | } else { | 785 | } else { | |
786 | error = 0; | 786 | error = 0; | |
787 | } | 787 | } | |
788 | KASSERT(fp->f_count == 0); | 788 | KASSERT(fp->f_count == 0); | |
789 | KASSERT(fp->f_cred != NULL); | 789 | KASSERT(fp->f_cred != NULL); | |
790 | pool_cache_put(file_cache, fp); | 790 | pool_cache_put(file_cache, fp); | |
791 | 791 | |||
792 | return error; | 792 | return error; | |
793 | } | 793 | } | |
794 | 794 | |||
/*
 * Allocate a file descriptor for the process.
 *
 * Searches the two-level free-descriptor bitmap (fd_himap points at
 * 32-entry groups, fd_lomap at individual slots within a group) for
 * the lowest free descriptor >= `want', stores it in *result, and
 * marks it used.  Returns 0 on success; ENOSPC if the table must be
 * expanded first (caller retries via fd_tryexpand()), or EMFILE if
 * the resource limit has been reached.
 */
int
fd_alloc(proc_t *p, int want, int *result)
{
	filedesc_t *fdp;
	int i, lim, last, error;
	u_int off, new;
	fdtab_t *dt;

	KASSERT(p == curproc || p == &proc0);

	fdp = p->p_fd;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.
	 */
	mutex_enter(&fdp->fd_lock);
	fd_checkmaps(fdp);
	dt = fdp->fd_dt;
	KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	last = min(dt->dt_nfiles, lim);
	for (;;) {
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		/* Find the first group (himap) with a free slot... */
		off = i >> NDENTRYSHIFT;
		new = fd_next_zero(fdp, fdp->fd_himap, off,
		    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
		if (new == -1)
			break;
		/* ...then the first free slot (lomap) within that group. */
		i = fd_next_zero(fdp, &fdp->fd_lomap[new],
		    new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
		if (i == -1) {
			/*
			 * Free file descriptor in this block was
			 * below want, try again with higher want.
			 */
			want = (new + 1) << NDENTRYSHIFT;
			continue;
		}
		i += (new << NDENTRYSHIFT);
		if (i >= last) {
			break;
		}
		if (dt->dt_ff[i] == NULL) {
			/* Lazily allocate the per-descriptor structure. */
			KASSERT(i >= NDFDFILE);
			dt->dt_ff[i] = pool_cache_get(fdfile_cache, PR_WAITOK);
		}
		KASSERT(dt->dt_ff[i]->ff_refcnt == 0);
		KASSERT(dt->dt_ff[i]->ff_file == NULL);
		fd_used(fdp, i);
		if (want <= fdp->fd_freefile) {
			/* Remember the new lowest-free hint. */
			fdp->fd_freefile = i;
		}
		*result = i;
		KASSERT(i >= NDFDFILE ||
		    dt->dt_ff[i] == (fdfile_t *)fdp->fd_dfdfile[i]);
		fd_checkmaps(fdp);
		mutex_exit(&fdp->fd_lock);
		return 0;
	}

	/* No space in current array.  Let the caller expand and retry. */
	error = (dt->dt_nfiles >= lim) ? EMFILE : ENOSPC;
	mutex_exit(&fdp->fd_lock);
	return error;
}
865 | 865 | |||
866 | /* | 866 | /* | |
867 | * Allocate memory for a descriptor table. | 867 | * Allocate memory for a descriptor table. | |
868 | */ | 868 | */ | |
869 | static fdtab_t * | 869 | static fdtab_t * | |
870 | fd_dtab_alloc(int n) | 870 | fd_dtab_alloc(int n) | |
871 | { | 871 | { | |
872 | fdtab_t *dt; | 872 | fdtab_t *dt; | |
873 | size_t sz; | 873 | size_t sz; | |
874 | 874 | |||
875 | KASSERT(n > NDFILE); | 875 | KASSERT(n > NDFILE); | |
876 | 876 | |||
877 | sz = sizeof(*dt) + (n - NDFILE) * sizeof(dt->dt_ff[0]); | 877 | sz = sizeof(*dt) + (n - NDFILE) * sizeof(dt->dt_ff[0]); | |
878 | dt = kmem_alloc(sz, KM_SLEEP); | 878 | dt = kmem_alloc(sz, KM_SLEEP); | |
879 | #ifdef DIAGNOSTIC | 879 | #ifdef DIAGNOSTIC | |
880 | memset(dt, 0xff, sz); | 880 | memset(dt, 0xff, sz); | |
881 | #endif | 881 | #endif | |
882 | dt->dt_nfiles = n; | 882 | dt->dt_nfiles = n; | |
883 | dt->dt_link = NULL; | 883 | dt->dt_link = NULL; | |
884 | return dt; | 884 | return dt; | |
885 | } | 885 | } | |
886 | 886 | |||
887 | /* | 887 | /* | |
888 | * Free a descriptor table, and all tables linked for deferred free. | 888 | * Free a descriptor table, and all tables linked for deferred free. | |
889 | */ | 889 | */ | |
890 | static void | 890 | static void | |
891 | fd_dtab_free(fdtab_t *dt) | 891 | fd_dtab_free(fdtab_t *dt) | |
892 | { | 892 | { | |
893 | fdtab_t *next; | 893 | fdtab_t *next; | |
894 | size_t sz; | 894 | size_t sz; | |
895 | 895 | |||
896 | do { | 896 | do { | |
897 | next = dt->dt_link; | 897 | next = dt->dt_link; | |
898 | KASSERT(dt->dt_nfiles > NDFILE); | 898 | KASSERT(dt->dt_nfiles > NDFILE); | |
899 | sz = sizeof(*dt) + | 899 | sz = sizeof(*dt) + | |
900 | (dt->dt_nfiles - NDFILE) * sizeof(dt->dt_ff[0]); | 900 | (dt->dt_nfiles - NDFILE) * sizeof(dt->dt_ff[0]); | |
901 | #ifdef DIAGNOSTIC | 901 | #ifdef DIAGNOSTIC | |
902 | memset(dt, 0xff, sz); | 902 | memset(dt, 0xff, sz); | |
903 | #endif | 903 | #endif | |
904 | kmem_free(dt, sz); | 904 | kmem_free(dt, sz); | |
905 | dt = next; | 905 | dt = next; | |
906 | } while (dt != NULL); | 906 | } while (dt != NULL); | |
907 | } | 907 | } | |
908 | 908 | |||
909 | /* | 909 | /* | |
910 | * Allocate descriptor bitmap. | 910 | * Allocate descriptor bitmap. | |
911 | */ | 911 | */ | |
912 | static void | 912 | static void | |
913 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) | 913 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) | |
914 | { | 914 | { | |
915 | uint8_t *ptr; | 915 | uint8_t *ptr; | |
916 | size_t szlo, szhi; | 916 | size_t szlo, szhi; | |
917 | 917 | |||
918 | KASSERT(n > NDENTRIES); | 918 | KASSERT(n > NDENTRIES); | |
919 | 919 | |||
920 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | 920 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | |
921 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | 921 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | |
922 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); | 922 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); | |
923 | *lo = (uint32_t *)ptr; | 923 | *lo = (uint32_t *)ptr; | |
924 | *hi = (uint32_t *)(ptr + szlo); | 924 | *hi = (uint32_t *)(ptr + szlo); | |
925 | } | 925 | } | |
926 | 926 | |||
927 | /* | 927 | /* | |
928 | * Free descriptor bitmap. | 928 | * Free descriptor bitmap. | |
929 | */ | 929 | */ | |
930 | static void | 930 | static void | |
931 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) | 931 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) | |
932 | { | 932 | { | |
933 | size_t szlo, szhi; | 933 | size_t szlo, szhi; | |
934 | 934 | |||
935 | KASSERT(n > NDENTRIES); | 935 | KASSERT(n > NDENTRIES); | |
936 | 936 | |||
937 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | 937 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | |
938 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | 938 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | |
939 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); | 939 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); | |
940 | kmem_free(lo, szlo + szhi); | 940 | kmem_free(lo, szlo + szhi); | |
941 | } | 941 | } | |
942 | 942 | |||
943 | /* | 943 | /* | |
944 | * Expand a process' descriptor table. | 944 | * Expand a process' descriptor table. | |
945 | */ | 945 | */ | |
946 | void | 946 | void | |
947 | fd_tryexpand(proc_t *p) | 947 | fd_tryexpand(proc_t *p) | |
948 | { | 948 | { | |
949 | filedesc_t *fdp; | 949 | filedesc_t *fdp; | |
950 | int i, numfiles, oldnfiles; | 950 | int i, numfiles, oldnfiles; | |
951 | fdtab_t *newdt, *dt; | 951 | fdtab_t *newdt, *dt; | |
952 | uint32_t *newhimap, *newlomap; | 952 | uint32_t *newhimap, *newlomap; | |
953 | 953 | |||
954 | KASSERT(p == curproc || p == &proc0); | 954 | KASSERT(p == curproc || p == &proc0); | |
955 | 955 | |||
956 | fdp = p->p_fd; | 956 | fdp = p->p_fd; | |
957 | newhimap = NULL; | 957 | newhimap = NULL; | |
958 | newlomap = NULL; | 958 | newlomap = NULL; | |
959 | oldnfiles = fdp->fd_dt->dt_nfiles; | 959 | oldnfiles = fdp->fd_dt->dt_nfiles; | |
960 | 960 | |||
961 | if (oldnfiles < NDEXTENT) | 961 | if (oldnfiles < NDEXTENT) | |
962 | numfiles = NDEXTENT; | 962 | numfiles = NDEXTENT; | |
963 | else | 963 | else | |
964 | numfiles = 2 * oldnfiles; | 964 | numfiles = 2 * oldnfiles; | |
965 | 965 | |||
966 | newdt = fd_dtab_alloc(numfiles); | 966 | newdt = fd_dtab_alloc(numfiles); | |
967 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 967 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
968 | fd_map_alloc(numfiles, &newlomap, &newhimap); | 968 | fd_map_alloc(numfiles, &newlomap, &newhimap); | |
969 | } | 969 | } | |
970 | 970 | |||
971 | mutex_enter(&fdp->fd_lock); | 971 | mutex_enter(&fdp->fd_lock); | |
972 | dt = fdp->fd_dt; | 972 | dt = fdp->fd_dt; | |
973 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 973 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
974 | if (dt->dt_nfiles != oldnfiles) { | 974 | if (dt->dt_nfiles != oldnfiles) { | |
975 | /* fdp changed; caller must retry */ | 975 | /* fdp changed; caller must retry */ | |
976 | mutex_exit(&fdp->fd_lock); | 976 | mutex_exit(&fdp->fd_lock); | |
977 | fd_dtab_free(newdt); | 977 | fd_dtab_free(newdt); | |
978 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 978 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
979 | fd_map_free(numfiles, newlomap, newhimap); | 979 | fd_map_free(numfiles, newlomap, newhimap); | |
980 | } | 980 | } | |
981 | return; | 981 | return; | |
982 | } | 982 | } | |
983 | 983 | |||
984 | /* Copy the existing descriptor table and zero the new portion. */ | 984 | /* Copy the existing descriptor table and zero the new portion. */ | |
985 | i = sizeof(fdfile_t *) * oldnfiles; | 985 | i = sizeof(fdfile_t *) * oldnfiles; | |
986 | memcpy(newdt->dt_ff, dt->dt_ff, i); | 986 | memcpy(newdt->dt_ff, dt->dt_ff, i); | |
987 | memset((uint8_t *)newdt->dt_ff + i, 0, | 987 | memset((uint8_t *)newdt->dt_ff + i, 0, | |
988 | numfiles * sizeof(fdfile_t *) - i); | 988 | numfiles * sizeof(fdfile_t *) - i); | |
989 | 989 | |||
990 | /* | 990 | /* | |
991 | * Link old descriptor array into list to be discarded. We defer | 991 | * Link old descriptor array into list to be discarded. We defer | |
992 | * freeing until the last reference to the descriptor table goes | 992 | * freeing until the last reference to the descriptor table goes | |
993 | * away (usually process exit). This allows us to do lockless | 993 | * away (usually process exit). This allows us to do lockless | |
994 | * lookups in fd_getfile(). | 994 | * lookups in fd_getfile(). | |
995 | */ | 995 | */ | |
996 | if (oldnfiles > NDFILE) { | 996 | if (oldnfiles > NDFILE) { | |
997 | if (fdp->fd_refcnt > 1) { | 997 | if (fdp->fd_refcnt > 1) { | |
998 | newdt->dt_link = dt; | 998 | newdt->dt_link = dt; | |
999 | } else { | 999 | } else { | |
1000 | fd_dtab_free(dt); | 1000 | fd_dtab_free(dt); | |
1001 | } | 1001 | } | |
1002 | } | 1002 | } | |
1003 | 1003 | |||
1004 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 1004 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
1005 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); | 1005 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); | |
1006 | memcpy(newhimap, fdp->fd_himap, i); | 1006 | memcpy(newhimap, fdp->fd_himap, i); | |
1007 | memset((uint8_t *)newhimap + i, 0, | 1007 | memset((uint8_t *)newhimap + i, 0, | |
1008 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); | 1008 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); | |
1009 | 1009 | |||
1010 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); | 1010 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); | |
1011 | memcpy(newlomap, fdp->fd_lomap, i); | 1011 | memcpy(newlomap, fdp->fd_lomap, i); | |
1012 | memset((uint8_t *)newlomap + i, 0, | 1012 | memset((uint8_t *)newlomap + i, 0, | |
1013 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); | 1013 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); | |
1014 | 1014 | |||
1015 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { | 1015 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { | |
1016 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); | 1016 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); | |
1017 | } | 1017 | } | |
1018 | fdp->fd_himap = newhimap; | 1018 | fdp->fd_himap = newhimap; | |
1019 | fdp->fd_lomap = newlomap; | 1019 | fdp->fd_lomap = newlomap; | |
1020 | } | 1020 | } | |
1021 | 1021 | |||
1022 | /* | 1022 | /* | |
1023 | * All other modifications must become globally visible before | 1023 | * All other modifications must become globally visible before | |
1024 | * the change to fd_dt. See fd_getfile(). | 1024 | * the change to fd_dt. See fd_getfile(). | |
1025 | */ | 1025 | */ | |
1026 | membar_producer(); | 1026 | membar_producer(); | |
1027 | fdp->fd_dt = newdt; | 1027 | fdp->fd_dt = newdt; | |
1028 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 1028 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
1029 | fd_checkmaps(fdp); | 1029 | fd_checkmaps(fdp); | |
1030 | mutex_exit(&fdp->fd_lock); | 1030 | mutex_exit(&fdp->fd_lock); | |
1031 | } | 1031 | } | |
1032 | 1032 | |||
1033 | /* | 1033 | /* | |
1034 | * Create a new open file structure and allocate a file descriptor | 1034 | * Create a new open file structure and allocate a file descriptor | |
1035 | * for the current process. | 1035 | * for the current process. | |
1036 | */ | 1036 | */ | |
1037 | int | 1037 | int | |
1038 | fd_allocfile(file_t **resultfp, int *resultfd) | 1038 | fd_allocfile(file_t **resultfp, int *resultfd) | |
1039 | { | 1039 | { | |
1040 | kauth_cred_t cred; | 1040 | kauth_cred_t cred; | |
1041 | file_t *fp; | 1041 | file_t *fp; | |
1042 | proc_t *p; | 1042 | proc_t *p; | |
1043 | int error; | 1043 | int error; | |
1044 | 1044 | |||
1045 | p = curproc; | 1045 | p = curproc; | |
1046 | 1046 | |||
1047 | while ((error = fd_alloc(p, 0, resultfd)) != 0) { | 1047 | while ((error = fd_alloc(p, 0, resultfd)) != 0) { | |
1048 | if (error != ENOSPC) { | 1048 | if (error != ENOSPC) { | |
1049 | return error; | 1049 | return error; | |
1050 | } | 1050 | } | |
1051 | fd_tryexpand(p); | 1051 | fd_tryexpand(p); | |
1052 | } | 1052 | } | |
1053 | 1053 | |||
1054 | fp = pool_cache_get(file_cache, PR_WAITOK); | 1054 | fp = pool_cache_get(file_cache, PR_WAITOK); | |
1055 | if (fp == NULL) { | 1055 | if (fp == NULL) { | |
1056 | return ENFILE; | 1056 | return ENFILE; | |
1057 | } | 1057 | } | |
1058 | KASSERT(fp->f_count == 0); | 1058 | KASSERT(fp->f_count == 0); | |
1059 | KASSERT(fp->f_msgcount == 0); | 1059 | KASSERT(fp->f_msgcount == 0); | |
1060 | KASSERT(fp->f_unpcount == 0); | 1060 | KASSERT(fp->f_unpcount == 0); | |
1061 | 1061 | |||
1062 | /* Replace cached credentials if not what we need. */ | 1062 | /* Replace cached credentials if not what we need. */ | |
1063 | cred = curlwp->l_cred; | 1063 | cred = curlwp->l_cred; | |
1064 | if (__predict_false(cred != fp->f_cred)) { | 1064 | if (__predict_false(cred != fp->f_cred)) { | |
1065 | kauth_cred_free(fp->f_cred); | 1065 | kauth_cred_free(fp->f_cred); | |
1066 | kauth_cred_hold(cred); | 1066 | kauth_cred_hold(cred); | |
1067 | fp->f_cred = cred; | 1067 | fp->f_cred = cred; | |
1068 | } | 1068 | } | |
1069 | 1069 | |||
1070 | /* | 1070 | /* | |
1071 | * Don't allow recycled files to be scanned. | 1071 | * Don't allow recycled files to be scanned. | |
1072 | * See uipc_usrreq.c. | 1072 | * See uipc_usrreq.c. | |
1073 | */ | 1073 | */ | |
1074 | if (__predict_false((fp->f_flag & FSCAN) != 0)) { | 1074 | if (__predict_false((fp->f_flag & FSCAN) != 0)) { | |
1075 | mutex_enter(&fp->f_lock); | 1075 | mutex_enter(&fp->f_lock); | |
1076 | atomic_and_uint(&fp->f_flag, ~FSCAN); | 1076 | atomic_and_uint(&fp->f_flag, ~FSCAN); | |
1077 | mutex_exit(&fp->f_lock); | 1077 | mutex_exit(&fp->f_lock); | |
1078 | } | 1078 | } | |
1079 | 1079 | |||
1080 | fp->f_advice = 0; | 1080 | fp->f_advice = 0; | |
1081 | fp->f_offset = 0; | 1081 | fp->f_offset = 0; | |
1082 | *resultfp = fp; | 1082 | *resultfp = fp; | |
1083 | 1083 | |||
1084 | return 0; | 1084 | return 0; | |
1085 | } | 1085 | } | |
1086 | 1086 | |||
/*
 * Successful creation of a new descriptor: make visible to the process.
 *
 * Takes a reference to fp on behalf of the descriptor table and then
 * publishes it in slot `fd', which must have been reserved earlier by
 * fd_alloc() (via fd_allocfile()).
 */
void
fd_affix(proc_t *p, file_t *fp, unsigned fd)
{
	fdfile_t *ff;
	filedesc_t *fdp;

	/* Only the current process (or proc0 at boot) may affix. */
	KASSERT(p == curproc || p == &proc0);

	/* Add a reference to the file structure. */
	mutex_enter(&fp->f_lock);
	fp->f_count++;
	mutex_exit(&fp->f_lock);

	/*
	 * Insert the new file into the descriptor slot.
	 *
	 * The memory barriers provided by lock activity in this routine
	 * ensure that any updates to the file structure become globally
	 * visible before the file becomes visible to other LWPs in the
	 * current process.
	 */
	fdp = p->p_fd;
	ff = fdp->fd_dt->dt_ff[fd];

	/* The slot must be reserved, allocated and still empty. */
	KASSERT(ff != NULL);
	KASSERT(ff->ff_file == NULL);
	KASSERT(ff->ff_allocated);
	KASSERT(fd_isused(fdp, fd));
	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);

	/* No need to lock in order to make file initially visible. */
	ff->ff_file = fp;
}
1123 | 1123 | |||
1124 | /* | 1124 | /* | |
1125 | * Abort creation of a new descriptor: free descriptor slot and file. | 1125 | * Abort creation of a new descriptor: free descriptor slot and file. | |
1126 | */ | 1126 | */ | |
1127 | void | 1127 | void | |
1128 | fd_abort(proc_t *p, file_t *fp, unsigned fd) | 1128 | fd_abort(proc_t *p, file_t *fp, unsigned fd) | |
1129 | { | 1129 | { | |
1130 | filedesc_t *fdp; | 1130 | filedesc_t *fdp; | |
1131 | fdfile_t *ff; | 1131 | fdfile_t *ff; | |
1132 | 1132 | |||
1133 | KASSERT(p == curproc || p == &proc0); | 1133 | KASSERT(p == curproc || p == &proc0); | |
1134 | 1134 | |||
1135 | fdp = p->p_fd; | 1135 | fdp = p->p_fd; | |
1136 | ff = fdp->fd_dt->dt_ff[fd]; | 1136 | ff = fdp->fd_dt->dt_ff[fd]; | |
1137 | 1137 | |||
1138 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1138 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1139 | 1139 | |||
1140 | mutex_enter(&fdp->fd_lock); | 1140 | mutex_enter(&fdp->fd_lock); | |
1141 | KASSERT(fd_isused(fdp, fd)); | 1141 | KASSERT(fd_isused(fdp, fd)); | |
1142 | fd_unused(fdp, fd); | 1142 | fd_unused(fdp, fd); | |
1143 | mutex_exit(&fdp->fd_lock); | 1143 | mutex_exit(&fdp->fd_lock); | |
1144 | 1144 | |||
1145 | if (fp != NULL) { | 1145 | if (fp != NULL) { | |
1146 | KASSERT(fp->f_count == 0); | 1146 | KASSERT(fp->f_count == 0); | |
1147 | KASSERT(fp->f_cred != NULL); | 1147 | KASSERT(fp->f_cred != NULL); | |
1148 | pool_cache_put(file_cache, fp); | 1148 | pool_cache_put(file_cache, fp); | |
1149 | } | 1149 | } | |
1150 | } | 1150 | } | |
1151 | 1151 | |||
1152 | static int | 1152 | static int | |
1153 | file_ctor(void *arg, void *obj, int flags) | 1153 | file_ctor(void *arg, void *obj, int flags) | |
1154 | { | 1154 | { | |
1155 | file_t *fp = obj; | 1155 | file_t *fp = obj; | |
1156 | 1156 | |||
1157 | memset(fp, 0, sizeof(*fp)); | 1157 | memset(fp, 0, sizeof(*fp)); | |
1158 | 1158 | |||
1159 | mutex_enter(&filelist_lock); | 1159 | mutex_enter(&filelist_lock); | |
1160 | if (__predict_false(nfiles >= maxfiles)) { | 1160 | if (__predict_false(nfiles >= maxfiles)) { | |
1161 | mutex_exit(&filelist_lock); | 1161 | mutex_exit(&filelist_lock); | |
1162 | tablefull("file", "increase kern.maxfiles or MAXFILES"); | 1162 | tablefull("file", "increase kern.maxfiles or MAXFILES"); | |
1163 | return ENFILE; | 1163 | return ENFILE; | |
1164 | } | 1164 | } | |
1165 | nfiles++; | 1165 | nfiles++; | |
1166 | LIST_INSERT_HEAD(&filehead, fp, f_list); | 1166 | LIST_INSERT_HEAD(&filehead, fp, f_list); | |
1167 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | 1167 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | |
1168 | fp->f_cred = curlwp->l_cred; | 1168 | fp->f_cred = curlwp->l_cred; | |
1169 | kauth_cred_hold(fp->f_cred); | 1169 | kauth_cred_hold(fp->f_cred); | |
1170 | mutex_exit(&filelist_lock); | 1170 | mutex_exit(&filelist_lock); | |
1171 | 1171 | |||
1172 | return 0; | 1172 | return 0; | |
1173 | } | 1173 | } | |
1174 | 1174 | |||
1175 | static void | 1175 | static void | |
1176 | file_dtor(void *arg, void *obj) | 1176 | file_dtor(void *arg, void *obj) | |
1177 | { | 1177 | { | |
1178 | file_t *fp = obj; | 1178 | file_t *fp = obj; | |
1179 | 1179 | |||
1180 | mutex_enter(&filelist_lock); | 1180 | mutex_enter(&filelist_lock); | |
1181 | nfiles--; | 1181 | nfiles--; | |
1182 | LIST_REMOVE(fp, f_list); | 1182 | LIST_REMOVE(fp, f_list); | |
1183 | mutex_exit(&filelist_lock); | 1183 | mutex_exit(&filelist_lock); | |
1184 | 1184 | |||
1185 | kauth_cred_free(fp->f_cred); | 1185 | kauth_cred_free(fp->f_cred); | |
1186 | mutex_destroy(&fp->f_lock); | 1186 | mutex_destroy(&fp->f_lock); | |
1187 | } | 1187 | } | |
1188 | 1188 | |||
1189 | static int | 1189 | static int | |
1190 | fdfile_ctor(void *arg, void *obj, int flags) | 1190 | fdfile_ctor(void *arg, void *obj, int flags) | |
1191 | { | 1191 | { | |
1192 | fdfile_t *ff = obj; | 1192 | fdfile_t *ff = obj; | |
1193 | 1193 | |||
1194 | memset(ff, 0, sizeof(*ff)); | 1194 | memset(ff, 0, sizeof(*ff)); | |
1195 | cv_init(&ff->ff_closing, "fdclose"); | 1195 | cv_init(&ff->ff_closing, "fdclose"); | |
1196 | 1196 | |||
1197 | return 0; | 1197 | return 0; | |
1198 | } | 1198 | } | |
1199 | 1199 | |||
1200 | static void | 1200 | static void | |
1201 | fdfile_dtor(void *arg, void *obj) | 1201 | fdfile_dtor(void *arg, void *obj) | |
1202 | { | 1202 | { | |
1203 | fdfile_t *ff = obj; | 1203 | fdfile_t *ff = obj; | |
1204 | 1204 | |||
1205 | cv_destroy(&ff->ff_closing); | 1205 | cv_destroy(&ff->ff_closing); | |
1206 | } | 1206 | } | |
1207 | 1207 | |||
1208 | file_t * | 1208 | file_t * | |
1209 | fgetdummy(void) | 1209 | fgetdummy(void) | |
1210 | { | 1210 | { | |
1211 | file_t *fp; | 1211 | file_t *fp; | |
1212 | 1212 | |||
1213 | fp = kmem_alloc(sizeof(*fp), KM_SLEEP); | 1213 | fp = kmem_alloc(sizeof(*fp), KM_SLEEP); | |
1214 | if (fp != NULL) { | 1214 | if (fp != NULL) { | |
1215 | memset(fp, 0, sizeof(*fp)); | 1215 | memset(fp, 0, sizeof(*fp)); | |
1216 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | 1216 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | |
1217 | } | 1217 | } | |
1218 | return fp; | 1218 | return fp; | |
1219 | } | 1219 | } | |
1220 | 1220 | |||
1221 | void | 1221 | void | |
1222 | fputdummy(file_t *fp) | 1222 | fputdummy(file_t *fp) | |
1223 | { | 1223 | { | |
1224 | 1224 | |||
1225 | mutex_destroy(&fp->f_lock); | 1225 | mutex_destroy(&fp->f_lock); | |
1226 | kmem_free(fp, sizeof(*fp)); | 1226 | kmem_free(fp, sizeof(*fp)); | |
1227 | } | 1227 | } | |
1228 | 1228 | |||
/*
 * Create an initial filedesc structure.
 *
 * With fdp == NULL (the common case) a constructed structure is taken
 * from the pool cache; otherwise the caller-supplied storage (the
 * boot-time static, or rump's equivalent) is constructed in place.
 * Returns the structure with a single reference held.
 */
filedesc_t *
fd_init(filedesc_t *fdp)
{
#ifdef DIAGNOSTIC
	unsigned fd;
#endif

	if (__predict_true(fdp == NULL)) {
		fdp = pool_cache_get(filedesc_cache, PR_WAITOK);
	} else {
		/* XXXRUMP KASSERT(fdp == &filedesc0); */
		filedesc_ctor(NULL, fdp, PR_WAITOK);
	}

#ifdef DIAGNOSTIC
	/* Verify the structure is in pristine, freshly-constructed state. */
	KASSERT(fdp->fd_lastfile == -1);
	KASSERT(fdp->fd_lastkqfile == -1);
	KASSERT(fdp->fd_knhash == NULL);
	KASSERT(fdp->fd_freefile == 0);
	KASSERT(fdp->fd_exclose == false);
	KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin);
	KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE);
	/* Built-in slots point at the embedded fdfile storage... */
	for (fd = 0; fd < NDFDFILE; fd++) {
		KASSERT(fdp->fd_dtbuiltin.dt_ff[fd] ==
		    (fdfile_t *)fdp->fd_dfdfile[fd]);
	}
	/* ...and the remainder of the built-in table is empty. */
	for (fd = NDFDFILE; fd < NDFILE; fd++) {
		KASSERT(fdp->fd_dtbuiltin.dt_ff[fd] == NULL);
	}
	KASSERT(fdp->fd_himap == fdp->fd_dhimap);
	KASSERT(fdp->fd_lomap == fdp->fd_dlomap);
#endif /* DIAGNOSTIC */

	fdp->fd_refcnt = 1;
	fd_checkmaps(fdp);

	return fdp;
}
1270 | 1270 | |||
1271 | /* | 1271 | /* | |
1272 | * Initialize a file descriptor table. | 1272 | * Initialize a file descriptor table. | |
1273 | */ | 1273 | */ | |
1274 | static int | 1274 | static int | |
1275 | filedesc_ctor(void *arg, void *obj, int flag) | 1275 | filedesc_ctor(void *arg, void *obj, int flag) | |
1276 | { | 1276 | { | |
1277 | filedesc_t *fdp = obj; | 1277 | filedesc_t *fdp = obj; | |
1278 | fdfile_t **ffp; | 1278 | fdfile_t **ffp; | |
1279 | int i; | 1279 | int i; | |
1280 | 1280 | |||
1281 | memset(fdp, 0, sizeof(*fdp)); | 1281 | memset(fdp, 0, sizeof(*fdp)); | |
1282 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); | 1282 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); | |
1283 | fdp->fd_lastfile = -1; | 1283 | fdp->fd_lastfile = -1; | |
1284 | fdp->fd_lastkqfile = -1; | 1284 | fdp->fd_lastkqfile = -1; | |
1285 | fdp->fd_dt = &fdp->fd_dtbuiltin; | 1285 | fdp->fd_dt = &fdp->fd_dtbuiltin; | |
1286 | fdp->fd_dtbuiltin.dt_nfiles = NDFILE; | 1286 | fdp->fd_dtbuiltin.dt_nfiles = NDFILE; | |
1287 | fdp->fd_himap = fdp->fd_dhimap; | 1287 | fdp->fd_himap = fdp->fd_dhimap; | |
1288 | fdp->fd_lomap = fdp->fd_dlomap; | 1288 | fdp->fd_lomap = fdp->fd_dlomap; | |
1289 | 1289 | |||
1290 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); | 1290 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); | |
1291 | for (i = 0, ffp = fdp->fd_dt->dt_ff; i < NDFDFILE; i++, ffp++) { | 1291 | for (i = 0, ffp = fdp->fd_dt->dt_ff; i < NDFDFILE; i++, ffp++) { | |
1292 | *ffp = (fdfile_t *)fdp->fd_dfdfile[i]; | 1292 | *ffp = (fdfile_t *)fdp->fd_dfdfile[i]; | |
1293 | (void)fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); | 1293 | (void)fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); | |
1294 | } | 1294 | } | |
1295 | 1295 | |||
1296 | return 0; | 1296 | return 0; | |
1297 | } | 1297 | } | |
1298 | 1298 | |||
1299 | static void | 1299 | static void | |
1300 | filedesc_dtor(void *arg, void *obj) | 1300 | filedesc_dtor(void *arg, void *obj) | |
1301 | { | 1301 | { | |
1302 | filedesc_t *fdp = obj; | 1302 | filedesc_t *fdp = obj; | |
1303 | int i; | 1303 | int i; | |
1304 | 1304 | |||
1305 | for (i = 0; i < NDFDFILE; i++) { | 1305 | for (i = 0; i < NDFDFILE; i++) { | |
1306 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); | 1306 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); | |
1307 | } | 1307 | } | |
1308 | 1308 | |||
1309 | mutex_destroy(&fdp->fd_lock); | 1309 | mutex_destroy(&fdp->fd_lock); | |
1310 | } | 1310 | } | |
1311 | 1311 | |||
1312 | /* | 1312 | /* | |
1313 | * Make p2 share p1's filedesc structure. | 1313 | * Make p2 share p1's filedesc structure. | |
1314 | */ | 1314 | */ | |
1315 | void | 1315 | void | |
1316 | fd_share(struct proc *p2) | 1316 | fd_share(struct proc *p2) | |
1317 | { | 1317 | { | |
1318 | filedesc_t *fdp; | 1318 | filedesc_t *fdp; | |
1319 | 1319 | |||
1320 | fdp = curlwp->l_fd; | 1320 | fdp = curlwp->l_fd; | |
1321 | p2->p_fd = fdp; | 1321 | p2->p_fd = fdp; | |
1322 | atomic_inc_uint(&fdp->fd_refcnt); | 1322 | atomic_inc_uint(&fdp->fd_refcnt); | |
1323 | } | 1323 | } | |
1324 | 1324 | |||
/*
 * Acquire a hold on a filedesc structure.
 *
 * The LWP argument reflects the two creation paths: fork1(), where
 * the new LWP inherits the current LWP's descriptor table, and
 * kernel thread creation (e.g. kthread_create()), where the table
 * is inherited from lwp0 — the assertion admits exactly these two.
 */
void
fd_hold(lwp_t *l)
{
	filedesc_t *fdp = l->l_fd;

	KASSERT(fdp == curlwp->l_fd || fdp == lwp0.l_fd);
	atomic_inc_uint(&fdp->fd_refcnt);
}
1334 | 1336 | |||
1335 | /* | 1337 | /* | |
1336 | * Copy a filedesc structure. | 1338 | * Copy a filedesc structure. | |
1337 | */ | 1339 | */ | |
1338 | filedesc_t * | 1340 | filedesc_t * | |
1339 | fd_copy(void) | 1341 | fd_copy(void) | |
1340 | { | 1342 | { | |
1341 | filedesc_t *newfdp, *fdp; | 1343 | filedesc_t *newfdp, *fdp; | |
1342 | fdfile_t *ff, **ffp, **nffp, *ff2; | 1344 | fdfile_t *ff, **ffp, **nffp, *ff2; | |
1343 | int i, j, numfiles, lastfile, newlast; | 1345 | int i, j, numfiles, lastfile, newlast; | |
1344 | file_t *fp; | 1346 | file_t *fp; | |
1345 | fdtab_t *newdt; | 1347 | fdtab_t *newdt; | |
1346 | 1348 | |||
1347 | fdp = curproc->p_fd; | 1349 | fdp = curproc->p_fd; | |
1348 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); | 1350 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); | |
1349 | newfdp->fd_refcnt = 1; | 1351 | newfdp->fd_refcnt = 1; | |
1350 | 1352 | |||
1351 | #ifdef DIAGNOSTIC | 1353 | #ifdef DIAGNOSTIC | |
1352 | KASSERT(newfdp->fd_lastfile == -1); | 1354 | KASSERT(newfdp->fd_lastfile == -1); | |
1353 | KASSERT(newfdp->fd_lastkqfile == -1); | 1355 | KASSERT(newfdp->fd_lastkqfile == -1); | |
1354 | KASSERT(newfdp->fd_knhash == NULL); | 1356 | KASSERT(newfdp->fd_knhash == NULL); | |
1355 | KASSERT(newfdp->fd_freefile == 0); | 1357 | KASSERT(newfdp->fd_freefile == 0); | |
1356 | KASSERT(newfdp->fd_exclose == false); | 1358 | KASSERT(newfdp->fd_exclose == false); | |
1357 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | 1359 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | |
1358 | KASSERT(newfdp->fd_dtbuiltin.dt_nfiles == NDFILE); | 1360 | KASSERT(newfdp->fd_dtbuiltin.dt_nfiles == NDFILE); | |
1359 | for (i = 0; i < NDFDFILE; i++) { | 1361 | for (i = 0; i < NDFDFILE; i++) { | |
1360 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == | 1362 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == | |
1361 | (fdfile_t *)&newfdp->fd_dfdfile[i]); | 1363 | (fdfile_t *)&newfdp->fd_dfdfile[i]); | |
1362 | } | 1364 | } | |
1363 | for (i = NDFDFILE; i < NDFILE; i++) { | 1365 | for (i = NDFDFILE; i < NDFILE; i++) { | |
1364 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == NULL); | 1366 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == NULL); | |
1365 | } | 1367 | } | |
1366 | #endif /* DIAGNOSTIC */ | 1368 | #endif /* DIAGNOSTIC */ | |
1367 | 1369 | |||
1368 | mutex_enter(&fdp->fd_lock); | 1370 | mutex_enter(&fdp->fd_lock); | |
1369 | fd_checkmaps(fdp); | 1371 | fd_checkmaps(fdp); | |
1370 | numfiles = fdp->fd_dt->dt_nfiles; | 1372 | numfiles = fdp->fd_dt->dt_nfiles; | |
1371 | lastfile = fdp->fd_lastfile; | 1373 | lastfile = fdp->fd_lastfile; | |
1372 | 1374 | |||
1373 | /* | 1375 | /* | |
1374 | * If the number of open files fits in the internal arrays | 1376 | * If the number of open files fits in the internal arrays | |
1375 | * of the open file structure, use them, otherwise allocate | 1377 | * of the open file structure, use them, otherwise allocate | |
1376 | * additional memory for the number of descriptors currently | 1378 | * additional memory for the number of descriptors currently | |
1377 | * in use. | 1379 | * in use. | |
1378 | */ | 1380 | */ | |
1379 | if (lastfile < NDFILE) { | 1381 | if (lastfile < NDFILE) { | |
1380 | i = NDFILE; | 1382 | i = NDFILE; | |
1381 | newdt = newfdp->fd_dt; | 1383 | newdt = newfdp->fd_dt; | |
1382 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | 1384 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | |
1383 | } else { | 1385 | } else { | |
1384 | /* | 1386 | /* | |
1385 | * Compute the smallest multiple of NDEXTENT needed | 1387 | * Compute the smallest multiple of NDEXTENT needed | |
1386 | * for the file descriptors currently in use, | 1388 | * for the file descriptors currently in use, | |
1387 | * allowing the table to shrink. | 1389 | * allowing the table to shrink. | |
1388 | */ | 1390 | */ | |
1389 | i = numfiles; | 1391 | i = numfiles; | |
1390 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { | 1392 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { | |
1391 | i /= 2; | 1393 | i /= 2; | |
1392 | } | 1394 | } | |
1393 | KASSERT(i > NDFILE); | 1395 | KASSERT(i > NDFILE); | |
1394 | newdt = fd_dtab_alloc(i); | 1396 | newdt = fd_dtab_alloc(i); | |
1395 | newfdp->fd_dt = newdt; | 1397 | newfdp->fd_dt = newdt; | |
1396 | memcpy(newdt->dt_ff, newfdp->fd_dtbuiltin.dt_ff, | 1398 | memcpy(newdt->dt_ff, newfdp->fd_dtbuiltin.dt_ff, | |
1397 | NDFDFILE * sizeof(fdfile_t **)); | 1399 | NDFDFILE * sizeof(fdfile_t **)); | |
1398 | memset(newdt->dt_ff + NDFDFILE, 0, | 1400 | memset(newdt->dt_ff + NDFDFILE, 0, | |
1399 | (i - NDFDFILE) * sizeof(fdfile_t **)); | 1401 | (i - NDFDFILE) * sizeof(fdfile_t **)); | |
1400 | } | 1402 | } | |
1401 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { | 1403 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { | |
1402 | newfdp->fd_himap = newfdp->fd_dhimap; | 1404 | newfdp->fd_himap = newfdp->fd_dhimap; | |
1403 | newfdp->fd_lomap = newfdp->fd_dlomap; | 1405 | newfdp->fd_lomap = newfdp->fd_dlomap; | |
1404 | } else { | 1406 | } else { | |
1405 | fd_map_alloc(i, &newfdp->fd_lomap, &newfdp->fd_himap); | 1407 | fd_map_alloc(i, &newfdp->fd_lomap, &newfdp->fd_himap); | |
1406 | KASSERT(i >= NDENTRIES * NDENTRIES); | 1408 | KASSERT(i >= NDENTRIES * NDENTRIES); | |
1407 | memset(newfdp->fd_himap, 0, NDHISLOTS(i)*sizeof(uint32_t)); | 1409 | memset(newfdp->fd_himap, 0, NDHISLOTS(i)*sizeof(uint32_t)); | |
1408 | memset(newfdp->fd_lomap, 0, NDLOSLOTS(i)*sizeof(uint32_t)); | 1410 | memset(newfdp->fd_lomap, 0, NDLOSLOTS(i)*sizeof(uint32_t)); | |
1409 | } | 1411 | } | |
1410 | newfdp->fd_freefile = fdp->fd_freefile; | 1412 | newfdp->fd_freefile = fdp->fd_freefile; | |
1411 | newfdp->fd_exclose = fdp->fd_exclose; | 1413 | newfdp->fd_exclose = fdp->fd_exclose; | |
1412 | 1414 | |||
1413 | ffp = fdp->fd_dt->dt_ff; | 1415 | ffp = fdp->fd_dt->dt_ff; | |
1414 | nffp = newdt->dt_ff; | 1416 | nffp = newdt->dt_ff; | |
1415 | newlast = -1; | 1417 | newlast = -1; | |
1416 | for (i = 0; i <= (int)lastfile; i++, ffp++, nffp++) { | 1418 | for (i = 0; i <= (int)lastfile; i++, ffp++, nffp++) { | |
1417 | KASSERT(i >= NDFDFILE || | 1419 | KASSERT(i >= NDFDFILE || | |
1418 | *nffp == (fdfile_t *)newfdp->fd_dfdfile[i]); | 1420 | *nffp == (fdfile_t *)newfdp->fd_dfdfile[i]); | |
1419 | ff = *ffp; | 1421 | ff = *ffp; | |
1420 | if (ff == NULL || (fp = ff->ff_file) == NULL) { | 1422 | if (ff == NULL || (fp = ff->ff_file) == NULL) { | |
1421 | /* Descriptor unused, or descriptor half open. */ | 1423 | /* Descriptor unused, or descriptor half open. */ | |
1422 | KASSERT(!fd_isused(newfdp, i)); | 1424 | KASSERT(!fd_isused(newfdp, i)); | |
1423 | continue; | 1425 | continue; | |
1424 | } | 1426 | } | |
1425 | if (__predict_false(fp->f_type == DTYPE_KQUEUE)) { | 1427 | if (__predict_false(fp->f_type == DTYPE_KQUEUE)) { | |
1426 | /* kqueue descriptors cannot be copied. */ | 1428 | /* kqueue descriptors cannot be copied. */ | |
1427 | if (i < newfdp->fd_freefile) | 1429 | if (i < newfdp->fd_freefile) | |
1428 | newfdp->fd_freefile = i; | 1430 | newfdp->fd_freefile = i; | |
1429 | continue; | 1431 | continue; | |
1430 | } | 1432 | } | |
1431 | /* It's active: add a reference to the file. */ | 1433 | /* It's active: add a reference to the file. */ | |
1432 | mutex_enter(&fp->f_lock); | 1434 | mutex_enter(&fp->f_lock); | |
1433 | fp->f_count++; | 1435 | fp->f_count++; | |
1434 | mutex_exit(&fp->f_lock); | 1436 | mutex_exit(&fp->f_lock); | |
1435 | 1437 | |||
1436 | /* Allocate an fdfile_t to represent it. */ | 1438 | /* Allocate an fdfile_t to represent it. */ | |
1437 | if (i >= NDFDFILE) { | 1439 | if (i >= NDFDFILE) { | |
1438 | ff2 = pool_cache_get(fdfile_cache, PR_WAITOK); | 1440 | ff2 = pool_cache_get(fdfile_cache, PR_WAITOK); | |
1439 | *nffp = ff2; | 1441 | *nffp = ff2; | |
1440 | } else { | 1442 | } else { | |
1441 | ff2 = newdt->dt_ff[i]; | 1443 | ff2 = newdt->dt_ff[i]; | |
1442 | } | 1444 | } | |
1443 | ff2->ff_file = fp; | 1445 | ff2->ff_file = fp; | |
1444 | ff2->ff_exclose = ff->ff_exclose; | 1446 | ff2->ff_exclose = ff->ff_exclose; | |
1445 | ff2->ff_allocated = true; | 1447 | ff2->ff_allocated = true; | |
1446 | 1448 | |||
1447 | /* Fix up bitmaps. */ | 1449 | /* Fix up bitmaps. */ | |
1448 | j = i >> NDENTRYSHIFT; | 1450 | j = i >> NDENTRYSHIFT; | |
1449 | KASSERT((newfdp->fd_lomap[j] & (1 << (i & NDENTRYMASK))) == 0); | 1451 | KASSERT((newfdp->fd_lomap[j] & (1 << (i & NDENTRYMASK))) == 0); | |
1450 | newfdp->fd_lomap[j] |= 1 << (i & NDENTRYMASK); | 1452 | newfdp->fd_lomap[j] |= 1 << (i & NDENTRYMASK); | |
1451 | if (__predict_false(newfdp->fd_lomap[j] == ~0)) { | 1453 | if (__predict_false(newfdp->fd_lomap[j] == ~0)) { | |
1452 | KASSERT((newfdp->fd_himap[j >> NDENTRYSHIFT] & | 1454 | KASSERT((newfdp->fd_himap[j >> NDENTRYSHIFT] & | |
1453 | (1 << (j & NDENTRYMASK))) == 0); | 1455 | (1 << (j & NDENTRYMASK))) == 0); | |
1454 | newfdp->fd_himap[j >> NDENTRYSHIFT] |= | 1456 | newfdp->fd_himap[j >> NDENTRYSHIFT] |= | |
1455 | 1 << (j & NDENTRYMASK); | 1457 | 1 << (j & NDENTRYMASK); | |
1456 | } | 1458 | } | |
1457 | newlast = i; | 1459 | newlast = i; | |
1458 | } | 1460 | } | |
1459 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); | 1461 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); | |
1460 | newfdp->fd_lastfile = newlast; | 1462 | newfdp->fd_lastfile = newlast; | |
1461 | fd_checkmaps(newfdp); | 1463 | fd_checkmaps(newfdp); | |
1462 | mutex_exit(&fdp->fd_lock); | 1464 | mutex_exit(&fdp->fd_lock); | |
1463 | 1465 | |||
1464 | return (newfdp); | 1466 | return (newfdp); | |
1465 | } | 1467 | } | |
1466 | 1468 | |||
1467 | /* | 1469 | /* | |
1468 | * Release a filedesc structure. | 1470 | * Release a filedesc structure. | |
1469 | */ | 1471 | */ | |
1470 | void | 1472 | void | |
1471 | fd_free(void) | 1473 | fd_free(void) | |
1472 | { | 1474 | { | |
1473 | fdfile_t *ff; | 1475 | fdfile_t *ff; | |
1474 | file_t *fp; | 1476 | file_t *fp; | |
1475 | int fd, nf; | 1477 | int fd, nf; | |
1476 | fdtab_t *dt; | 1478 | fdtab_t *dt; | |
1477 | lwp_t * const l = curlwp; | 1479 | lwp_t * const l = curlwp; | |
1478 | filedesc_t * const fdp = l->l_fd; | 1480 | filedesc_t * const fdp = l->l_fd; | |
1479 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; | 1481 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; | |
1480 | 1482 | |||
1481 | KASSERT(fdp->fd_dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 1483 | KASSERT(fdp->fd_dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
1482 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | 1484 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | |
1483 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | 1485 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | |
1484 | 1486 | |||
1485 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | 1487 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | |
1486 | membar_exit(); | 1488 | membar_exit(); | |
1487 | #endif | 1489 | #endif | |
1488 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | 1490 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | |
1489 | return; | 1491 | return; | |
1490 | 1492 | |||
1491 | /* | 1493 | /* | |
1492 | * Close any files that the process holds open. | 1494 | * Close any files that the process holds open. | |
1493 | */ | 1495 | */ | |
1494 | dt = fdp->fd_dt; | 1496 | dt = fdp->fd_dt; | |
1495 | fd_checkmaps(fdp); | 1497 | fd_checkmaps(fdp); | |
1496 | #ifdef DEBUG | 1498 | #ifdef DEBUG | |
1497 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ | 1499 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ | |
1498 | #endif | 1500 | #endif | |
1499 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { | 1501 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { | |
1500 | ff = dt->dt_ff[fd]; | 1502 | ff = dt->dt_ff[fd]; | |
1501 | KASSERT(fd >= NDFDFILE || | 1503 | KASSERT(fd >= NDFDFILE || | |
1502 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1504 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1503 | if (ff == NULL) | 1505 | if (ff == NULL) | |
1504 | continue; | 1506 | continue; | |
1505 | if ((fp = ff->ff_file) != NULL) { | 1507 | if ((fp = ff->ff_file) != NULL) { | |
1506 | /* | 1508 | /* | |
1507 | * Must use fd_close() here if there is | 1509 | * Must use fd_close() here if there is | |
1508 | * a reference from kqueue or we might have posix | 1510 | * a reference from kqueue or we might have posix | |
1509 | * advisory locks. | 1511 | * advisory locks. | |
1510 | */ | 1512 | */ | |
1511 | if (__predict_true(ff->ff_refcnt == 0) && | 1513 | if (__predict_true(ff->ff_refcnt == 0) && | |
1512 | (noadvlock || fp->f_type != DTYPE_VNODE)) { | 1514 | (noadvlock || fp->f_type != DTYPE_VNODE)) { | |
1513 | ff->ff_file = NULL; | 1515 | ff->ff_file = NULL; | |
1514 | ff->ff_exclose = false; | 1516 | ff->ff_exclose = false; | |
1515 | ff->ff_allocated = false; | 1517 | ff->ff_allocated = false; | |
1516 | closef(fp); | 1518 | closef(fp); | |
1517 | } else { | 1519 | } else { | |
1518 | ff->ff_refcnt++; | 1520 | ff->ff_refcnt++; | |
1519 | fd_close(fd); | 1521 | fd_close(fd); | |
1520 | } | 1522 | } | |
1521 | } | 1523 | } | |
1522 | KASSERT(ff->ff_refcnt == 0); | 1524 | KASSERT(ff->ff_refcnt == 0); | |
1523 | KASSERT(ff->ff_file == NULL); | 1525 | KASSERT(ff->ff_file == NULL); | |
1524 | KASSERT(!ff->ff_exclose); | 1526 | KASSERT(!ff->ff_exclose); | |
1525 | KASSERT(!ff->ff_allocated); | 1527 | KASSERT(!ff->ff_allocated); | |
1526 | if (fd >= NDFDFILE) { | 1528 | if (fd >= NDFDFILE) { | |
1527 | pool_cache_put(fdfile_cache, ff); | 1529 | pool_cache_put(fdfile_cache, ff); | |
1528 | dt->dt_ff[fd] = NULL; | 1530 | dt->dt_ff[fd] = NULL; | |
1529 | } | 1531 | } | |
1530 | } | 1532 | } | |
1531 | 1533 | |||
1532 | /* | 1534 | /* | |
1533 | * Clean out the descriptor table for the next user and return | 1535 | * Clean out the descriptor table for the next user and return | |
1534 | * to the cache. | 1536 | * to the cache. | |
1535 | */ | 1537 | */ | |
1536 | if (__predict_false(dt != &fdp->fd_dtbuiltin)) { | 1538 | if (__predict_false(dt != &fdp->fd_dtbuiltin)) { | |
1537 | fd_dtab_free(fdp->fd_dt); | 1539 | fd_dtab_free(fdp->fd_dt); | |
1538 | /* Otherwise, done above. */ | 1540 | /* Otherwise, done above. */ | |
1539 | memset(&fdp->fd_dtbuiltin.dt_ff[NDFDFILE], 0, | 1541 | memset(&fdp->fd_dtbuiltin.dt_ff[NDFDFILE], 0, | |
1540 | (NDFILE - NDFDFILE) * sizeof(fdp->fd_dtbuiltin.dt_ff[0])); | 1542 | (NDFILE - NDFDFILE) * sizeof(fdp->fd_dtbuiltin.dt_ff[0])); | |
1541 | fdp->fd_dt = &fdp->fd_dtbuiltin; | 1543 | fdp->fd_dt = &fdp->fd_dtbuiltin; | |
1542 | } | 1544 | } | |
1543 | if (__predict_false(NDHISLOTS(nf) > NDHISLOTS(NDFILE))) { | 1545 | if (__predict_false(NDHISLOTS(nf) > NDHISLOTS(NDFILE))) { | |
1544 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); | 1546 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); | |
1545 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); | 1547 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); | |
1546 | fd_map_free(nf, fdp->fd_lomap, fdp->fd_himap); | 1548 | fd_map_free(nf, fdp->fd_lomap, fdp->fd_himap); | |
1547 | } | 1549 | } | |
1548 | if (__predict_false(fdp->fd_knhash != NULL)) { | 1550 | if (__predict_false(fdp->fd_knhash != NULL)) { | |
1549 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); | 1551 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); | |
1550 | fdp->fd_knhash = NULL; | 1552 | fdp->fd_knhash = NULL; | |
1551 | fdp->fd_knhashmask = 0; | 1553 | fdp->fd_knhashmask = 0; | |
1552 | } else { | 1554 | } else { | |
1553 | KASSERT(fdp->fd_knhashmask == 0); | 1555 | KASSERT(fdp->fd_knhashmask == 0); | |
1554 | } | 1556 | } | |
1555 | fdp->fd_dt = &fdp->fd_dtbuiltin; | 1557 | fdp->fd_dt = &fdp->fd_dtbuiltin; | |
1556 | fdp->fd_lastkqfile = -1; | 1558 | fdp->fd_lastkqfile = -1; | |
1557 | fdp->fd_lastfile = -1; | 1559 | fdp->fd_lastfile = -1; | |
1558 | fdp->fd_freefile = 0; | 1560 | fdp->fd_freefile = 0; | |
1559 | fdp->fd_exclose = false; | 1561 | fdp->fd_exclose = false; | |
1560 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - | 1562 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - | |
1561 | offsetof(filedesc_t, fd_startzero)); | 1563 | offsetof(filedesc_t, fd_startzero)); | |
1562 | fdp->fd_himap = fdp->fd_dhimap; | 1564 | fdp->fd_himap = fdp->fd_dhimap; | |
1563 | fdp->fd_lomap = fdp->fd_dlomap; | 1565 | fdp->fd_lomap = fdp->fd_dlomap; | |
1564 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | 1566 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | |
1565 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | 1567 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | |
1566 | KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); | 1568 | KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); | |
1567 | #ifdef DEBUG | 1569 | #ifdef DEBUG | |
1568 | fdp->fd_refcnt = 0; /* see fd_checkmaps */ | 1570 | fdp->fd_refcnt = 0; /* see fd_checkmaps */ | |
1569 | #endif | 1571 | #endif | |
1570 | fd_checkmaps(fdp); | 1572 | fd_checkmaps(fdp); | |
1571 | pool_cache_put(filedesc_cache, fdp); | 1573 | pool_cache_put(filedesc_cache, fdp); | |
1572 | } | 1574 | } | |
1573 | 1575 | |||
1574 | /* | 1576 | /* | |
1575 | * File Descriptor pseudo-device driver (/dev/fd/). | 1577 | * File Descriptor pseudo-device driver (/dev/fd/). | |
1576 | * | 1578 | * | |
1577 | * Opening minor device N dup()s the file (if any) connected to file | 1579 | * Opening minor device N dup()s the file (if any) connected to file | |
1578 | * descriptor N belonging to the calling process. Note that this driver | 1580 | * descriptor N belonging to the calling process. Note that this driver | |
1579 | * consists of only the ``open()'' routine, because all subsequent | 1581 | * consists of only the ``open()'' routine, because all subsequent | |
1580 | * references to this file will be direct to the other driver. | 1582 | * references to this file will be direct to the other driver. | |
1581 | */ | 1583 | */ | |
1582 | static int | 1584 | static int | |
1583 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) | 1585 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) | |
1584 | { | 1586 | { | |
1585 | 1587 | |||
1586 | /* | 1588 | /* | |
1587 | * XXX Kludge: set dupfd to contain the value of the | 1589 | * XXX Kludge: set dupfd to contain the value of the | |
1588 | * the file descriptor being sought for duplication. The error | 1590 | * the file descriptor being sought for duplication. The error | |
1589 | * return ensures that the vnode for this device will be released | 1591 | * return ensures that the vnode for this device will be released | |
1590 | * by vn_open. Open will detect this special error and take the | 1592 | * by vn_open. Open will detect this special error and take the | |
1591 | * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN | 1593 | * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN | |
1592 | * will simply report the error. | 1594 | * will simply report the error. | |
1593 | */ | 1595 | */ | |
1594 | l->l_dupfd = minor(dev); /* XXX */ | 1596 | l->l_dupfd = minor(dev); /* XXX */ | |
1595 | return EDUPFD; | 1597 | return EDUPFD; | |
1596 | } | 1598 | } | |
1597 | 1599 | |||
1598 | /* | 1600 | /* | |
1599 | * Duplicate the specified descriptor to a free descriptor. | 1601 | * Duplicate the specified descriptor to a free descriptor. | |
1600 | */ | 1602 | */ | |
1601 | int | 1603 | int | |
1602 | fd_dupopen(int old, int *new, int mode, int error) | 1604 | fd_dupopen(int old, int *new, int mode, int error) | |
1603 | { | 1605 | { | |
1604 | filedesc_t *fdp; | 1606 | filedesc_t *fdp; | |
1605 | fdfile_t *ff; | 1607 | fdfile_t *ff; | |
1606 | file_t *fp; | 1608 | file_t *fp; | |
1607 | fdtab_t *dt; | 1609 | fdtab_t *dt; | |
1608 | 1610 | |||
1609 | if ((fp = fd_getfile(old)) == NULL) { | 1611 | if ((fp = fd_getfile(old)) == NULL) { | |
1610 | return EBADF; | 1612 | return EBADF; | |
1611 | } | 1613 | } | |
1612 | fdp = curlwp->l_fd; | 1614 | fdp = curlwp->l_fd; | |
1613 | dt = fdp->fd_dt; | 1615 | dt = fdp->fd_dt; | |
1614 | ff = dt->dt_ff[old]; | 1616 | ff = dt->dt_ff[old]; | |
1615 | 1617 | |||
1616 | /* | 1618 | /* | |
1617 | * There are two cases of interest here. | 1619 | * There are two cases of interest here. | |
1618 | * | 1620 | * | |
1619 | * For EDUPFD simply dup (dfd) to file descriptor | 1621 | * For EDUPFD simply dup (dfd) to file descriptor | |
1620 | * (indx) and return. | 1622 | * (indx) and return. | |
1621 | * | 1623 | * | |
1622 | * For EMOVEFD steal away the file structure from (dfd) and | 1624 | * For EMOVEFD steal away the file structure from (dfd) and | |
1623 | * store it in (indx). (dfd) is effectively closed by | 1625 | * store it in (indx). (dfd) is effectively closed by | |
1624 | * this operation. | 1626 | * this operation. | |
1625 | * | 1627 | * | |
1626 | * Any other error code is just returned. | 1628 | * Any other error code is just returned. | |
1627 | */ | 1629 | */ | |
1628 | switch (error) { | 1630 | switch (error) { | |
1629 | case EDUPFD: | 1631 | case EDUPFD: | |
1630 | /* | 1632 | /* | |
1631 | * Check that the mode the file is being opened for is a | 1633 | * Check that the mode the file is being opened for is a | |
1632 | * subset of the mode of the existing descriptor. | 1634 | * subset of the mode of the existing descriptor. | |
1633 | */ | 1635 | */ | |
1634 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { | 1636 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { | |
1635 | error = EACCES; | 1637 | error = EACCES; | |
1636 | break; | 1638 | break; | |
1637 | } | 1639 | } | |
1638 | 1640 | |||
1639 | /* Copy it. */ | 1641 | /* Copy it. */ | |
1640 | error = fd_dup(fp, 0, new, ff->ff_exclose); | 1642 | error = fd_dup(fp, 0, new, ff->ff_exclose); | |
1641 | break; | 1643 | break; | |
1642 | 1644 | |||
1643 | case EMOVEFD: | 1645 | case EMOVEFD: | |
1644 | /* Copy it. */ | 1646 | /* Copy it. */ | |
1645 | error = fd_dup(fp, 0, new, ff->ff_exclose); | 1647 | error = fd_dup(fp, 0, new, ff->ff_exclose); | |
1646 | if (error != 0) { | 1648 | if (error != 0) { | |
1647 | break; | 1649 | break; | |
1648 | } | 1650 | } | |
1649 | 1651 | |||
1650 | /* Steal away the file pointer from 'old'. */ | 1652 | /* Steal away the file pointer from 'old'. */ | |
1651 | (void)fd_close(old); | 1653 | (void)fd_close(old); | |
1652 | return 0; | 1654 | return 0; | |
1653 | } | 1655 | } | |
1654 | 1656 | |||
1655 | fd_putfile(old); | 1657 | fd_putfile(old); | |
1656 | return error; | 1658 | return error; | |
1657 | } | 1659 | } | |
1658 | 1660 | |||
1659 | /* | 1661 | /* | |
1660 | * Sets descriptor owner. If the owner is a process, 'pgid' | 1662 | * Sets descriptor owner. If the owner is a process, 'pgid' | |
1661 | * is set to positive value, process ID. If the owner is process group, | 1663 | * is set to positive value, process ID. If the owner is process group, | |
1662 | * 'pgid' is set to -pg_id. | 1664 | * 'pgid' is set to -pg_id. | |
1663 | */ | 1665 | */ | |
1664 | int | 1666 | int | |
1665 | fsetown(pid_t *pgid, u_long cmd, const void *data) | 1667 | fsetown(pid_t *pgid, u_long cmd, const void *data) | |
1666 | { | 1668 | { | |
1667 | int id = *(const int *)data; | 1669 | int id = *(const int *)data; | |
1668 | int error; | 1670 | int error; | |
1669 | 1671 | |||
1670 | switch (cmd) { | 1672 | switch (cmd) { | |
1671 | case TIOCSPGRP: | 1673 | case TIOCSPGRP: | |
1672 | if (id < 0) | 1674 | if (id < 0) | |
1673 | return (EINVAL); | 1675 | return (EINVAL); | |
1674 | id = -id; | 1676 | id = -id; | |
1675 | break; | 1677 | break; | |
1676 | default: | 1678 | default: | |
1677 | break; | 1679 | break; | |
1678 | } | 1680 | } | |
1679 | 1681 | |||
1680 | if (id > 0 && !pfind(id)) | 1682 | if (id > 0 && !pfind(id)) | |
1681 | return (ESRCH); | 1683 | return (ESRCH); | |
1682 | else if (id < 0 && (error = pgid_in_session(curproc, -id))) | 1684 | else if (id < 0 && (error = pgid_in_session(curproc, -id))) | |
1683 | return (error); | 1685 | return (error); | |
1684 | 1686 | |||
1685 | *pgid = id; | 1687 | *pgid = id; | |
1686 | return (0); | 1688 | return (0); | |
1687 | } | 1689 | } | |
1688 | 1690 | |||
1689 | /* | 1691 | /* | |
1690 | * Return descriptor owner information. If the value is positive, | 1692 | * Return descriptor owner information. If the value is positive, | |
1691 | * it's process ID. If it's negative, it's process group ID and | 1693 | * it's process ID. If it's negative, it's process group ID and | |
1692 | * needs the sign removed before use. | 1694 | * needs the sign removed before use. | |
1693 | */ | 1695 | */ | |
1694 | int | 1696 | int | |
1695 | fgetown(pid_t pgid, u_long cmd, void *data) | 1697 | fgetown(pid_t pgid, u_long cmd, void *data) | |
1696 | { | 1698 | { | |
1697 | 1699 | |||
1698 | switch (cmd) { | 1700 | switch (cmd) { | |
1699 | case TIOCGPGRP: | 1701 | case TIOCGPGRP: | |
1700 | *(int *)data = -pgid; | 1702 | *(int *)data = -pgid; | |
1701 | break; | 1703 | break; | |
1702 | default: | 1704 | default: | |
1703 | *(int *)data = pgid; | 1705 | *(int *)data = pgid; | |
1704 | break; | 1706 | break; | |
1705 | } | 1707 | } | |
1706 | return (0); | 1708 | return (0); | |
1707 | } | 1709 | } | |
1708 | 1710 | |||
1709 | /* | 1711 | /* | |
1710 | * Send signal to descriptor owner, either process or process group. | 1712 | * Send signal to descriptor owner, either process or process group. | |
1711 | */ | 1713 | */ | |
1712 | void | 1714 | void | |
1713 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) | 1715 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) | |
1714 | { | 1716 | { | |
1715 | ksiginfo_t ksi; | 1717 | ksiginfo_t ksi; | |
1716 | 1718 | |||
1717 | KASSERT(!cpu_intr_p()); | 1719 | KASSERT(!cpu_intr_p()); | |
1718 | 1720 | |||
1719 | if (pgid == 0) { | 1721 | if (pgid == 0) { | |
1720 | return; | 1722 | return; | |
1721 | } | 1723 | } | |
1722 | 1724 | |||
1723 | KSI_INIT(&ksi); | 1725 | KSI_INIT(&ksi); | |
1724 | ksi.ksi_signo = signo; | 1726 | ksi.ksi_signo = signo; | |
1725 | ksi.ksi_code = code; | 1727 | ksi.ksi_code = code; | |
1726 | ksi.ksi_band = band; | 1728 | ksi.ksi_band = band; | |
1727 | 1729 | |||
1728 | mutex_enter(proc_lock); | 1730 | mutex_enter(proc_lock); | |
1729 | if (pgid > 0) { | 1731 | if (pgid > 0) { | |
1730 | struct proc *p1; | 1732 | struct proc *p1; | |
1731 | 1733 | |||
1732 | p1 = p_find(pgid, PFIND_LOCKED); | 1734 | p1 = p_find(pgid, PFIND_LOCKED); | |
1733 | if (p1 != NULL) { | 1735 | if (p1 != NULL) { | |
1734 | kpsignal(p1, &ksi, fdescdata); | 1736 | kpsignal(p1, &ksi, fdescdata); | |
1735 | } | 1737 | } | |
1736 | } else { | 1738 | } else { | |
1737 | struct pgrp *pgrp; | 1739 | struct pgrp *pgrp; | |
1738 | 1740 | |||
1739 | KASSERT(pgid < 0); | 1741 | KASSERT(pgid < 0); | |
1740 | pgrp = pg_find(-pgid, PFIND_LOCKED); | 1742 | pgrp = pg_find(-pgid, PFIND_LOCKED); | |
1741 | if (pgrp != NULL) { | 1743 | if (pgrp != NULL) { | |
1742 | kpgsignal(pgrp, &ksi, fdescdata, 0); | 1744 | kpgsignal(pgrp, &ksi, fdescdata, 0); | |
1743 | } | 1745 | } | |
1744 | } | 1746 | } | |
1745 | mutex_exit(proc_lock); | 1747 | mutex_exit(proc_lock); | |
1746 | } | 1748 | } | |
1747 | 1749 | |||
1748 | int | 1750 | int | |
1749 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, | 1751 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, | |
1750 | void *data) | 1752 | void *data) | |
1751 | { | 1753 | { | |
1752 | 1754 | |||
1753 | fp->f_flag = flag; | 1755 | fp->f_flag = flag; | |
1754 | fp->f_type = DTYPE_MISC; | 1756 | fp->f_type = DTYPE_MISC; | |
1755 | fp->f_ops = fops; | 1757 | fp->f_ops = fops; | |
1756 | fp->f_data = data; | 1758 | fp->f_data = data; | |
1757 | curlwp->l_dupfd = fd; | 1759 | curlwp->l_dupfd = fd; | |
1758 | fd_affix(curproc, fp, fd); | 1760 | fd_affix(curproc, fp, fd); | |
1759 | 1761 | |||
1760 | return EMOVEFD; | 1762 | return EMOVEFD; | |
1761 | } | 1763 | } | |
1762 | 1764 | |||
1763 | int | 1765 | int | |
1764 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) | 1766 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) | |
1765 | { | 1767 | { | |
1766 | 1768 | |||
1767 | if (cmd == F_SETFL) | 1769 | if (cmd == F_SETFL) | |
1768 | return 0; | 1770 | return 0; | |
1769 | 1771 | |||
1770 | return EOPNOTSUPP; | 1772 | return EOPNOTSUPP; | |
1771 | } | 1773 | } | |
1772 | 1774 | |||
1773 | int | 1775 | int | |
1774 | fnullop_poll(file_t *fp, int which) | 1776 | fnullop_poll(file_t *fp, int which) | |
1775 | { | 1777 | { | |
1776 | 1778 | |||
1777 | return 0; | 1779 | return 0; | |
1778 | } | 1780 | } | |
1779 | 1781 | |||
1780 | int | 1782 | int | |
1781 | fnullop_kqfilter(file_t *fp, struct knote *kn) | 1783 | fnullop_kqfilter(file_t *fp, struct knote *kn) | |
1782 | { | 1784 | { | |
1783 | 1785 | |||
1784 | return 0; | 1786 | return 0; | |
1785 | } | 1787 | } | |
1786 | 1788 | |||
1787 | void | 1789 | void | |
1788 | fnullop_drain(file_t *fp) | 1790 | fnullop_drain(file_t *fp) | |
1789 | { | 1791 | { | |
1790 | 1792 | |||
1791 | } | 1793 | } | |
1792 | 1794 | |||
1793 | int | 1795 | int | |
1794 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, | 1796 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, | |
1795 | kauth_cred_t cred, int flags) | 1797 | kauth_cred_t cred, int flags) | |
1796 | { | 1798 | { | |
1797 | 1799 | |||
1798 | return EOPNOTSUPP; | 1800 | return EOPNOTSUPP; | |
1799 | } | 1801 | } | |
1800 | 1802 | |||
1801 | int | 1803 | int | |
1802 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, | 1804 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, | |
1803 | kauth_cred_t cred, int flags) | 1805 | kauth_cred_t cred, int flags) | |
1804 | { | 1806 | { | |
1805 | 1807 | |||
1806 | return EOPNOTSUPP; | 1808 | return EOPNOTSUPP; | |
1807 | } | 1809 | } | |
1808 | 1810 | |||
1809 | int | 1811 | int | |
1810 | fbadop_ioctl(file_t *fp, u_long com, void *data) | 1812 | fbadop_ioctl(file_t *fp, u_long com, void *data) | |
1811 | { | 1813 | { | |
1812 | 1814 | |||
1813 | return EOPNOTSUPP; | 1815 | return EOPNOTSUPP; | |
1814 | } | 1816 | } | |
1815 | 1817 | |||
1816 | int | 1818 | int | |
1817 | fbadop_stat(file_t *fp, struct stat *sb) | 1819 | fbadop_stat(file_t *fp, struct stat *sb) | |
1818 | { | 1820 | { | |
1819 | 1821 | |||
1820 | return EOPNOTSUPP; | 1822 | return EOPNOTSUPP; | |
1821 | } | 1823 | } | |
1822 | 1824 | |||
1823 | int | 1825 | int | |
1824 | fbadop_close(file_t *fp) | 1826 | fbadop_close(file_t *fp) | |
1825 | { | 1827 | { | |
1826 | 1828 | |||
1827 | return EOPNOTSUPP; | 1829 | return EOPNOTSUPP; | |
1828 | } | 1830 | } |
--- src/sys/kern/kern_lwp.c 2009/10/22 22:28:57 1.135
+++ src/sys/kern/kern_lwp.c 2009/10/27 02:58:28 1.136
@@ -1,1608 +1,1608 @@ | @@ -1,1608 +1,1608 @@ | |||
1 | /* $NetBSD: kern_lwp.c,v 1.135 2009/10/22 22:28:57 rmind Exp $ */ | 1 | /* $NetBSD: kern_lwp.c,v 1.136 2009/10/27 02:58:28 rmind Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Nathan J. Williams, and Andrew Doran. | 8 | * by Nathan J. Williams, and Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Overview | 33 | * Overview | |
34 | * | 34 | * | |
35 | * Lightweight processes (LWPs) are the basic unit or thread of | 35 | * Lightweight processes (LWPs) are the basic unit or thread of | |
36 | * execution within the kernel. The core state of an LWP is described | 36 | * execution within the kernel. The core state of an LWP is described | |
37 | * by "struct lwp", also known as lwp_t. | 37 | * by "struct lwp", also known as lwp_t. | |
38 | * | 38 | * | |
39 | * Each LWP is contained within a process (described by "struct proc"), | 39 | * Each LWP is contained within a process (described by "struct proc"), | |
40 | * Every process contains at least one LWP, but may contain more. The | 40 | * Every process contains at least one LWP, but may contain more. The | |
41 | * process describes attributes shared among all of its LWPs such as a | 41 | * process describes attributes shared among all of its LWPs such as a | |
42 | * private address space, global execution state (stopped, active, | 42 | * private address space, global execution state (stopped, active, | |
43 | * zombie, ...), signal disposition and so on. On a multiprocessor | 43 | * zombie, ...), signal disposition and so on. On a multiprocessor | |
44 | * machine, multiple LWPs be executing concurrently in the kernel. | 44 | * machine, multiple LWPs be executing concurrently in the kernel. | |
45 | * | 45 | * | |
46 | * Execution states | 46 | * Execution states | |
47 | * | 47 | * | |
48 | * At any given time, an LWP has overall state that is described by | 48 | * At any given time, an LWP has overall state that is described by | |
49 | * lwp::l_stat. The states are broken into two sets below. The first | 49 | * lwp::l_stat. The states are broken into two sets below. The first | |
50 | * set is guaranteed to represent the absolute, current state of the | 50 | * set is guaranteed to represent the absolute, current state of the | |
51 | * LWP: | 51 | * LWP: | |
52 | * | 52 | * | |
53 | * LSONPROC | 53 | * LSONPROC | |
54 | * | 54 | * | |
55 | * On processor: the LWP is executing on a CPU, either in the | 55 | * On processor: the LWP is executing on a CPU, either in the | |
56 | * kernel or in user space. | 56 | * kernel or in user space. | |
57 | * | 57 | * | |
58 | * LSRUN | 58 | * LSRUN | |
59 | * | 59 | * | |
60 | * Runnable: the LWP is parked on a run queue, and may soon be | 60 | * Runnable: the LWP is parked on a run queue, and may soon be | |
61 | * chosen to run by an idle processor, or by a processor that | 61 | * chosen to run by an idle processor, or by a processor that | |
62 | * has been asked to preempt a currently runnning but lower | 62 | * has been asked to preempt a currently runnning but lower | |
63 | * priority LWP. | 63 | * priority LWP. | |
64 | * | 64 | * | |
65 | * LSIDL | 65 | * LSIDL | |
66 | * | 66 | * | |
67 | * Idle: the LWP has been created but has not yet executed, | 67 | * Idle: the LWP has been created but has not yet executed, | |
68 | * or it has ceased executing a unit of work and is waiting | 68 | * or it has ceased executing a unit of work and is waiting | |
69 | * to be started again. | 69 | * to be started again. | |
70 | * | 70 | * | |
71 | * LSSUSPENDED: | 71 | * LSSUSPENDED: | |
72 | * | 72 | * | |
73 | * Suspended: the LWP has had its execution suspended by | 73 | * Suspended: the LWP has had its execution suspended by | |
74 | * another LWP in the same process using the _lwp_suspend() | 74 | * another LWP in the same process using the _lwp_suspend() | |
75 | * system call. User-level LWPs also enter the suspended | 75 | * system call. User-level LWPs also enter the suspended | |
76 | * state when the system is shutting down. | 76 | * state when the system is shutting down. | |
77 | * | 77 | * | |
78 | * The second set represent a "statement of intent" on behalf of the | 78 | * The second set represent a "statement of intent" on behalf of the | |
79 | * LWP. The LWP may in fact be executing on a processor, may be | 79 | * LWP. The LWP may in fact be executing on a processor, may be | |
80 | * sleeping or idle. It is expected to take the necessary action to | 80 | * sleeping or idle. It is expected to take the necessary action to | |
81 | * stop executing or become "running" again within a short timeframe. | 81 | * stop executing or become "running" again within a short timeframe. | |
82 | * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. | 82 | * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. | |
83 | * Importantly, it indicates that its state is tied to a CPU. | 83 | * Importantly, it indicates that its state is tied to a CPU. | |
84 | * | 84 | * | |
85 | * LSZOMB: | 85 | * LSZOMB: | |
86 | * | 86 | * | |
87 | * Dead or dying: the LWP has released most of its resources | 87 | * Dead or dying: the LWP has released most of its resources | |
88 | * and is about to switch away into oblivion, or has already | 88 | * and is about to switch away into oblivion, or has already | |
89 | * switched away. When it switches away, its few remaining | 89 | * switched away. When it switches away, its few remaining | |
90 | * resources can be collected. | 90 | * resources can be collected. | |
91 | * | 91 | * | |
92 | * LSSLEEP: | 92 | * LSSLEEP: | |
93 | * | 93 | * | |
94 | * Sleeping: the LWP has entered itself onto a sleep queue, and | 94 | * Sleeping: the LWP has entered itself onto a sleep queue, and | |
95 | * has switched away or will switch away shortly to allow other | 95 | * has switched away or will switch away shortly to allow other | |
96 | * LWPs to run on the CPU. | 96 | * LWPs to run on the CPU. | |
97 | * | 97 | * | |
98 | * LSSTOP: | 98 | * LSSTOP: | |
99 | * | 99 | * | |
100 | * Stopped: the LWP has been stopped as a result of a job | 100 | * Stopped: the LWP has been stopped as a result of a job | |
101 | * control signal, or as a result of the ptrace() interface. | 101 | * control signal, or as a result of the ptrace() interface. | |
102 | * | 102 | * | |
103 | * Stopped LWPs may run briefly within the kernel to handle | 103 | * Stopped LWPs may run briefly within the kernel to handle | |
104 | * signals that they receive, but will not return to user space | 104 | * signals that they receive, but will not return to user space | |
105 | * until their process' state is changed away from stopped. | 105 | * until their process' state is changed away from stopped. | |
106 | * | 106 | * | |
107 | * Single LWPs within a process can not be set stopped | 107 | * Single LWPs within a process can not be set stopped | |
108 | * selectively: all actions that can stop or continue LWPs | 108 | * selectively: all actions that can stop or continue LWPs | |
109 | * occur at the process level. | 109 | * occur at the process level. | |
110 | * | 110 | * | |
111 | * State transitions | 111 | * State transitions | |
112 | * | 112 | * | |
113 | * Note that the LSSTOP state may only be set when returning to | 113 | * Note that the LSSTOP state may only be set when returning to | |
114 | * user space in userret(), or when sleeping interruptably. The | 114 | * user space in userret(), or when sleeping interruptably. The | |
115 | * LSSUSPENDED state may only be set in userret(). Before setting | 115 | * LSSUSPENDED state may only be set in userret(). Before setting | |
116 | * those states, we try to ensure that the LWPs will release all | 116 | * those states, we try to ensure that the LWPs will release all | |
117 | * locks that they hold, and at a minimum try to ensure that the | 117 | * locks that they hold, and at a minimum try to ensure that the | |
118 | * LWP can be set runnable again by a signal. | 118 | * LWP can be set runnable again by a signal. | |
119 | * | 119 | * | |
120 | * LWPs may transition states in the following ways: | 120 | * LWPs may transition states in the following ways: | |
121 | * | 121 | * | |
122 | * RUN -------> ONPROC ONPROC -----> RUN | 122 | * RUN -------> ONPROC ONPROC -----> RUN | |
123 | * > SLEEP | 123 | * > SLEEP | |
124 | * > STOPPED | 124 | * > STOPPED | |
125 | * > SUSPENDED | 125 | * > SUSPENDED | |
126 | * > ZOMB | 126 | * > ZOMB | |
127 | * > IDL (special cases) | 127 | * > IDL (special cases) | |
128 | * | 128 | * | |
129 | * STOPPED ---> RUN SUSPENDED --> RUN | 129 | * STOPPED ---> RUN SUSPENDED --> RUN | |
130 | * > SLEEP | 130 | * > SLEEP | |
131 | * | 131 | * | |
132 | * SLEEP -----> ONPROC IDL --------> RUN | 132 | * SLEEP -----> ONPROC IDL --------> RUN | |
133 | * > RUN > SUSPENDED | 133 | * > RUN > SUSPENDED | |
134 | * > STOPPED > STOPPED | 134 | * > STOPPED > STOPPED | |
135 | * > ONPROC (special cases) | 135 | * > ONPROC (special cases) | |
136 | * | 136 | * | |
137 | * Some state transitions are only possible with kernel threads (eg | 137 | * Some state transitions are only possible with kernel threads (eg | |
138 | * ONPROC -> IDL) and happen under tightly controlled circumstances | 138 | * ONPROC -> IDL) and happen under tightly controlled circumstances | |
139 | * free of unwanted side effects. | 139 | * free of unwanted side effects. | |
140 | * | 140 | * | |
141 | * Migration | 141 | * Migration | |
142 | * | 142 | * | |
143 | * Migration of threads from one CPU to another could be performed | 143 | * Migration of threads from one CPU to another could be performed | |
144 | * internally by the scheduler via sched_takecpu() or sched_catchlwp() | 144 | * internally by the scheduler via sched_takecpu() or sched_catchlwp() | |
145 | * functions. The universal lwp_migrate() function should be used for | 145 | * functions. The universal lwp_migrate() function should be used for | |
146 | * any other cases. Subsystems in the kernel must be aware that CPU | 146 | * any other cases. Subsystems in the kernel must be aware that CPU | |
147 | * of LWP may change, while it is not locked. | 147 | * of LWP may change, while it is not locked. | |
148 | * | 148 | * | |
149 | * Locking | 149 | * Locking | |
150 | * | 150 | * | |
151 | * The majority of fields in 'struct lwp' are covered by a single, | 151 | * The majority of fields in 'struct lwp' are covered by a single, | |
152 | * general spin lock pointed to by lwp::l_mutex. The locks covering | 152 | * general spin lock pointed to by lwp::l_mutex. The locks covering | |
153 | * each field are documented in sys/lwp.h. | 153 | * each field are documented in sys/lwp.h. | |
154 | * | 154 | * | |
155 | * State transitions must be made with the LWP's general lock held, | 155 | * State transitions must be made with the LWP's general lock held, | |
156 | * and may cause the LWP's lock pointer to change. Manipulation of | 156 | * and may cause the LWP's lock pointer to change. Manipulation of | |
157 | * the general lock is not performed directly, but through calls to | 157 | * the general lock is not performed directly, but through calls to | |
158 | * lwp_lock(), lwp_relock() and similar. | 158 | * lwp_lock(), lwp_relock() and similar. | |
159 | * | 159 | * | |
160 | * States and their associated locks: | 160 | * States and their associated locks: | |
161 | * | 161 | * | |
162 | * LSONPROC, LSZOMB: | 162 | * LSONPROC, LSZOMB: | |
163 | * | 163 | * | |
164 | * Always covered by spc_lwplock, which protects running LWPs. | 164 | * Always covered by spc_lwplock, which protects running LWPs. | |
165 | * This is a per-CPU lock and matches lwp::l_cpu. | 165 | * This is a per-CPU lock and matches lwp::l_cpu. | |
166 | * | 166 | * | |
167 | * LSIDL, LSRUN: | 167 | * LSIDL, LSRUN: | |
168 | * | 168 | * | |
169 | * Always covered by spc_mutex, which protects the run queues. | 169 | * Always covered by spc_mutex, which protects the run queues. | |
170 | * This is a per-CPU lock and matches lwp::l_cpu. | 170 | * This is a per-CPU lock and matches lwp::l_cpu. | |
171 | * | 171 | * | |
172 | * LSSLEEP: | 172 | * LSSLEEP: | |
173 | * | 173 | * | |
174 | * Covered by a lock associated with the sleep queue that the | 174 | * Covered by a lock associated with the sleep queue that the | |
175 | * LWP resides on. Matches lwp::l_sleepq::sq_mutex. | 175 | * LWP resides on. Matches lwp::l_sleepq::sq_mutex. | |
176 | * | 176 | * | |
177 | * LSSTOP, LSSUSPENDED: | 177 | * LSSTOP, LSSUSPENDED: | |
178 | * | 178 | * | |
179 | * If the LWP was previously sleeping (l_wchan != NULL), then | 179 | * If the LWP was previously sleeping (l_wchan != NULL), then | |
180 | * l_mutex references the sleep queue lock. If the LWP was | 180 | * l_mutex references the sleep queue lock. If the LWP was | |
181 | * runnable or on the CPU when halted, or has been removed from | 181 | * runnable or on the CPU when halted, or has been removed from | |
182 | * the sleep queue since halted, then the lock is spc_lwplock. | 182 | * the sleep queue since halted, then the lock is spc_lwplock. | |
183 | * | 183 | * | |
184 | * The lock order is as follows: | 184 | * The lock order is as follows: | |
185 | * | 185 | * | |
186 | * spc::spc_lwplock -> | 186 | * spc::spc_lwplock -> | |
187 | * sleeptab::st_mutex -> | 187 | * sleeptab::st_mutex -> | |
188 | * tschain_t::tc_mutex -> | 188 | * tschain_t::tc_mutex -> | |
189 | * spc::spc_mutex | 189 | * spc::spc_mutex | |
190 | * | 190 | * | |
191 | * Each process has a scheduler state lock (proc::p_lock), and a | 191 | * Each process has a scheduler state lock (proc::p_lock), and a | 
192 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and | 192 | * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and | |
193 | * so on. When an LWP is to be entered into or removed from one of the | 193 | * so on. When an LWP is to be entered into or removed from one of the | |
194 | * following states, p_lock must be held and the process wide counters | 194 | * following states, p_lock must be held and the process wide counters | |
195 | * adjusted: | 195 | * adjusted: | |
196 | * | 196 | * | |
197 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED | 197 | * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED | |
198 | * | 198 | * | |
199 | * (But not always for kernel threads. There are some special cases | 199 | * (But not always for kernel threads. There are some special cases | |
200 | * as mentioned above. See kern_softint.c.) | 200 | * as mentioned above. See kern_softint.c.) | |
201 | * | 201 | * | |
202 | * Note that an LWP is considered running or likely to run soon if in | 202 | * Note that an LWP is considered running or likely to run soon if in | |
203 | * one of the following states. This affects the value of p_nrlwps: | 203 | * one of the following states. This affects the value of p_nrlwps: | |
204 | * | 204 | * | |
205 | * LSRUN, LSONPROC, LSSLEEP | 205 | * LSRUN, LSONPROC, LSSLEEP | |
206 | * | 206 | * | |
207 | * p_lock does not need to be held when transitioning among these | 207 | * p_lock does not need to be held when transitioning among these | |
208 | * three states, hence p_lock is rarely taken for state transitions. | 208 | * three states, hence p_lock is rarely taken for state transitions. | |
209 | */ | 209 | */ | |
210 | 210 | |||
211 | #include <sys/cdefs.h> | 211 | #include <sys/cdefs.h> | |
212 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.135 2009/10/22 22:28:57 rmind Exp $"); | 212 | __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.136 2009/10/27 02:58:28 rmind Exp $"); | |
213 | 213 | |||
214 | #include "opt_ddb.h" | 214 | #include "opt_ddb.h" | |
215 | #include "opt_lockdebug.h" | 215 | #include "opt_lockdebug.h" | |
216 | #include "opt_sa.h" | 216 | #include "opt_sa.h" | |
217 | 217 | |||
218 | #define _LWP_API_PRIVATE | 218 | #define _LWP_API_PRIVATE | |
219 | 219 | |||
220 | #include <sys/param.h> | 220 | #include <sys/param.h> | |
221 | #include <sys/systm.h> | 221 | #include <sys/systm.h> | |
222 | #include <sys/cpu.h> | 222 | #include <sys/cpu.h> | |
223 | #include <sys/pool.h> | 223 | #include <sys/pool.h> | |
224 | #include <sys/proc.h> | 224 | #include <sys/proc.h> | |
225 | #include <sys/sa.h> | 225 | #include <sys/sa.h> | |
226 | #include <sys/savar.h> | 226 | #include <sys/savar.h> | |
227 | #include <sys/syscallargs.h> | 227 | #include <sys/syscallargs.h> | |
228 | #include <sys/syscall_stats.h> | 228 | #include <sys/syscall_stats.h> | |
229 | #include <sys/kauth.h> | 229 | #include <sys/kauth.h> | |
230 | #include <sys/sleepq.h> | 230 | #include <sys/sleepq.h> | |
231 | #include <sys/user.h> | 231 | #include <sys/user.h> | |
232 | #include <sys/lockdebug.h> | 232 | #include <sys/lockdebug.h> | |
233 | #include <sys/kmem.h> | 233 | #include <sys/kmem.h> | |
234 | #include <sys/pset.h> | 234 | #include <sys/pset.h> | |
235 | #include <sys/intr.h> | 235 | #include <sys/intr.h> | |
236 | #include <sys/lwpctl.h> | 236 | #include <sys/lwpctl.h> | |
237 | #include <sys/atomic.h> | 237 | #include <sys/atomic.h> | |
238 | #include <sys/filedesc.h> | 238 | #include <sys/filedesc.h> | |
239 | 239 | |||
240 | #include <uvm/uvm_extern.h> | 240 | #include <uvm/uvm_extern.h> | |
241 | #include <uvm/uvm_object.h> | 241 | #include <uvm/uvm_object.h> | |
242 | 242 | |||
243 | struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp); | 243 | struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp); | |
244 | 244 | |||
245 | struct pool lwp_uc_pool; | 245 | struct pool lwp_uc_pool; | |
246 | 246 | |||
247 | static pool_cache_t lwp_cache; | 247 | static pool_cache_t lwp_cache; | |
248 | static specificdata_domain_t lwp_specificdata_domain; | 248 | static specificdata_domain_t lwp_specificdata_domain; | |
249 | 249 | |||
/*
 * lwpinit: one-time initialization of the LWP subsystem: allocation
 * pools/caches and the per-LWP specificdata domain.  Also calls
 * lwp_sys_init() for further subsystem-internal setup.
 */
void
lwpinit(void)
{

	/* Pool of ucontext_t structures (sized for one ucontext_t each). */
	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
	    &pool_allocator_nointr, IPL_NONE);
	/*
	 * Domain for per-LWP specificdata; must exist before
	 * lwp_initspecific() is called for any LWP.
	 */
	lwp_specificdata_domain = specificdata_domain_create();
	KASSERT(lwp_specificdata_domain != NULL);
	lwp_sys_init();
	/* Cache of lwp_t structures, consumed by lwp_create(). */
	lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
	    "lwppl", NULL, IPL_NONE, NULL, NULL, NULL);
}
262 | 262 | |||
263 | /* | 263 | /* | |
264 | * Set an LWP suspended. | 264 | * Set an LWP suspended. | 
265 | * | 265 | * | |
266 | * Must be called with p_lock held, and the LWP locked. Will unlock the | 266 | * Must be called with p_lock held, and the LWP locked. Will unlock the | |
267 | * LWP before return. | 267 | * LWP before return. | |
268 | */ | 268 | */ | |
/*
 * Returns 0 on success, EDEADLK when suspending would deadlock (the
 * caller itself is exiting/dumping core), or EINTR when the target is
 * in LSIDL or LSZOMB.  The target's lock is released on every path.
 */
int
lwp_suspend(struct lwp *curl, struct lwp *t)
{
	int error;

	KASSERT(mutex_owned(t->l_proc->p_lock));
	KASSERT(lwp_locked(t, NULL));

	KASSERT(curl != t || curl->l_stat == LSONPROC);

	/*
	 * If the current LWP has been told to exit, we must not suspend anyone
	 * else or deadlock could occur.  We won't return to userspace.
	 */
	if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
		lwp_unlock(t);
		return (EDEADLK);
	}

	error = 0;

	switch (t->l_stat) {
	case LSRUN:
	case LSONPROC:
		/*
		 * Running or runnable: flag the request and force a trip
		 * through userret, where the suspend will take effect.
		 */
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);
		lwp_unlock(t);
		break;

	case LSSLEEP:
		t->l_flag |= LW_WSUSPEND;

		/*
		 * Kick the LWP and try to get it to the kernel boundary
		 * so that it will release any locks that it holds.
		 * setrunnable() will release the lock.
		 */
		if ((t->l_flag & LW_SINTR) != 0)
			setrunnable(t);
		else
			lwp_unlock(t);
		break;

	case LSSUSPENDED:
		/* Already suspended: nothing to do. */
		lwp_unlock(t);
		break;

	case LSSTOP:
		/*
		 * Stopped: make it runnable so that it can notice
		 * LW_WSUSPEND and suspend itself.  setrunnable() will
		 * release the lock.
		 */
		t->l_flag |= LW_WSUSPEND;
		setrunnable(t);
		break;

	case LSIDL:
	case LSZOMB:
		error = EINTR; /* It's what Solaris does..... */
		lwp_unlock(t);
		break;
	}

	return (error);
}
330 | 330 | |||
331 | /* | 331 | /* | |
332 | * Restart a suspended LWP. | 332 | * Restart a suspended LWP. | |
333 | * | 333 | * | |
334 | * Must be called with p_lock held, and the LWP locked. Will unlock the | 334 | * Must be called with p_lock held, and the LWP locked. Will unlock the | |
335 | * LWP before return. | 335 | * LWP before return. | |
336 | */ | 336 | */ | |
void
lwp_continue(struct lwp *l)
{

	KASSERT(mutex_owned(l->l_proc->p_lock));
	KASSERT(lwp_locked(l, NULL));

	/* If rebooting or not suspended, then just bail out. */
	if ((l->l_flag & LW_WREBOOT) != 0) {
		lwp_unlock(l);
		return;
	}

	/*
	 * Cancel any pending suspend request, even if the LWP has not
	 * yet actually reached LSSUSPENDED.
	 */
	l->l_flag &= ~LW_WSUSPEND;

	if (l->l_stat != LSSUSPENDED) {
		lwp_unlock(l);
		return;
	}

	/* setrunnable() will release the lock. */
	setrunnable(l);
}
360 | 360 | |||
361 | /* | 361 | /* | |
362 | * Wait for an LWP within the current process to exit. If 'lid' is | 362 | * Wait for an LWP within the current process to exit. If 'lid' is | |
363 | * non-zero, we are waiting for a specific LWP. | 363 | * non-zero, we are waiting for a specific LWP. | |
364 | * | 364 | * | |
365 | * Must be called with p->p_lock held. | 365 | * Must be called with p->p_lock held. | |
366 | */ | 366 | */ | |
int
lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
{
	struct proc *p = l->l_proc;
	struct lwp *l2;
	int nfound, error;
	lwpid_t curlid;
	bool exiting;

	KASSERT(mutex_owned(p->p_lock));

	/*
	 * Record that we are waiting; p_nlwpwait feeds the deadlock
	 * detection below, and l_waitingfor lets other waiters detect
	 * wait cycles targeting us.
	 */
	p->p_nlwpwait++;
	l->l_waitingfor = lid;
	curlid = l->l_lid;
	exiting = ((flags & LWPWAIT_EXITCONTROL) != 0);

	for (;;) {
		/*
		 * Avoid a race between exit1() and sigexit(): if the
		 * process is dumping core, then we need to bail out: call
		 * into lwp_userret() where we will be suspended until the
		 * deed is done.
		 */
		if ((p->p_sflag & PS_WCORE) != 0) {
			mutex_exit(p->p_lock);
			lwp_userret(l);
#ifdef DIAGNOSTIC
			panic("lwp_wait1");
#endif
			/* NOTREACHED */
		}

		/*
		 * First off, drain any detached LWP that is waiting to be
		 * reaped.
		 */
		while ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);/* releases proc mutex */
			mutex_enter(p->p_lock);
		}

		/*
		 * Now look for an LWP to collect.  If the whole process is
		 * exiting, count detached LWPs as eligible to be collected,
		 * but don't drain them here.
		 */
		nfound = 0;
		error = 0;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			/*
			 * If a specific wait and the target is waiting on
			 * us, then avoid deadlock.  This also traps LWPs
			 * that try to wait on themselves.
			 *
			 * Note that this does not handle more complicated
			 * cycles, like: t1 -> t2 -> t3 -> t1.  The process
			 * can still be killed so it is not a major problem.
			 */
			if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
				error = EDEADLK;
				break;
			}
			if (l2 == l)
				continue;
			if ((l2->l_prflag & LPR_DETACHED) != 0) {
				/*
				 * bool promotes to 0/1: detached LWPs are
				 * counted only when the process is exiting.
				 */
				nfound += exiting;
				continue;
			}
			if (lid != 0) {
				if (l2->l_lid != lid)
					continue;
				/*
				 * Mark this LWP as the first waiter, if there
				 * is no other.
				 */
				if (l2->l_waiter == 0)
					l2->l_waiter = curlid;
			} else if (l2->l_waiter != 0) {
				/*
				 * It already has a waiter - so don't
				 * collect it.  If the waiter doesn't
				 * grab it we'll get another chance
				 * later.
				 */
				nfound++;
				continue;
			}
			nfound++;

			/* No need to lock the LWP in order to see LSZOMB. */
			if (l2->l_stat != LSZOMB)
				continue;

			/*
			 * We're no longer waiting.  Reset the "first waiter"
			 * pointer on the target, in case it was us.
			 */
			l->l_waitingfor = 0;
			l2->l_waiter = 0;
			p->p_nlwpwait--;
			if (departed)
				*departed = l2->l_lid;
			sched_lwp_collect(l2);

			/* lwp_free() releases the proc lock. */
			lwp_free(l2, false, false);
			mutex_enter(p->p_lock);
			return 0;
		}

		if (error != 0)
			break;
		if (nfound == 0) {
			error = ESRCH;
			break;
		}

		/*
		 * The kernel is careful to ensure that it can not deadlock
		 * when exiting - just keep waiting.
		 */
		if (exiting) {
			KASSERT(p->p_nlwps > 1);
			cv_wait(&p->p_lwpcv, p->p_lock);
			continue;
		}

		/*
		 * If all other LWPs are waiting for exits or suspends
		 * and the supply of zombies and potential zombies is
		 * exhausted, then we are about to deadlock.
		 *
		 * If the process is exiting (and this LWP is not the one
		 * that is coordinating the exit) then bail out now.
		 */
		if ((p->p_sflag & PS_WEXIT) != 0 ||
		    p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) {
			error = EDEADLK;
			break;
		}

		/*
		 * Sit around and wait for something to happen.  We'll be
		 * awoken if any of the conditions examined change: if an
		 * LWP exits, is collected, or is detached.
		 */
		if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0)
			break;
	}

	/*
	 * We didn't find any LWPs to collect, we may have received a
	 * signal, or some other condition has caused us to bail out.
	 *
	 * If waiting on a specific LWP, clear the waiters marker: some
	 * other LWP may want it.  Then, kick all the remaining waiters
	 * so that they can re-check for zombies and for deadlock.
	 */
	if (lid != 0) {
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2->l_lid == lid) {
				if (l2->l_waiter == curlid)
					l2->l_waiter = 0;
				break;
			}
		}
	}
	p->p_nlwpwait--;
	l->l_waitingfor = 0;
	cv_broadcast(&p->p_lwpcv);

	return error;
}
541 | 541 | |||
542 | /* | 542 | /* | |
543 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. | 543 | * Create a new LWP within process 'p2', using LWP 'l1' as a template. | |
544 | * The new LWP is created in state LSIDL and must be set running, | 544 | * The new LWP is created in state LSIDL and must be set running, | |
545 | * suspended, or stopped by the caller. | 545 | * suspended, or stopped by the caller. | |
546 | */ | 546 | */ | |
547 | int | 547 | int | |
548 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, | 548 | lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, | |
549 | void *stack, size_t stacksize, void (*func)(void *), void *arg, | 549 | void *stack, size_t stacksize, void (*func)(void *), void *arg, | |
550 | lwp_t **rnewlwpp, int sclass) | 550 | lwp_t **rnewlwpp, int sclass) | |
551 | { | 551 | { | |
552 | struct lwp *l2, *isfree; | 552 | struct lwp *l2, *isfree; | |
553 | turnstile_t *ts; | 553 | turnstile_t *ts; | |
554 | 554 | |||
555 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); | 555 | KASSERT(l1 == curlwp || l1->l_proc == &proc0); | |
556 | 556 | |||
557 | /* | 557 | /* | |
558 | * First off, reap any detached LWP waiting to be collected. | 558 | * First off, reap any detached LWP waiting to be collected. | |
559 | * We can re-use its LWP structure and turnstile. | 559 | * We can re-use its LWP structure and turnstile. | |
560 | */ | 560 | */ | |
561 | isfree = NULL; | 561 | isfree = NULL; | |
562 | if (p2->p_zomblwp != NULL) { | 562 | if (p2->p_zomblwp != NULL) { | |
563 | mutex_enter(p2->p_lock); | 563 | mutex_enter(p2->p_lock); | |
564 | if ((isfree = p2->p_zomblwp) != NULL) { | 564 | if ((isfree = p2->p_zomblwp) != NULL) { | |
565 | p2->p_zomblwp = NULL; | 565 | p2->p_zomblwp = NULL; | |
566 | lwp_free(isfree, true, false);/* releases proc mutex */ | 566 | lwp_free(isfree, true, false);/* releases proc mutex */ | |
567 | } else | 567 | } else | |
568 | mutex_exit(p2->p_lock); | 568 | mutex_exit(p2->p_lock); | |
569 | } | 569 | } | |
570 | if (isfree == NULL) { | 570 | if (isfree == NULL) { | |
571 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); | 571 | l2 = pool_cache_get(lwp_cache, PR_WAITOK); | |
572 | memset(l2, 0, sizeof(*l2)); | 572 | memset(l2, 0, sizeof(*l2)); | |
573 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); | 573 | l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); | |
574 | SLIST_INIT(&l2->l_pi_lenders); | 574 | SLIST_INIT(&l2->l_pi_lenders); | |
575 | } else { | 575 | } else { | |
576 | l2 = isfree; | 576 | l2 = isfree; | |
577 | ts = l2->l_ts; | 577 | ts = l2->l_ts; | |
578 | KASSERT(l2->l_inheritedprio == -1); | 578 | KASSERT(l2->l_inheritedprio == -1); | |
579 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); | 579 | KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); | |
580 | memset(l2, 0, sizeof(*l2)); | 580 | memset(l2, 0, sizeof(*l2)); | |
581 | l2->l_ts = ts; | 581 | l2->l_ts = ts; | |
582 | } | 582 | } | |
583 | 583 | |||
584 | l2->l_stat = LSIDL; | 584 | l2->l_stat = LSIDL; | |
585 | l2->l_proc = p2; | 585 | l2->l_proc = p2; | |
586 | l2->l_refcnt = 1; | 586 | l2->l_refcnt = 1; | |
587 | l2->l_class = sclass; | 587 | l2->l_class = sclass; | |
588 | 588 | |||
589 | /* | 589 | /* | |
590 | * If vfork(), we want the LWP to run fast and on the same CPU | 590 | * If vfork(), we want the LWP to run fast and on the same CPU | |
591 | * as its parent, so that it can reuse the VM context and cache | 591 | * as its parent, so that it can reuse the VM context and cache | |
592 | * footprint on the local CPU. | 592 | * footprint on the local CPU. | |
593 | */ | 593 | */ | |
594 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); | 594 | l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); | |
595 | l2->l_kpribase = PRI_KERNEL; | 595 | l2->l_kpribase = PRI_KERNEL; | |
596 | l2->l_priority = l1->l_priority; | 596 | l2->l_priority = l1->l_priority; | |
597 | l2->l_inheritedprio = -1; | 597 | l2->l_inheritedprio = -1; | |
598 | l2->l_flag = 0; | 598 | l2->l_flag = 0; | |
599 | l2->l_pflag = LP_MPSAFE; | 599 | l2->l_pflag = LP_MPSAFE; | |
600 | TAILQ_INIT(&l2->l_ld_locks); | 600 | TAILQ_INIT(&l2->l_ld_locks); | |
601 | 601 | |||
602 | /* | 602 | /* | |
603 | * If not the first LWP in the process, grab a reference to the | 603 | * If not the first LWP in the process, grab a reference to the | |
604 | * descriptor table. | 604 | * descriptor table. | |
605 | */ | 605 | */ | |
606 | l2->l_fd = p2->p_fd; | 606 | l2->l_fd = p2->p_fd; | |
607 | if (p2->p_nlwps != 0) { | 607 | if (p2->p_nlwps != 0) { | |
608 | KASSERT(l1->l_proc == p2); | 608 | KASSERT(l1->l_proc == p2); | |
609 | atomic_inc_uint(&l2->l_fd->fd_refcnt); | 609 | fd_hold(l2); | |
610 | } else { | 610 | } else { | |
611 | KASSERT(l1->l_proc != p2); | 611 | KASSERT(l1->l_proc != p2); | |
612 | } | 612 | } | |
613 | 613 | |||
614 | if (p2->p_flag & PK_SYSTEM) { | 614 | if (p2->p_flag & PK_SYSTEM) { | |
615 | /* Mark it as a system LWP. */ | 615 | /* Mark it as a system LWP. */ | |
616 | l2->l_flag |= LW_SYSTEM; | 616 | l2->l_flag |= LW_SYSTEM; | |
617 | } | 617 | } | |
618 | 618 | |||
619 | kpreempt_disable(); | 619 | kpreempt_disable(); | |
620 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; | 620 | l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; | |
621 | l2->l_cpu = l1->l_cpu; | 621 | l2->l_cpu = l1->l_cpu; | |
622 | kpreempt_enable(); | 622 | kpreempt_enable(); | |
623 | 623 | |||
624 | lwp_initspecific(l2); | 624 | lwp_initspecific(l2); | |
625 | sched_lwp_fork(l1, l2); | 625 | sched_lwp_fork(l1, l2); | |
626 | lwp_update_creds(l2); | 626 | lwp_update_creds(l2); | |
627 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); | 627 | callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); | |
628 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); | 628 | callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); | |
629 | cv_init(&l2->l_sigcv, "sigwait"); | 629 | cv_init(&l2->l_sigcv, "sigwait"); | |
630 | l2->l_syncobj = &sched_syncobj; | 630 | l2->l_syncobj = &sched_syncobj; | |
631 | 631 | |||
632 | if (rnewlwpp != NULL) | 632 | if (rnewlwpp != NULL) | |
633 | *rnewlwpp = l2; | 633 | *rnewlwpp = l2; | |
634 | 634 | |||
635 | l2->l_addr = UAREA_TO_USER(uaddr); | 635 | l2->l_addr = UAREA_TO_USER(uaddr); | |
636 | uvm_lwp_fork(l1, l2, stack, stacksize, func, | 636 | uvm_lwp_fork(l1, l2, stack, stacksize, func, | |
637 | (arg != NULL) ? arg : l2); | 637 | (arg != NULL) ? arg : l2); | |
638 | 638 | |||
639 | mutex_enter(p2->p_lock); | 639 | mutex_enter(p2->p_lock); | |
640 | 640 | |||
641 | if ((flags & LWP_DETACHED) != 0) { | 641 | if ((flags & LWP_DETACHED) != 0) { | |
642 | l2->l_prflag = LPR_DETACHED; | 642 | l2->l_prflag = LPR_DETACHED; | |
643 | p2->p_ndlwps++; | 643 | p2->p_ndlwps++; | |
644 | } else | 644 | } else | |
645 | l2->l_prflag = 0; | 645 | l2->l_prflag = 0; | |
646 | 646 | |||
647 | l2->l_sigmask = l1->l_sigmask; | 647 | l2->l_sigmask = l1->l_sigmask; | |
648 | CIRCLEQ_INIT(&l2->l_sigpend.sp_info); | 648 | CIRCLEQ_INIT(&l2->l_sigpend.sp_info); | |
649 | sigemptyset(&l2->l_sigpend.sp_set); | 649 | sigemptyset(&l2->l_sigpend.sp_set); | |
650 | 650 | |||
651 | p2->p_nlwpid++; | 651 | p2->p_nlwpid++; | |
652 | if (p2->p_nlwpid == 0) | 652 | if (p2->p_nlwpid == 0) | |
653 | p2->p_nlwpid++; | 653 | p2->p_nlwpid++; | |
654 | l2->l_lid = p2->p_nlwpid; | 654 | l2->l_lid = p2->p_nlwpid; | |
655 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); | 655 | LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); | |
656 | p2->p_nlwps++; | 656 | p2->p_nlwps++; | |
657 | 657 | |||
658 | if ((p2->p_flag & PK_SYSTEM) == 0) { | 658 | if ((p2->p_flag & PK_SYSTEM) == 0) { | |
659 | /* Inherit an affinity */ | 659 | /* Inherit an affinity */ | |
660 | if (l1->l_flag & LW_AFFINITY) { | 660 | if (l1->l_flag & LW_AFFINITY) { | |
661 | /* | 661 | /* | |
662 | * Note that we hold the state lock while inheriting | 662 | * Note that we hold the state lock while inheriting | |
663 | * the affinity to avoid race with sched_setaffinity(). | 663 | * the affinity to avoid race with sched_setaffinity(). | |
664 | */ | 664 | */ | |
665 | lwp_lock(l1); | 665 | lwp_lock(l1); | |
666 | if (l1->l_flag & LW_AFFINITY) { | 666 | if (l1->l_flag & LW_AFFINITY) { | |
667 | kcpuset_use(l1->l_affinity); | 667 | kcpuset_use(l1->l_affinity); | |
668 | l2->l_affinity = l1->l_affinity; | 668 | l2->l_affinity = l1->l_affinity; | |
669 | l2->l_flag |= LW_AFFINITY; | 669 | l2->l_flag |= LW_AFFINITY; | |
670 | } | 670 | } | |
671 | lwp_unlock(l1); | 671 | lwp_unlock(l1); | |
672 | } | 672 | } | |
673 | lwp_lock(l2); | 673 | lwp_lock(l2); | |
674 | /* Inherit a processor-set */ | 674 | /* Inherit a processor-set */ | |
675 | l2->l_psid = l1->l_psid; | 675 | l2->l_psid = l1->l_psid; | |
676 | /* Look for a CPU to start */ | 676 | /* Look for a CPU to start */ | |
677 | l2->l_cpu = sched_takecpu(l2); | 677 | l2->l_cpu = sched_takecpu(l2); | |
678 | lwp_unlock_to(l2, l2->l_cpu->ci_schedstate.spc_mutex); | 678 | lwp_unlock_to(l2, l2->l_cpu->ci_schedstate.spc_mutex); | |
679 | } | 679 | } | |
680 | mutex_exit(p2->p_lock); | 680 | mutex_exit(p2->p_lock); | |
681 | 681 | |||
682 | mutex_enter(proc_lock); | 682 | mutex_enter(proc_lock); | |
683 | LIST_INSERT_HEAD(&alllwp, l2, l_list); | 683 | LIST_INSERT_HEAD(&alllwp, l2, l_list); | |
684 | mutex_exit(proc_lock); | 684 | mutex_exit(proc_lock); | |
685 | 685 | |||
686 | SYSCALL_TIME_LWP_INIT(l2); | 686 | SYSCALL_TIME_LWP_INIT(l2); | |
687 | 687 | |||
688 | if (p2->p_emul->e_lwp_fork) | 688 | if (p2->p_emul->e_lwp_fork) | |
689 | (*p2->p_emul->e_lwp_fork)(l1, l2); | 689 | (*p2->p_emul->e_lwp_fork)(l1, l2); | |
690 | 690 | |||
691 | return (0); | 691 | return (0); | |
692 | } | 692 | } | |
693 | 693 | |||
/*
 * Called by MD code when a new LWP begins execution.  Must be called
 * with the previous LWP locked (so at splsched), or if there is no
 * previous LWP, at splsched.
 */
void
lwp_startup(struct lwp *prev, struct lwp *new)
{

	KASSERT(kpreempt_disabled());
	if (prev != NULL) {
		/*
		 * Normalize the count of the spin-mutexes, it was
		 * increased in mi_switch().  Unmark the state of
		 * context switch - it is finished for previous LWP.
		 * The membar_exit() orders the count update before
		 * clearing l_ctxswtch, which other CPUs may be polling.
		 */
		curcpu()->ci_mtx_count++;
		membar_exit();
		prev->l_ctxswtch = 0;
	}
	KPREEMPT_DISABLE(new);
	spl0();
	pmap_activate(new);
	LOCKDEBUG_BARRIER(NULL, 0);
	KPREEMPT_ENABLE(new);
	/* Unless the LWP is marked MP-safe, enter holding the big lock. */
	if ((new->l_pflag & LP_MPSAFE) == 0) {
		KERNEL_LOCK(1, new);
	}
}
723 | 723 | |||
/*
 * Exit an LWP.
 *
 * Tears down the calling (or an idle, never-run) LWP: releases its
 * filedesc reference, specificdata, cached credentials, removes it
 * from the global LWP list, marks it a zombie, and finally switches
 * away if it is the current LWP.  If it is the last live LWP in the
 * process, the whole process exits via exit1() instead.
 */
void
lwp_exit(struct lwp *l)
{
	struct proc *p = l->l_proc;
	struct lwp *l2;
	bool current;

	current = (l == curlwp);

	/* A non-current LWP may only be reaped here if it never ran. */
	KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL));
	KASSERT(p == curproc);

	/*
	 * Verify that we hold no locks other than the kernel lock.
	 */
	LOCKDEBUG_BARRIER(&kernel_lock, 0);

	/*
	 * If we are the last live LWP in a process, we need to exit the
	 * entire process.  We do so with an exit status of zero, because
	 * it's a "controlled" exit, and because that's what Solaris does.
	 *
	 * We are not quite a zombie yet, but for accounting purposes we
	 * must increment the count of zombies here.
	 *
	 * Note: the last LWP's specificdata will be deleted here.
	 */
	mutex_enter(p->p_lock);
	if (p->p_nlwps - p->p_nzlwps == 1) {
		KASSERT(current == true);
		/* XXXSMP kernel_lock not held */
		exit1(l, 0);
		/* NOTREACHED */
	}
	p->p_nzlwps++;
	mutex_exit(p->p_lock);

	if (p->p_emul->e_lwp_exit)
		(*p->p_emul->e_lwp_exit)(l);

	/* Drop filedesc reference. */
	fd_free();

	/* Delete the specificdata while it's still safe to sleep. */
	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);

	/*
	 * Release our cached credentials.
	 */
	kauth_cred_free(l->l_cred);
	callout_destroy(&l->l_timeout_ch);

	/*
	 * Remove the LWP from the global list.
	 */
	mutex_enter(proc_lock);
	LIST_REMOVE(l, l_list);
	mutex_exit(proc_lock);

	/*
	 * Get rid of all references to the LWP that others (e.g. procfs)
	 * may have, and mark the LWP as a zombie.  If the LWP is detached,
	 * mark it waiting for collection in the proc structure.  Note that
	 * before we can do that, we need to free any other dead, detached
	 * LWP waiting to meet its maker.
	 */
	mutex_enter(p->p_lock);
	lwp_drainrefs(l);

	if ((l->l_prflag & LPR_DETACHED) != 0) {
		/* Reap any previously parked detached zombie first. */
		while ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);/* releases proc mutex */
			mutex_enter(p->p_lock);
			l->l_refcnt++;
			lwp_drainrefs(l);
		}
		p->p_zomblwp = l;
	}

	/*
	 * If we find a pending signal for the process and we have been
	 * asked to check for signals, then we lose: arrange to have
	 * all other LWPs in the process check for signals.
	 */
	if ((l->l_flag & LW_PENDSIG) != 0 &&
	    firstsig(&p->p_sigpend.sp_set) != 0) {
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			lwp_lock(l2);
			l2->l_flag |= LW_PENDSIG;
			lwp_unlock(l2);
		}
	}

	lwp_lock(l);
	l->l_stat = LSZOMB;
	if (l->l_name != NULL)
		strcpy(l->l_name, "(zombie)");
	if (l->l_flag & LW_AFFINITY) {
		l->l_flag &= ~LW_AFFINITY;
	} else {
		KASSERT(l->l_affinity == NULL);
	}
	lwp_unlock(l);
	p->p_nrlwps--;
	cv_broadcast(&p->p_lwpcv);
	if (l->l_lwpctl != NULL)
		l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
	mutex_exit(p->p_lock);

	/* Safe without lock since LWP is in zombie state */
	if (l->l_affinity) {
		kcpuset_unuse(l->l_affinity, NULL);
		l->l_affinity = NULL;
	}

	/*
	 * We can no longer block.  At this point, lwp_free() may already
	 * be gunning for us.  On a multi-CPU system, we may be off p_lwps.
	 *
	 * Free MD LWP resources.
	 */
	cpu_lwp_free(l, 0);

	if (current) {
		pmap_deactivate(l);

		/*
		 * Release the kernel lock, and switch away into
		 * oblivion.
		 */
#ifdef notyet
		/* XXXSMP hold in lwp_userret() */
		KERNEL_UNLOCK_LAST(l);
#else
		KERNEL_UNLOCK_ALL(l, NULL);
#endif
		lwp_exit_switchaway(l);
	}
}
867 | 867 | |||
/*
 * Free a dead LWP's remaining resources.
 *
 * If 'last' is false the LWP's accounting is folded into the process
 * and it is unlinked from p_lwps (p->p_lock must be held on entry and
 * is released).  If 'recycle' is true the turnstile and the LWP
 * structure itself are kept for reuse by the caller instead of being
 * returned to their pool caches.
 *
 * XXXLWP limits.
 */
void
lwp_free(struct lwp *l, bool recycle, bool last)
{
	struct proc *p = l->l_proc;
	struct rusage *ru;
	ksiginfoq_t kq;

	KASSERT(l != curlwp);

	/*
	 * If this was not the last LWP in the process, then adjust
	 * counters and unlock.
	 */
	if (!last) {
		/*
		 * Add the LWP's run time to the process' base value.
		 * This needs to coincide with coming off p_lwps.
		 */
		bintime_add(&p->p_rtime, &l->l_rtime);
		p->p_pctcpu += l->l_pctcpu;
		ru = &p->p_stats->p_ru;
		ruadd(ru, &l->l_ru);
		ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
		ru->ru_nivcsw += l->l_nivcsw;
		LIST_REMOVE(l, l_sibling);
		p->p_nlwps--;
		p->p_nzlwps--;
		if ((l->l_prflag & LPR_DETACHED) != 0)
			p->p_ndlwps--;

		/*
		 * Have any LWPs sleeping in lwp_wait() recheck for
		 * deadlock.
		 */
		cv_broadcast(&p->p_lwpcv);
		mutex_exit(p->p_lock);
	}

#ifdef MULTIPROCESSOR
	/*
	 * In the unlikely event that the LWP is still on the CPU,
	 * then spin until it has switched away.  We need to release
	 * all locks to avoid deadlock against interrupt handlers on
	 * the target CPU.
	 */
	if ((l->l_pflag & LP_RUNNING) != 0 || l->l_cpu->ci_curlwp == l) {
		int count;
		(void)count; /* XXXgcc */
		KERNEL_UNLOCK_ALL(curlwp, &count);
		while ((l->l_pflag & LP_RUNNING) != 0 ||
		    l->l_cpu->ci_curlwp == l)
			SPINLOCK_BACKOFF_HOOK;
		KERNEL_LOCK(count, curlwp);
	}
#endif

	/*
	 * Destroy the LWP's remaining signal information.
	 */
	ksiginfo_queue_init(&kq);
	sigclear(&l->l_sigpend, NULL, &kq);
	ksiginfo_queue_drain(&kq);
	cv_destroy(&l->l_sigcv);

	/*
	 * Free the LWP's turnstile and the LWP structure itself unless the
	 * caller wants to recycle them.  Also, free the scheduler specific
	 * data.
	 *
	 * We can't return turnstile0 to the pool (it didn't come from it),
	 * so if it comes up just drop it quietly and move on.
	 *
	 * We don't recycle the VM resources at this time.
	 */
	if (l->l_lwpctl != NULL)
		lwp_ctl_free(l);

	if (!recycle && l->l_ts != &turnstile0)
		pool_cache_put(turnstile_cache, l->l_ts);
	if (l->l_name != NULL)
		kmem_free(l->l_name, MAXCOMLEN);

	cpu_lwp_free2(l);
	uvm_lwp_exit(l);

	KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
	KASSERT(l->l_inheritedprio == -1);
	if (!recycle)
		pool_cache_put(lwp_cache, l);
}
963 | 963 | |||
/*
 * Migrate the LWP to another CPU.  Unlocks the LWP.
 *
 * The action taken depends on the LWP's state: runnable and on-CPU
 * LWPs only have l_target_cpu set (the scheduler completes the move),
 * while idle/sleeping/stopped LWPs have l_cpu switched directly.
 */
void
lwp_migrate(lwp_t *l, struct cpu_info *tci)
{
	struct schedstate_percpu *tspc;
	int lstat = l->l_stat;

	KASSERT(lwp_locked(l, NULL));
	KASSERT(tci != NULL);

	/* If LWP is still on the CPU, it must be handled like LSONPROC */
	if ((l->l_pflag & LP_RUNNING) != 0) {
		lstat = LSONPROC;
	}

	/*
	 * The destination CPU could be changed while previous migration
	 * was not finished.
	 */
	if (l->l_target_cpu != NULL) {
		l->l_target_cpu = tci;
		lwp_unlock(l);
		return;
	}

	/* Nothing to do if trying to migrate to the same CPU */
	if (l->l_cpu == tci) {
		lwp_unlock(l);
		return;
	}

	KASSERT(l->l_target_cpu == NULL);
	tspc = &tci->ci_schedstate;
	switch (lstat) {
	case LSRUN:
		l->l_target_cpu = tci;
		break;
	case LSIDL:
		l->l_cpu = tci;
		/* Hand the LWP's lock over to the target run queue. */
		lwp_unlock_to(l, tspc->spc_mutex);
		return;
	case LSSLEEP:
		l->l_cpu = tci;
		break;
	case LSSTOP:
	case LSSUSPENDED:
		l->l_cpu = tci;
		if (l->l_wchan == NULL) {
			lwp_unlock_to(l, tspc->spc_lwplock);
			return;
		}
		break;
	case LSONPROC:
		/* Force a preemption so the LWP comes off its CPU. */
		l->l_target_cpu = tci;
		spc_lock(l->l_cpu);
		cpu_need_resched(l->l_cpu, RESCHED_KPREEMPT);
		spc_unlock(l->l_cpu);
		break;
	}
	lwp_unlock(l);
}
1027 | 1027 | |||
/*
 * Find the LWP in the process.  Arguments may be zero, in such case,
 * the calling process and first LWP in the list will be used.
 * On success - returns with p->p_lock held.
 */
struct lwp *
lwp_find2(pid_t pid, lwpid_t lid)
{
	proc_t *p;
	lwp_t *l;

	/* Find the process */
	p = (pid == 0) ? curlwp->l_proc : p_find(pid, PFIND_UNLOCK_FAIL);
	if (p == NULL)
		return NULL;
	mutex_enter(p->p_lock);
	if (pid != 0) {
		/*
		 * Case of p_find: it returned with proc_lock held, and
		 * we only needed it to pin the process until p_lock was
		 * taken above.
		 */
		mutex_exit(proc_lock);
	}

	/* Find the thread */
	l = (lid == 0) ? LIST_FIRST(&p->p_lwps) : lwp_find(p, lid);
	if (l == NULL) {
		/* Not found: drop p_lock again before failing. */
		mutex_exit(p->p_lock);
	}

	return l;
}
1057 | 1057 | |||
1058 | /* | 1058 | /* | |
1059 | * Look up a live LWP within the speicifed process, and return it locked. | 1059 | * Look up a live LWP within the speicifed process, and return it locked. | |
1060 | * | 1060 | * | |
1061 | * Must be called with p->p_lock held. | 1061 | * Must be called with p->p_lock held. | |
1062 | */ | 1062 | */ | |
1063 | struct lwp * | 1063 | struct lwp * | |
1064 | lwp_find(struct proc *p, int id) | 1064 | lwp_find(struct proc *p, int id) | |
1065 | { | 1065 | { | |
1066 | struct lwp *l; | 1066 | struct lwp *l; | |
1067 | 1067 | |||
1068 | KASSERT(mutex_owned(p->p_lock)); | 1068 | KASSERT(mutex_owned(p->p_lock)); | |
1069 | 1069 | |||
1070 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | 1070 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | |
1071 | if (l->l_lid == id) | 1071 | if (l->l_lid == id) | |
1072 | break; | 1072 | break; | |
1073 | } | 1073 | } | |
1074 | 1074 | |||
1075 | /* | 1075 | /* | |
1076 | * No need to lock - all of these conditions will | 1076 | * No need to lock - all of these conditions will | |
1077 | * be visible with the process level mutex held. | 1077 | * be visible with the process level mutex held. | |
1078 | */ | 1078 | */ | |
1079 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) | 1079 | if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) | |
1080 | l = NULL; | 1080 | l = NULL; | |
1081 | 1081 | |||
1082 | return l; | 1082 | return l; | |
1083 | } | 1083 | } | |
1084 | 1084 | |||
/*
 * Update an LWP's cached credentials to mirror the process' master copy.
 *
 * This happens early in the syscall path, on user trap, and on LWP
 * creation.  A long-running LWP can also voluntarily choose to update
 * it's credentials by calling this routine.  This may be called from
 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
 */
void
lwp_update_creds(struct lwp *l)
{
	kauth_cred_t oc;
	struct proc *p;

	p = l->l_proc;
	oc = l->l_cred;

	/* Take a new reference on p_cred before replacing the cache. */
	mutex_enter(p->p_lock);
	kauth_cred_hold(p->p_cred);
	l->l_cred = p->p_cred;
	l->l_prflag &= ~LPR_CRMOD;
	mutex_exit(p->p_lock);
	/* Drop the old reference outside p_lock; freeing may be heavy. */
	if (oc != NULL)
		kauth_cred_free(oc);
}
1110 | 1110 | |||
1111 | /* | 1111 | /* | |
1112 | * Verify that an LWP is locked, and optionally verify that the lock matches | 1112 | * Verify that an LWP is locked, and optionally verify that the lock matches | |
1113 | * one we specify. | 1113 | * one we specify. | |
1114 | */ | 1114 | */ | |
1115 | int | 1115 | int | |
1116 | lwp_locked(struct lwp *l, kmutex_t *mtx) | 1116 | lwp_locked(struct lwp *l, kmutex_t *mtx) | |
1117 | { | 1117 | { | |
1118 | kmutex_t *cur = l->l_mutex; | 1118 | kmutex_t *cur = l->l_mutex; | |
1119 | 1119 | |||
1120 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); | 1120 | return mutex_owned(cur) && (mtx == cur || mtx == NULL); | |
1121 | } | 1121 | } | |
1122 | 1122 | |||
/*
 * Lock an LWP.
 *
 * Spins until the mutex acquired is still the LWP's current mutex
 * (l_mutex may be changed by lwp_setlock()/lwp_unlock_to() while we
 * wait).  Returns the mutex that was finally acquired.
 */
kmutex_t *
lwp_lock_retry(struct lwp *l, kmutex_t *old)
{

	/*
	 * XXXgcc ignoring kmutex_t * volatile on i386
	 *
	 * gcc version 4.1.2 20061021 prerelease (NetBSD nb1 20061021)
	 */
#if 1
	while (l->l_mutex != old) {
#else
	for (;;) {
#endif
		mutex_spin_exit(old);
		old = l->l_mutex;
		mutex_spin_enter(old);

		/*
		 * mutex_enter() will have posted a read barrier.  Re-test
		 * l->l_mutex.  If it has changed, we need to try again.
		 */
#if 1
	}
#else
	} while (__predict_false(l->l_mutex != old));
#endif

	return old;
}
1156 | 1156 | |||
/*
 * Lend a new mutex to an LWP.  The old mutex must be held.
 */
void
lwp_setlock(struct lwp *l, kmutex_t *new)
{

	KASSERT(mutex_owned(l->l_mutex));

	/*
	 * The barrier orders all prior stores before publishing the new
	 * mutex pointer, so other CPUs that acquire 'new' see them.
	 */
	membar_exit();
	l->l_mutex = new;
}
1169 | 1169 | |||
/*
 * Lend a new mutex to an LWP, and release the old mutex.  The old mutex
 * must be held.
 */
void
lwp_unlock_to(struct lwp *l, kmutex_t *new)
{
	kmutex_t *old;

	KASSERT(mutex_owned(l->l_mutex));

	old = l->l_mutex;
	/* Publish the new mutex pointer before dropping the old lock. */
	membar_exit();
	l->l_mutex = new;
	mutex_spin_exit(old);
}
1186 | 1186 | |||
/*
 * Acquire a new mutex, and donate it to an LWP.  The LWP must already be
 * locked.  No-op if the LWP's mutex is already 'new'.
 */
void
lwp_relock(struct lwp *l, kmutex_t *new)
{
	kmutex_t *old;

	KASSERT(mutex_owned(l->l_mutex));

	old = l->l_mutex;
	if (old != new) {
		/* Hold both locks across the switch-over. */
		mutex_spin_enter(new);
		l->l_mutex = new;
		mutex_spin_exit(old);
	}
}
1205 | 1205 | |||
/*
 * Try to lock an LWP without blocking.  Returns 1 on success, 0 if the
 * lock is contended.  Retries if the LWP's lock changes identity while
 * we are acquiring it (another CPU may relock the LWP concurrently).
 */
int
lwp_trylock(struct lwp *l)
{
	kmutex_t *old;

	for (;;) {
		if (!mutex_tryenter(old = l->l_mutex))
			return 0;
		/* Still the LWP's lock?  If so, we hold the right mutex. */
		if (__predict_true(l->l_mutex == old))
			return 1;
		/* The lock moved under us: release and retry. */
		mutex_spin_exit(old);
	}
}
1219 | 1219 | |||
1220 | void | 1220 | void | |
1221 | lwp_unsleep(lwp_t *l, bool cleanup) | 1221 | lwp_unsleep(lwp_t *l, bool cleanup) | |
1222 | { | 1222 | { | |
1223 | 1223 | |||
1224 | KASSERT(mutex_owned(l->l_mutex)); | 1224 | KASSERT(mutex_owned(l->l_mutex)); | |
1225 | (*l->l_syncobj->sobj_unsleep)(l, cleanup); | 1225 | (*l->l_syncobj->sobj_unsleep)(l, cleanup); | |
1226 | } | 1226 | } | |
1227 | 1227 | |||
1228 | 1228 | |||
/*
 * Handle exceptions for mi_userret().  Called if a member of LW_USERRET is
 * set: pending signals, core-dump/suspend requests, exit requests and the
 * per-process userret hook are processed before returning to userspace.
 */
void
lwp_userret(struct lwp *l)
{
	struct proc *p;
	void (*hook)(void);
	int sig;

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);
	p = l->l_proc;

#ifndef __HAVE_FAST_SOFTINTS
	/* Run pending soft interrupts. */
	if (l->l_cpu->ci_data.cpu_softints != 0)
		softint_overlay();
#endif

#ifdef KERN_SA
	/* Generate UNBLOCKED upcall if needed; does not return if taken. */
	if (l->l_flag & LW_SA_BLOCKING) {
		sa_unblock_userret(l);
		/* NOTREACHED */
	}
#endif

	/*
	 * It should be safe to do this read unlocked on a multiprocessor
	 * system.
	 *
	 * LW_SA_UPCALL will be handled after the while() loop, so don't
	 * consider it now.
	 */
	while ((l->l_flag & (LW_USERRET & ~(LW_SA_UPCALL))) != 0) {
		/*
		 * Process pending signals first, unless the process
		 * is dumping core or exiting, where we will instead
		 * enter the LW_WSUSPEND case below.
		 */
		if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
		    LW_PENDSIG) {
			mutex_enter(p->p_lock);
			while ((sig = issignal(l)) != 0)
				postsig(sig);
			mutex_exit(p->p_lock);
		}

		/*
		 * Core-dump or suspend pending.
		 *
		 * In case of core dump, suspend ourselves, so that the
		 * kernel stack and therefore the userland registers saved
		 * in the trapframe are around for coredump() to write them
		 * out.  We issue a wakeup on p->p_lwpcv so that sigexit()
		 * will write the core file out once all other LWPs are
		 * suspended.
		 */
		if ((l->l_flag & LW_WSUSPEND) != 0) {
			mutex_enter(p->p_lock);
			p->p_nrlwps--;
			cv_broadcast(&p->p_lwpcv);
			lwp_lock(l);
			l->l_stat = LSSUSPENDED;
			lwp_unlock(l);
			mutex_exit(p->p_lock);
			/* Relock and yield the CPU while suspended. */
			lwp_lock(l);
			mi_switch(l);
		}

		/* Process is exiting: lwp_exit() does not return. */
		if ((l->l_flag & LW_WEXIT) != 0) {
			lwp_exit(l);
			KASSERT(0);
			/* NOTREACHED */
		}

		/* Call userret hook; used by Linux emulation. */
		if ((l->l_flag & LW_WUSERRET) != 0) {
			lwp_lock(l);
			l->l_flag &= ~LW_WUSERRET;
			lwp_unlock(l);
			/* Consume the hook so it fires exactly once. */
			hook = p->p_userret;
			p->p_userret = NULL;
			(*hook)();
		}
	}

#ifdef KERN_SA
	/*
	 * Timer events are handled specially.  We only try once to deliver
	 * pending timer upcalls; if it fails, we can try again on the next
	 * loop around.  If we need to re-enter lwp_userret(), MD code will
	 * bounce us back here through the trap path after we return.
	 */
	if (p->p_timerpend)
		timerupcall(l);
	if (l->l_flag & LW_SA_UPCALL)
		sa_upcall_userret(l);
#endif /* KERN_SA */
}
1332 | 1332 | |||
/*
 * Force an LWP to enter the kernel, to take a trip through lwp_userret().
 * The LWP must be locked by the caller.
 */
void
lwp_need_userret(struct lwp *l)
{
	KASSERT(lwp_locked(l, NULL));

	/*
	 * Since the tests in lwp_userret() are done unlocked, make sure
	 * that the condition will be seen before forcing the LWP to enter
	 * kernel mode.
	 */
	membar_producer();
	cpu_signotify(l);
}
1349 | 1349 | |||
1350 | /* | 1350 | /* | |
1351 | * Add one reference to an LWP. This will prevent the LWP from | 1351 | * Add one reference to an LWP. This will prevent the LWP from | |
1352 | * exiting, thus keep the lwp structure and PCB around to inspect. | 1352 | * exiting, thus keep the lwp structure and PCB around to inspect. | |
1353 | */ | 1353 | */ | |
1354 | void | 1354 | void | |
1355 | lwp_addref(struct lwp *l) | 1355 | lwp_addref(struct lwp *l) | |
1356 | { | 1356 | { | |
1357 | 1357 | |||
1358 | KASSERT(mutex_owned(l->l_proc->p_lock)); | 1358 | KASSERT(mutex_owned(l->l_proc->p_lock)); | |
1359 | KASSERT(l->l_stat != LSZOMB); | 1359 | KASSERT(l->l_stat != LSZOMB); | |
1360 | KASSERT(l->l_refcnt != 0); | 1360 | KASSERT(l->l_refcnt != 0); | |
1361 | 1361 | |||
1362 | l->l_refcnt++; | 1362 | l->l_refcnt++; | |
1363 | } | 1363 | } | |
1364 | 1364 | |||
1365 | /* | 1365 | /* | |
1366 | * Remove one reference to an LWP. If this is the last reference, | 1366 | * Remove one reference to an LWP. If this is the last reference, | |
1367 | * then we must finalize the LWP's death. | 1367 | * then we must finalize the LWP's death. | |
1368 | */ | 1368 | */ | |
1369 | void | 1369 | void | |
1370 | lwp_delref(struct lwp *l) | 1370 | lwp_delref(struct lwp *l) | |
1371 | { | 1371 | { | |
1372 | struct proc *p = l->l_proc; | 1372 | struct proc *p = l->l_proc; | |
1373 | 1373 | |||
1374 | mutex_enter(p->p_lock); | 1374 | mutex_enter(p->p_lock); | |
1375 | KASSERT(l->l_stat != LSZOMB); | 1375 | KASSERT(l->l_stat != LSZOMB); | |
1376 | KASSERT(l->l_refcnt > 0); | 1376 | KASSERT(l->l_refcnt > 0); | |
1377 | if (--l->l_refcnt == 0) | 1377 | if (--l->l_refcnt == 0) | |
1378 | cv_broadcast(&p->p_lwpcv); | 1378 | cv_broadcast(&p->p_lwpcv); | |
1379 | mutex_exit(p->p_lock); | 1379 | mutex_exit(p->p_lock); | |
1380 | } | 1380 | } | |
1381 | 1381 | |||
1382 | /* | 1382 | /* | |
1383 | * Drain all references to the current LWP. | 1383 | * Drain all references to the current LWP. | |
1384 | */ | 1384 | */ | |
1385 | void | 1385 | void | |
1386 | lwp_drainrefs(struct lwp *l) | 1386 | lwp_drainrefs(struct lwp *l) | |
1387 | { | 1387 | { | |
1388 | struct proc *p = l->l_proc; | 1388 | struct proc *p = l->l_proc; | |
1389 | 1389 | |||
1390 | KASSERT(mutex_owned(p->p_lock)); | 1390 | KASSERT(mutex_owned(p->p_lock)); | |
1391 | KASSERT(l->l_refcnt != 0); | 1391 | KASSERT(l->l_refcnt != 0); | |
1392 | 1392 | |||
1393 | l->l_refcnt--; | 1393 | l->l_refcnt--; | |
1394 | while (l->l_refcnt != 0) | 1394 | while (l->l_refcnt != 0) | |
1395 | cv_wait(&p->p_lwpcv, p->p_lock); | 1395 | cv_wait(&p->p_lwpcv, p->p_lock); | |
1396 | } | 1396 | } | |
1397 | 1397 | |||
1398 | /* | 1398 | /* | |
1399 | * Return true if the specified LWP is 'alive'. Only p->p_lock need | 1399 | * Return true if the specified LWP is 'alive'. Only p->p_lock need | |
1400 | * be held. | 1400 | * be held. | |
1401 | */ | 1401 | */ | |
1402 | bool | 1402 | bool | |
1403 | lwp_alive(lwp_t *l) | 1403 | lwp_alive(lwp_t *l) | |
1404 | { | 1404 | { | |
1405 | 1405 | |||
1406 | KASSERT(mutex_owned(l->l_proc->p_lock)); | 1406 | KASSERT(mutex_owned(l->l_proc->p_lock)); | |
1407 | 1407 | |||
1408 | switch (l->l_stat) { | 1408 | switch (l->l_stat) { | |
1409 | case LSSLEEP: | 1409 | case LSSLEEP: | |
1410 | case LSRUN: | 1410 | case LSRUN: | |
1411 | case LSONPROC: | 1411 | case LSONPROC: | |
1412 | case LSSTOP: | 1412 | case LSSTOP: | |
1413 | case LSSUSPENDED: | 1413 | case LSSUSPENDED: | |
1414 | return true; | 1414 | return true; | |
1415 | default: | 1415 | default: | |
1416 | return false; | 1416 | return false; | |
1417 | } | 1417 | } | |
1418 | } | 1418 | } | |
1419 | 1419 | |||
1420 | /* | 1420 | /* | |
1421 | * Return first live LWP in the process. | 1421 | * Return first live LWP in the process. | |
1422 | */ | 1422 | */ | |
1423 | lwp_t * | 1423 | lwp_t * | |
1424 | lwp_find_first(proc_t *p) | 1424 | lwp_find_first(proc_t *p) | |
1425 | { | 1425 | { | |
1426 | lwp_t *l; | 1426 | lwp_t *l; | |
1427 | 1427 | |||
1428 | KASSERT(mutex_owned(p->p_lock)); | 1428 | KASSERT(mutex_owned(p->p_lock)); | |
1429 | 1429 | |||
1430 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | 1430 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { | |
1431 | if (lwp_alive(l)) { | 1431 | if (lwp_alive(l)) { | |
1432 | return l; | 1432 | return l; | |
1433 | } | 1433 | } | |
1434 | } | 1434 | } | |
1435 | 1435 | |||
1436 | return NULL; | 1436 | return NULL; | |
1437 | } | 1437 | } | |
1438 | 1438 | |||
1439 | /* | 1439 | /* | |
1440 | * lwp_specific_key_create -- | 1440 | * lwp_specific_key_create -- | |
1441 | * Create a key for subsystem lwp-specific data. | 1441 | * Create a key for subsystem lwp-specific data. | |
1442 | */ | 1442 | */ | |
1443 | int | 1443 | int | |
1444 | lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) | 1444 | lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) | |
1445 | { | 1445 | { | |
1446 | 1446 | |||
1447 | return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor)); | 1447 | return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor)); | |
1448 | } | 1448 | } | |
1449 | 1449 | |||
1450 | /* | 1450 | /* | |
1451 | * lwp_specific_key_delete -- | 1451 | * lwp_specific_key_delete -- | |
1452 | * Delete a key for subsystem lwp-specific data. | 1452 | * Delete a key for subsystem lwp-specific data. | |
1453 | */ | 1453 | */ | |
1454 | void | 1454 | void | |
1455 | lwp_specific_key_delete(specificdata_key_t key) | 1455 | lwp_specific_key_delete(specificdata_key_t key) | |
1456 | { | 1456 | { | |
1457 | 1457 | |||
1458 | specificdata_key_delete(lwp_specificdata_domain, key); | 1458 | specificdata_key_delete(lwp_specificdata_domain, key); | |
1459 | } | 1459 | } | |
1460 | 1460 | |||
1461 | /* | 1461 | /* | |
1462 | * lwp_initspecific -- | 1462 | * lwp_initspecific -- | |
1463 | * Initialize an LWP's specificdata container. | 1463 | * Initialize an LWP's specificdata container. | |
1464 | */ | 1464 | */ | |
1465 | void | 1465 | void | |
1466 | lwp_initspecific(struct lwp *l) | 1466 | lwp_initspecific(struct lwp *l) | |
1467 | { | 1467 | { | |
1468 | int error; | 1468 | int error; | |
1469 | 1469 | |||
1470 | error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref); | 1470 | error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref); | |
1471 | KASSERT(error == 0); | 1471 | KASSERT(error == 0); | |
1472 | } | 1472 | } | |
1473 | 1473 | |||
1474 | /* | 1474 | /* | |
1475 | * lwp_finispecific -- | 1475 | * lwp_finispecific -- | |
1476 | * Finalize an LWP's specificdata container. | 1476 | * Finalize an LWP's specificdata container. | |
1477 | */ | 1477 | */ | |
1478 | void | 1478 | void | |
1479 | lwp_finispecific(struct lwp *l) | 1479 | lwp_finispecific(struct lwp *l) | |
1480 | { | 1480 | { | |
1481 | 1481 | |||
1482 | specificdata_fini(lwp_specificdata_domain, &l->l_specdataref); | 1482 | specificdata_fini(lwp_specificdata_domain, &l->l_specdataref); | |
1483 | } | 1483 | } | |
1484 | 1484 | |||
1485 | /* | 1485 | /* | |
1486 | * lwp_getspecific -- | 1486 | * lwp_getspecific -- | |
1487 | * Return lwp-specific data corresponding to the specified key. | 1487 | * Return lwp-specific data corresponding to the specified key. | |
1488 | * | 1488 | * | |
1489 | * Note: LWP specific data is NOT INTERLOCKED. An LWP should access | 1489 | * Note: LWP specific data is NOT INTERLOCKED. An LWP should access | |
1490 | * only its OWN SPECIFIC DATA. If it is necessary to access another | 1490 | * only its OWN SPECIFIC DATA. If it is necessary to access another | |
1491 | * LWP's specifc data, care must be taken to ensure that doing so | 1491 | * LWP's specifc data, care must be taken to ensure that doing so | |
1492 | * would not cause internal data structure inconsistency (i.e. caller | 1492 | * would not cause internal data structure inconsistency (i.e. caller | |
1493 | * can guarantee that the target LWP is not inside an lwp_getspecific() | 1493 | * can guarantee that the target LWP is not inside an lwp_getspecific() | |
1494 | * or lwp_setspecific() call). | 1494 | * or lwp_setspecific() call). | |
1495 | */ | 1495 | */ | |
1496 | void * | 1496 | void * | |
1497 | lwp_getspecific(specificdata_key_t key) | 1497 | lwp_getspecific(specificdata_key_t key) | |
1498 | { | 1498 | { | |
1499 | 1499 | |||
1500 | return (specificdata_getspecific_unlocked(lwp_specificdata_domain, | 1500 | return (specificdata_getspecific_unlocked(lwp_specificdata_domain, | |
1501 | &curlwp->l_specdataref, key)); | 1501 | &curlwp->l_specdataref, key)); | |
1502 | } | 1502 | } | |
1503 | 1503 | |||
1504 | void * | 1504 | void * | |
1505 | _lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key) | 1505 | _lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key) | |
1506 | { | 1506 | { | |
1507 | 1507 | |||
1508 | return (specificdata_getspecific_unlocked(lwp_specificdata_domain, | 1508 | return (specificdata_getspecific_unlocked(lwp_specificdata_domain, | |
1509 | &l->l_specdataref, key)); | 1509 | &l->l_specdataref, key)); | |
1510 | } | 1510 | } | |
1511 | 1511 | |||
1512 | /* | 1512 | /* | |
1513 | * lwp_setspecific -- | 1513 | * lwp_setspecific -- | |
1514 | * Set lwp-specific data corresponding to the specified key. | 1514 | * Set lwp-specific data corresponding to the specified key. | |
1515 | */ | 1515 | */ | |
1516 | void | 1516 | void | |
1517 | lwp_setspecific(specificdata_key_t key, void *data) | 1517 | lwp_setspecific(specificdata_key_t key, void *data) | |
1518 | { | 1518 | { | |
1519 | 1519 | |||
1520 | specificdata_setspecific(lwp_specificdata_domain, | 1520 | specificdata_setspecific(lwp_specificdata_domain, | |
1521 | &curlwp->l_specdataref, key, data); | 1521 | &curlwp->l_specdataref, key, data); | |
1522 | } | 1522 | } | |
1523 | 1523 | |||
1524 | /* | 1524 | /* | |
1525 | * Allocate a new lwpctl structure for a user LWP. | 1525 | * Allocate a new lwpctl structure for a user LWP. | |
1526 | */ | 1526 | */ | |
1527 | int | 1527 | int | |
1528 | lwp_ctl_alloc(vaddr_t *uaddr) | 1528 | lwp_ctl_alloc(vaddr_t *uaddr) | |
1529 | { | 1529 | { | |
1530 | lcproc_t *lp; | 1530 | lcproc_t *lp; | |
1531 | u_int bit, i, offset; | 1531 | u_int bit, i, offset; | |
1532 | struct uvm_object *uao; | 1532 | struct uvm_object *uao; | |
1533 | int error; | 1533 | int error; | |
1534 | lcpage_t *lcp; | 1534 | lcpage_t *lcp; | |
1535 | proc_t *p; | 1535 | proc_t *p; | |
1536 | lwp_t *l; | 1536 | lwp_t *l; | |
1537 | 1537 | |||
1538 | l = curlwp; | 1538 | l = curlwp; | |
1539 | p = l->l_proc; | 1539 | p = l->l_proc; | |
1540 | 1540 | |||
1541 | if (l->l_lcpage != NULL) { | 1541 | if (l->l_lcpage != NULL) { | |
1542 | lcp = l->l_lcpage; | 1542 | lcp = l->l_lcpage; | |
1543 | *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr; | 1543 | *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr; | |
1544 | return (EINVAL); | 1544 | return (EINVAL); | |
1545 | } | 1545 | } | |
1546 | 1546 | |||
1547 | /* First time around, allocate header structure for the process. */ | 1547 | /* First time around, allocate header structure for the process. */ | |
1548 | if ((lp = p->p_lwpctl) == NULL) { | 1548 | if ((lp = p->p_lwpctl) == NULL) { | |
1549 | lp = kmem_alloc(sizeof(*lp), KM_SLEEP); | 1549 | lp = kmem_alloc(sizeof(*lp), KM_SLEEP); | |
1550 | mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); | 1550 | mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); | |
1551 | lp->lp_uao = NULL; | 1551 | lp->lp_uao = NULL; | |
1552 | TAILQ_INIT(&lp->lp_pages); | 1552 | TAILQ_INIT(&lp->lp_pages); | |
1553 | mutex_enter(p->p_lock); | 1553 | mutex_enter(p->p_lock); | |
1554 | if (p->p_lwpctl == NULL) { | 1554 | if (p->p_lwpctl == NULL) { | |
1555 | p->p_lwpctl = lp; | 1555 | p->p_lwpctl = lp; | |
1556 | mutex_exit(p->p_lock); | 1556 | mutex_exit(p->p_lock); | |
1557 | } else { | 1557 | } else { | |
1558 | mutex_exit(p->p_lock); | 1558 | mutex_exit(p->p_lock); | |
1559 | mutex_destroy(&lp->lp_lock); | 1559 | mutex_destroy(&lp->lp_lock); | |
1560 | kmem_free(lp, sizeof(*lp)); | 1560 | kmem_free(lp, sizeof(*lp)); | |
1561 | lp = p->p_lwpctl; | 1561 | lp = p->p_lwpctl; | |
1562 | } | 1562 | } | |
1563 | } | 1563 | } | |
1564 | 1564 | |||
1565 | /* | 1565 | /* | |
1566 | * Set up an anonymous memory region to hold the shared pages. | 1566 | * Set up an anonymous memory region to hold the shared pages. | |
1567 | * Map them into the process' address space. The user vmspace | 1567 | * Map them into the process' address space. The user vmspace | |
1568 | * gets the first reference on the UAO. | 1568 | * gets the first reference on the UAO. | |
1569 | */ | 1569 | */ | |
1570 | mutex_enter(&lp->lp_lock); | 1570 | mutex_enter(&lp->lp_lock); | |
1571 | if (lp->lp_uao == NULL) { | 1571 | if (lp->lp_uao == NULL) { | |
1572 | lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); | 1572 | lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); | |
1573 | lp->lp_cur = 0; | 1573 | lp->lp_cur = 0; | |
1574 | lp->lp_max = LWPCTL_UAREA_SZ; | 1574 | lp->lp_max = LWPCTL_UAREA_SZ; | |
1575 | lp->lp_uva = p->p_emul->e_vm_default_addr(p, | 1575 | lp->lp_uva = p->p_emul->e_vm_default_addr(p, | |
1576 | (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ); | 1576 | (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ); | |
1577 | error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, | 1577 | error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, | |
1578 | LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, | 1578 | LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, | |
1579 | UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); | 1579 | UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); | |
1580 | if (error != 0) { | 1580 | if (error != 0) { | |
1581 | uao_detach(lp->lp_uao); | 1581 | uao_detach(lp->lp_uao); | |
1582 | lp->lp_uao = NULL; | 1582 | lp->lp_uao = NULL; | |
1583 | mutex_exit(&lp->lp_lock); | 1583 | mutex_exit(&lp->lp_lock); | |
1584 | return error; | 1584 | return error; | |
1585 | } | 1585 | } | |
1586 | } | 1586 | } | |
1587 | 1587 | |||
1588 | /* Get a free block and allocate for this LWP. */ | 1588 | /* Get a free block and allocate for this LWP. */ | |
1589 | TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { | 1589 | TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { | |
1590 | if (lcp->lcp_nfree != 0) | 1590 | if (lcp->lcp_nfree != 0) | |
1591 | break; | 1591 | break; | |
1592 | } | 1592 | } | |
1593 | if (lcp == NULL) { | 1593 | if (lcp == NULL) { | |
1594 | /* Nothing available - try to set up a free page. */ | 1594 | /* Nothing available - try to set up a free page. */ | |
1595 | if (lp->lp_cur == lp->lp_max) { | 1595 | if (lp->lp_cur == lp->lp_max) { | |
1596 | mutex_exit(&lp->lp_lock); | 1596 | mutex_exit(&lp->lp_lock); | |
1597 | return ENOMEM; | 1597 | return ENOMEM; | |
1598 | } | 1598 | } | |
1599 | lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); | 1599 | lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); | |
1600 | if (lcp == NULL) { | 1600 | if (lcp == NULL) { | |
1601 | mutex_exit(&lp->lp_lock); | 1601 | mutex_exit(&lp->lp_lock); | |
1602 | return ENOMEM; | 1602 | return ENOMEM; | |
1603 | } | 1603 | } | |
1604 | /* | 1604 | /* | |
1605 | * Wire the next page down in kernel space. Since this | 1605 | * Wire the next page down in kernel space. Since this | |
1606 | * is a new mapping, we must add a reference. | 1606 | * is a new mapping, we must add a reference. | |
1607 | */ | 1607 | */ | |
1608 | uao = lp->lp_uao; | 1608 | uao = lp->lp_uao; |
--- src/sys/sys/filedesc.h 2009/05/25 03:59:45 1.56
+++ src/sys/sys/filedesc.h 2009/10/27 02:58:28 1.57
@@ -1,236 +1,236 @@ | @@ -1,236 +1,236 @@ | |||
1 | /* $NetBSD: filedesc.h,v 1.56 2009/05/25 03:59:45 yamt Exp $ */ | 1 | /* $NetBSD: filedesc.h,v 1.57 2009/10/27 02:58:28 rmind Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 9 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 10 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 11 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 14 | * documentation and/or other materials provided with the distribution. | |
15 | * | 15 | * | |
16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
26 | * POSSIBILITY OF SUCH DAMAGE. | 26 | * POSSIBILITY OF SUCH DAMAGE. | |
27 | */ | 27 | */ | |
28 | 28 | |||
29 | /* | 29 | /* | |
30 | * Copyright (c) 1990, 1993 | 30 | * Copyright (c) 1990, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | 31 | * The Regents of the University of California. All rights reserved. | |
32 | * | 32 | * | |
33 | * Redistribution and use in source and binary forms, with or without | 33 | * Redistribution and use in source and binary forms, with or without | |
34 | * modification, are permitted provided that the following conditions | 34 | * modification, are permitted provided that the following conditions | |
35 | * are met: | 35 | * are met: | |
36 | * 1. Redistributions of source code must retain the above copyright | 36 | * 1. Redistributions of source code must retain the above copyright | |
37 | * notice, this list of conditions and the following disclaimer. | 37 | * notice, this list of conditions and the following disclaimer. | |
38 | * 2. Redistributions in binary form must reproduce the above copyright | 38 | * 2. Redistributions in binary form must reproduce the above copyright | |
39 | * notice, this list of conditions and the following disclaimer in the | 39 | * notice, this list of conditions and the following disclaimer in the | |
40 | * documentation and/or other materials provided with the distribution. | 40 | * documentation and/or other materials provided with the distribution. | |
41 | * 3. Neither the name of the University nor the names of its contributors | 41 | * 3. Neither the name of the University nor the names of its contributors | |
42 | * may be used to endorse or promote products derived from this software | 42 | * may be used to endorse or promote products derived from this software | |
43 | * without specific prior written permission. | 43 | * without specific prior written permission. | |
44 | * | 44 | * | |
45 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 45 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
46 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 46 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
48 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 48 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
49 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 49 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
50 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 50 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
51 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 51 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
52 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 52 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
53 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 53 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
54 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 54 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
55 | * SUCH DAMAGE. | 55 | * SUCH DAMAGE. | |
56 | * | 56 | * | |
57 | * @(#)filedesc.h 8.1 (Berkeley) 6/2/93 | 57 | * @(#)filedesc.h 8.1 (Berkeley) 6/2/93 | |
58 | */ | 58 | */ | |
59 | 59 | |||
60 | #ifndef _SYS_FILEDESC_H_ | 60 | #ifndef _SYS_FILEDESC_H_ | |
61 | #define _SYS_FILEDESC_H_ | 61 | #define _SYS_FILEDESC_H_ | |
62 | 62 | |||
63 | #include <sys/param.h> | 63 | #include <sys/param.h> | |
64 | #include <sys/queue.h> | 64 | #include <sys/queue.h> | |
65 | #include <sys/mutex.h> | 65 | #include <sys/mutex.h> | |
66 | #include <sys/rwlock.h> | 66 | #include <sys/rwlock.h> | |
67 | #include <sys/condvar.h> | 67 | #include <sys/condvar.h> | |
68 | 68 | |||
69 | /* | 69 | /* | |
70 | * This structure is used for the management of descriptors. It may be | 70 | * This structure is used for the management of descriptors. It may be | |
71 | * shared by multiple processes. | 71 | * shared by multiple processes. | |
72 | * | 72 | * | |
73 | * A process is initially started out with NDFILE descriptors stored within | 73 | * A process is initially started out with NDFILE descriptors stored within | |
74 | * this structure, selected to be enough for typical applications based on | 74 | * this structure, selected to be enough for typical applications based on | |
75 | * the historical limit of 20 open files (and the usage of descriptors by | 75 | * the historical limit of 20 open files (and the usage of descriptors by | |
76 | * shells). If these descriptors are exhausted, a larger descriptor table | 76 | * shells). If these descriptors are exhausted, a larger descriptor table | |
77 | * may be allocated, up to a process' resource limit; the internal arrays | 77 | * may be allocated, up to a process' resource limit; the internal arrays | |
78 | * are then unused. The initial expansion is set to NDEXTENT; each time | 78 | * are then unused. The initial expansion is set to NDEXTENT; each time | |
79 | * it runs out, it is doubled until the resource limit is reached. NDEXTENT | 79 | * it runs out, it is doubled until the resource limit is reached. NDEXTENT | |
80 | * should be selected to be the biggest multiple of OFILESIZE (see below) | 80 | * should be selected to be the biggest multiple of OFILESIZE (see below) | |
81 | * that will fit in a power-of-two sized piece of memory. | 81 | * that will fit in a power-of-two sized piece of memory. | |
82 | */ | 82 | */ | |
83 | #define NDFILE 20 | 83 | #define NDFILE 20 | |
84 | #define NDEXTENT 50 /* 250 bytes in 256-byte alloc */ | 84 | #define NDEXTENT 50 /* 250 bytes in 256-byte alloc */ | |
85 | #define NDENTRIES 32 /* 32 fds per entry */ | 85 | #define NDENTRIES 32 /* 32 fds per entry */ | |
86 | #define NDENTRYMASK (NDENTRIES - 1) | 86 | #define NDENTRYMASK (NDENTRIES - 1) | |
87 | #define NDENTRYSHIFT 5 /* bits per entry */ | 87 | #define NDENTRYSHIFT 5 /* bits per entry */ | |
88 | #define NDLOSLOTS(x) (((x) + NDENTRIES - 1) >> NDENTRYSHIFT) | 88 | #define NDLOSLOTS(x) (((x) + NDENTRIES - 1) >> NDENTRYSHIFT) | |
89 | #define NDHISLOTS(x) ((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT) | 89 | #define NDHISLOTS(x) ((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT) | |
90 | #define NDFDFILE 6 /* first 6 descriptors are free */ | 90 | #define NDFDFILE 6 /* first 6 descriptors are free */ | |
91 | 91 | |||
92 | /* | 92 | /* | |
93 | * Process-private descriptor reference, one for each descriptor slot | 93 | * Process-private descriptor reference, one for each descriptor slot | |
94 | * in use. Locks: | 94 | * in use. Locks: | |
95 | * | 95 | * | |
96 | * : unlocked | 96 | * : unlocked | |
97 | * a atomic operations + filedesc_t::fd_lock in some cases | 97 | * a atomic operations + filedesc_t::fd_lock in some cases | |
98 | * d filedesc_t::fd_lock | 98 | * d filedesc_t::fd_lock | |
99 | * | 99 | * | |
100 | * Note that ff_exclose and ff_allocated are likely to be byte sized | 100 | * Note that ff_exclose and ff_allocated are likely to be byte sized | |
101 | * (bool). In general adjacent sub-word sized fields must be locked | 101 | * (bool). In general adjacent sub-word sized fields must be locked | |
102 | * the same way, but in this case it's ok: ff_exclose can only be | 102 | * the same way, but in this case it's ok: ff_exclose can only be | |
103 | * modified while the descriptor slot is live, and ff_allocated when | 103 | * modified while the descriptor slot is live, and ff_allocated when | |
104 | * it's invalid. | 104 | * it's invalid. | |
105 | */ | 105 | */ | |
106 | typedef struct fdfile { | 106 | typedef struct fdfile { | |
107 | bool ff_exclose; /* :: close on exec flag */ | 107 | bool ff_exclose; /* :: close on exec flag */ | |
108 | bool ff_allocated; /* d: descriptor slot is allocated */ | 108 | bool ff_allocated; /* d: descriptor slot is allocated */ | |
109 | u_int ff_refcnt; /* a: reference count on structure */ | 109 | u_int ff_refcnt; /* a: reference count on structure */ | |
110 | struct file *ff_file; /* d: pointer to file if open */ | 110 | struct file *ff_file; /* d: pointer to file if open */ | |
111 | SLIST_HEAD(,knote) ff_knlist; /* d: knotes attached to this fd */ | 111 | SLIST_HEAD(,knote) ff_knlist; /* d: knotes attached to this fd */ | |
112 | kcondvar_t ff_closing; /* d: notifier for close */ | 112 | kcondvar_t ff_closing; /* d: notifier for close */ | |
113 | } fdfile_t; | 113 | } fdfile_t; | |
114 | 114 | |||
115 | /* Reference count */ | 115 | /* Reference count */ | |
116 | #define FR_CLOSING (0x80000000) /* closing: must interlock */ | 116 | #define FR_CLOSING (0x80000000) /* closing: must interlock */ | |
117 | #define FR_MASK (~FR_CLOSING) /* reference count */ | 117 | #define FR_MASK (~FR_CLOSING) /* reference count */ | |
118 | 118 | |||
119 | /* | 119 | /* | |
120 | * Open file table, potentially many 'active' tables per filedesc_t | 120 | * Open file table, potentially many 'active' tables per filedesc_t | |
121 | * in a multi-threaded process, or with a shared filedesc_t (clone()). | 121 | * in a multi-threaded process, or with a shared filedesc_t (clone()). | |
122 | * nfiles is first to avoid pointer arithmetic. | 122 | * nfiles is first to avoid pointer arithmetic. | |
123 | */ | 123 | */ | |
124 | typedef struct fdtab { | 124 | typedef struct fdtab { | |
125 | u_int dt_nfiles; /* number of open files allocated */ | 125 | u_int dt_nfiles; /* number of open files allocated */ | |
126 | struct fdtab *dt_link; /* for lists of dtab */ | 126 | struct fdtab *dt_link; /* for lists of dtab */ | |
127 | fdfile_t *dt_ff[NDFILE]; /* file structures for open fds */ | 127 | fdfile_t *dt_ff[NDFILE]; /* file structures for open fds */ | |
128 | } fdtab_t; | 128 | } fdtab_t; | |
129 | 129 | |||
130 | typedef struct filedesc { | 130 | typedef struct filedesc { | |
131 | /* | 131 | /* | |
132 | * Built-in fdfile_t records first, since they have strict | 132 | * Built-in fdfile_t records first, since they have strict | |
133 | * alignment requirements. | 133 | * alignment requirements. | |
134 | */ | 134 | */ | |
135 | uint8_t fd_dfdfile[NDFDFILE][CACHE_LINE_SIZE]; | 135 | uint8_t fd_dfdfile[NDFDFILE][CACHE_LINE_SIZE]; | |
136 | /* | 136 | /* | |
137 | * All of the remaining fields are locked by fd_lock. | 137 | * All of the remaining fields are locked by fd_lock. | |
138 | */ | 138 | */ | |
139 | kmutex_t fd_lock; /* lock on structure */ | 139 | kmutex_t fd_lock; /* lock on structure */ | |
140 | fdtab_t * volatile fd_dt; /* active descriptor table */ | 140 | fdtab_t * volatile fd_dt; /* active descriptor table */ | |
141 | uint32_t *fd_himap; /* each bit points to 32 fds */ | 141 | uint32_t *fd_himap; /* each bit points to 32 fds */ | |
142 | uint32_t *fd_lomap; /* bitmap of free fds */ | 142 | uint32_t *fd_lomap; /* bitmap of free fds */ | |
143 | struct klist *fd_knhash; /* hash of attached non-fd knotes */ | 143 | struct klist *fd_knhash; /* hash of attached non-fd knotes */ | |
144 | int fd_lastkqfile; /* max descriptor for kqueue */ | 144 | int fd_lastkqfile; /* max descriptor for kqueue */ | |
145 | int fd_lastfile; /* high-water mark of fd_ofiles */ | 145 | int fd_lastfile; /* high-water mark of fd_ofiles */ | |
146 | int fd_refcnt; /* reference count */ | 146 | int fd_refcnt; /* reference count */ | |
147 | u_long fd_knhashmask; /* size of fd_knhash */ | 147 | u_long fd_knhashmask; /* size of fd_knhash */ | |
148 | int fd_freefile; /* approx. next free file */ | 148 | int fd_freefile; /* approx. next free file */ | |
149 | int fd_unused; /* unused */ | 149 | int fd_unused; /* unused */ | |
150 | bool fd_exclose; /* non-zero if >0 fd with EXCLOSE */ | 150 | bool fd_exclose; /* non-zero if >0 fd with EXCLOSE */ | |
151 | /* | 151 | /* | |
152 | * This structure is used when the number of open files is | 152 | * This structure is used when the number of open files is | |
153 | * <= NDFILE, and are then pointed to by the pointers above. | 153 | * <= NDFILE, and are then pointed to by the pointers above. | |
154 | */ | 154 | */ | |
155 | fdtab_t fd_dtbuiltin; | 155 | fdtab_t fd_dtbuiltin; | |
156 | /* | 156 | /* | |
157 | * These arrays are used when the number of open files is | 157 | * These arrays are used when the number of open files is | |
158 | * <= 1024, and are then pointed to by the pointers above. | 158 | * <= 1024, and are then pointed to by the pointers above. | |
159 | */ | 159 | */ | |
160 | #define fd_startzero fd_dhimap /* area to zero on return to cache */ | 160 | #define fd_startzero fd_dhimap /* area to zero on return to cache */ | |
161 | uint32_t fd_dhimap[NDENTRIES >> NDENTRYSHIFT]; | 161 | uint32_t fd_dhimap[NDENTRIES >> NDENTRYSHIFT]; | |
162 | uint32_t fd_dlomap[NDENTRIES]; | 162 | uint32_t fd_dlomap[NDENTRIES]; | |
163 | } filedesc_t; | 163 | } filedesc_t; | |
164 | 164 | |||
165 | typedef struct cwdinfo { | 165 | typedef struct cwdinfo { | |
166 | struct vnode *cwdi_cdir; /* current directory */ | 166 | struct vnode *cwdi_cdir; /* current directory */ | |
167 | struct vnode *cwdi_rdir; /* root directory */ | 167 | struct vnode *cwdi_rdir; /* root directory */ | |
168 | struct vnode *cwdi_edir; /* emulation root (if known) */ | 168 | struct vnode *cwdi_edir; /* emulation root (if known) */ | |
169 | krwlock_t cwdi_lock; /* lock on entire struct */ | 169 | krwlock_t cwdi_lock; /* lock on entire struct */ | |
170 | u_short cwdi_cmask; /* mask for file creation */ | 170 | u_short cwdi_cmask; /* mask for file creation */ | |
171 | u_int cwdi_refcnt; /* reference count */ | 171 | u_int cwdi_refcnt; /* reference count */ | |
172 | } cwdinfo_t; | 172 | } cwdinfo_t; | |
173 | 173 | |||
174 | #ifdef _KERNEL | 174 | #ifdef _KERNEL | |
175 | 175 | |||
176 | struct fileops; | 176 | struct fileops; | |
177 | struct socket; | 177 | struct socket; | |
178 | struct proc; | 178 | struct proc; | |
179 | 179 | |||
180 | /* | 180 | /* | |
181 | * Kernel global variables and routines. | 181 | * Kernel global variables and routines. | |
182 | */ | 182 | */ | |
183 | void fd_sys_init(void); | 183 | void fd_sys_init(void); | |
184 | int fd_dupopen(int, int *, int, int); | 184 | int fd_dupopen(int, int *, int, int); | |
185 | int fd_alloc(struct proc *, int, int *); | 185 | int fd_alloc(struct proc *, int, int *); | |
186 | void fd_tryexpand(struct proc *); | 186 | void fd_tryexpand(struct proc *); | |
187 | int fd_allocfile(file_t **, int *); | 187 | int fd_allocfile(file_t **, int *); | |
188 | void fd_affix(struct proc *, file_t *, unsigned); | 188 | void fd_affix(struct proc *, file_t *, unsigned); | |
189 | void fd_abort(struct proc *, file_t *, unsigned); | 189 | void fd_abort(struct proc *, file_t *, unsigned); | |
190 | filedesc_t *fd_copy(void); | 190 | filedesc_t *fd_copy(void); | |
191 | filedesc_t *fd_init(filedesc_t *); | 191 | filedesc_t *fd_init(filedesc_t *); | |
192 | void fd_share(proc_t *); | 192 | void fd_share(proc_t *); | |
193 | void fd_hold(void); | 193 | void fd_hold(lwp_t *); | |
194 | void fd_free(void); | 194 | void fd_free(void); | |
195 | void fd_closeexec(void); | 195 | void fd_closeexec(void); | |
196 | int fd_checkstd(void); | 196 | int fd_checkstd(void); | |
197 | file_t *fd_getfile(unsigned); | 197 | file_t *fd_getfile(unsigned); | |
198 | file_t *fd_getfile2(proc_t *, unsigned); | 198 | file_t *fd_getfile2(proc_t *, unsigned); | |
199 | void fd_putfile(unsigned); | 199 | void fd_putfile(unsigned); | |
200 | int fd_getvnode(unsigned, file_t **); | 200 | int fd_getvnode(unsigned, file_t **); | |
201 | int fd_getsock(unsigned, struct socket **); | 201 | int fd_getsock(unsigned, struct socket **); | |
202 | void fd_putvnode(unsigned); | 202 | void fd_putvnode(unsigned); | |
203 | void fd_putsock(unsigned); | 203 | void fd_putsock(unsigned); | |
204 | int fd_close(unsigned); | 204 | int fd_close(unsigned); | |
205 | int fd_dup(file_t *, int, int *, bool); | 205 | int fd_dup(file_t *, int, int *, bool); | |
206 | int fd_dup2(file_t *, unsigned); | 206 | int fd_dup2(file_t *, unsigned); | |
207 | int fd_clone(file_t *, unsigned, int, const struct fileops *, void *); | 207 | int fd_clone(file_t *, unsigned, int, const struct fileops *, void *); | |
208 | 208 | |||
209 | void cwd_sys_init(void); | 209 | void cwd_sys_init(void); | |
210 | struct cwdinfo *cwdinit(void); | 210 | struct cwdinfo *cwdinit(void); | |
211 | void cwdshare(proc_t *); | 211 | void cwdshare(proc_t *); | |
212 | void cwdunshare(proc_t *); | 212 | void cwdunshare(proc_t *); | |
213 | void cwdfree(struct cwdinfo *); | 213 | void cwdfree(struct cwdinfo *); | |
214 | 214 | |||
215 | #define GETCWD_CHECK_ACCESS 0x0001 | 215 | #define GETCWD_CHECK_ACCESS 0x0001 | |
216 | int getcwd_common(struct vnode *, struct vnode *, char **, char *, int, | 216 | int getcwd_common(struct vnode *, struct vnode *, char **, char *, int, | |
217 | int, struct lwp *); | 217 | int, struct lwp *); | |
218 | int vnode_to_path(char *, size_t, struct vnode *, struct lwp *, | 218 | int vnode_to_path(char *, size_t, struct vnode *, struct lwp *, | |
219 | struct proc *); | 219 | struct proc *); | |
220 | 220 | |||
221 | int closef(file_t *); | 221 | int closef(file_t *); | |
222 | file_t *fgetdummy(void); | 222 | file_t *fgetdummy(void); | |
223 | void fputdummy(file_t *); | 223 | void fputdummy(file_t *); | |
224 | 224 | |||
225 | struct stat; | 225 | struct stat; | |
226 | int do_sys_fstat(int, struct stat *); | 226 | int do_sys_fstat(int, struct stat *); | |
227 | struct flock; | 227 | struct flock; | |
228 | int do_fcntl_lock(int, int, struct flock *); | 228 | int do_fcntl_lock(int, int, struct flock *); | |
229 | int do_posix_fadvise(int, off_t, off_t, int); | 229 | int do_posix_fadvise(int, off_t, off_t, int); | |
230 | 230 | |||
231 | extern kmutex_t filelist_lock; | 231 | extern kmutex_t filelist_lock; | |
232 | extern filedesc_t filedesc0; | 232 | extern filedesc_t filedesc0; | |
233 | 233 | |||
234 | #endif /* _KERNEL */ | 234 | #endif /* _KERNEL */ | |
235 | 235 | |||
236 | #endif /* !_SYS_FILEDESC_H_ */ | 236 | #endif /* !_SYS_FILEDESC_H_ */ |