| @@ -1,1072 +1,1072 @@ | | | @@ -1,1072 +1,1072 @@ |
1 | /* $NetBSD: kern_descrip.c,v 1.247 2020/08/26 22:56:55 christos Exp $ */ | | 1 | /* $NetBSD: kern_descrip.c,v 1.248 2020/08/28 06:47:18 riastradh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Andrew Doran. | | 8 | * by Andrew Doran. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | | 33 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 |
34 | * The Regents of the University of California. All rights reserved. | | 34 | * The Regents of the University of California. All rights reserved. |
35 | * (c) UNIX System Laboratories, Inc. | | 35 | * (c) UNIX System Laboratories, Inc. |
36 | * All or some portions of this file are derived from material licensed | | 36 | * All or some portions of this file are derived from material licensed |
37 | * to the University of California by American Telephone and Telegraph | | 37 | * to the University of California by American Telephone and Telegraph |
38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | | 38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
39 | * the permission of UNIX System Laboratories, Inc. | | 39 | * the permission of UNIX System Laboratories, Inc. |
40 | * | | 40 | * |
41 | * Redistribution and use in source and binary forms, with or without | | 41 | * Redistribution and use in source and binary forms, with or without |
42 | * modification, are permitted provided that the following conditions | | 42 | * modification, are permitted provided that the following conditions |
43 | * are met: | | 43 | * are met: |
44 | * 1. Redistributions of source code must retain the above copyright | | 44 | * 1. Redistributions of source code must retain the above copyright |
45 | * notice, this list of conditions and the following disclaimer. | | 45 | * notice, this list of conditions and the following disclaimer. |
46 | * 2. Redistributions in binary form must reproduce the above copyright | | 46 | * 2. Redistributions in binary form must reproduce the above copyright |
47 | * notice, this list of conditions and the following disclaimer in the | | 47 | * notice, this list of conditions and the following disclaimer in the |
48 | * documentation and/or other materials provided with the distribution. | | 48 | * documentation and/or other materials provided with the distribution. |
49 | * 3. Neither the name of the University nor the names of its contributors | | 49 | * 3. Neither the name of the University nor the names of its contributors |
50 | * may be used to endorse or promote products derived from this software | | 50 | * may be used to endorse or promote products derived from this software |
51 | * without specific prior written permission. | | 51 | * without specific prior written permission. |
52 | * | | 52 | * |
53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
63 | * SUCH DAMAGE. | | 63 | * SUCH DAMAGE. |
64 | * | | 64 | * |
65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | | 65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 |
66 | */ | | 66 | */ |
67 | | | 67 | |
68 | /* | | 68 | /* |
69 | * File descriptor management. | | 69 | * File descriptor management. |
70 | */ | | 70 | */ |
71 | | | 71 | |
72 | #include <sys/cdefs.h> | | 72 | #include <sys/cdefs.h> |
73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.247 2020/08/26 22:56:55 christos Exp $"); | | 73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.248 2020/08/28 06:47:18 riastradh Exp $"); |
74 | | | 74 | |
75 | #include <sys/param.h> | | 75 | #include <sys/param.h> |
76 | #include <sys/systm.h> | | 76 | #include <sys/systm.h> |
77 | #include <sys/filedesc.h> | | 77 | #include <sys/filedesc.h> |
78 | #include <sys/kernel.h> | | 78 | #include <sys/kernel.h> |
79 | #include <sys/proc.h> | | 79 | #include <sys/proc.h> |
80 | #include <sys/file.h> | | 80 | #include <sys/file.h> |
81 | #include <sys/socket.h> | | 81 | #include <sys/socket.h> |
82 | #include <sys/socketvar.h> | | 82 | #include <sys/socketvar.h> |
83 | #include <sys/stat.h> | | 83 | #include <sys/stat.h> |
84 | #include <sys/ioctl.h> | | 84 | #include <sys/ioctl.h> |
85 | #include <sys/fcntl.h> | | 85 | #include <sys/fcntl.h> |
86 | #include <sys/pool.h> | | 86 | #include <sys/pool.h> |
87 | #include <sys/unistd.h> | | 87 | #include <sys/unistd.h> |
88 | #include <sys/resourcevar.h> | | 88 | #include <sys/resourcevar.h> |
89 | #include <sys/conf.h> | | 89 | #include <sys/conf.h> |
90 | #include <sys/event.h> | | 90 | #include <sys/event.h> |
91 | #include <sys/kauth.h> | | 91 | #include <sys/kauth.h> |
92 | #include <sys/atomic.h> | | 92 | #include <sys/atomic.h> |
93 | #include <sys/syscallargs.h> | | 93 | #include <sys/syscallargs.h> |
94 | #include <sys/cpu.h> | | 94 | #include <sys/cpu.h> |
95 | #include <sys/kmem.h> | | 95 | #include <sys/kmem.h> |
96 | #include <sys/vnode.h> | | 96 | #include <sys/vnode.h> |
97 | #include <sys/sysctl.h> | | 97 | #include <sys/sysctl.h> |
98 | #include <sys/ktrace.h> | | 98 | #include <sys/ktrace.h> |
99 | | | 99 | |
100 | /* | | 100 | /* |
101 | * A list (head) of open files, counter, and lock protecting them. | | 101 | * A list (head) of open files, counter, and lock protecting them. |
102 | */ | | 102 | */ |
103 | struct filelist filehead __cacheline_aligned; | | 103 | struct filelist filehead __cacheline_aligned; |
104 | static u_int nfiles __cacheline_aligned; | | 104 | static u_int nfiles __cacheline_aligned; |
105 | kmutex_t filelist_lock __cacheline_aligned; | | 105 | kmutex_t filelist_lock __cacheline_aligned; |
106 | | | 106 | |
107 | static pool_cache_t filedesc_cache __read_mostly; | | 107 | static pool_cache_t filedesc_cache __read_mostly; |
108 | static pool_cache_t file_cache __read_mostly; | | 108 | static pool_cache_t file_cache __read_mostly; |
109 | static pool_cache_t fdfile_cache __read_mostly; | | 109 | static pool_cache_t fdfile_cache __read_mostly; |
110 | | | 110 | |
111 | static int file_ctor(void *, void *, int); | | 111 | static int file_ctor(void *, void *, int); |
112 | static void file_dtor(void *, void *); | | 112 | static void file_dtor(void *, void *); |
113 | static int fdfile_ctor(void *, void *, int); | | 113 | static int fdfile_ctor(void *, void *, int); |
114 | static void fdfile_dtor(void *, void *); | | 114 | static void fdfile_dtor(void *, void *); |
115 | static int filedesc_ctor(void *, void *, int); | | 115 | static int filedesc_ctor(void *, void *, int); |
116 | static void filedesc_dtor(void *, void *); | | 116 | static void filedesc_dtor(void *, void *); |
117 | static int filedescopen(dev_t, int, int, lwp_t *); | | 117 | static int filedescopen(dev_t, int, int, lwp_t *); |
118 | | | 118 | |
119 | static int sysctl_kern_file(SYSCTLFN_PROTO); | | 119 | static int sysctl_kern_file(SYSCTLFN_PROTO); |
120 | static int sysctl_kern_file2(SYSCTLFN_PROTO); | | 120 | static int sysctl_kern_file2(SYSCTLFN_PROTO); |
121 | static void fill_file(struct file *, const struct file *); | | 121 | static void fill_file(struct file *, const struct file *); |
122 | static void fill_file2(struct kinfo_file *, const file_t *, const fdfile_t *, | | 122 | static void fill_file2(struct kinfo_file *, const file_t *, const fdfile_t *, |
123 | int, pid_t); | | 123 | int, pid_t); |
124 | | | 124 | |
125 | const struct cdevsw filedesc_cdevsw = { | | 125 | const struct cdevsw filedesc_cdevsw = { |
126 | .d_open = filedescopen, | | 126 | .d_open = filedescopen, |
127 | .d_close = noclose, | | 127 | .d_close = noclose, |
128 | .d_read = noread, | | 128 | .d_read = noread, |
129 | .d_write = nowrite, | | 129 | .d_write = nowrite, |
130 | .d_ioctl = noioctl, | | 130 | .d_ioctl = noioctl, |
131 | .d_stop = nostop, | | 131 | .d_stop = nostop, |
132 | .d_tty = notty, | | 132 | .d_tty = notty, |
133 | .d_poll = nopoll, | | 133 | .d_poll = nopoll, |
134 | .d_mmap = nommap, | | 134 | .d_mmap = nommap, |
135 | .d_kqfilter = nokqfilter, | | 135 | .d_kqfilter = nokqfilter, |
136 | .d_discard = nodiscard, | | 136 | .d_discard = nodiscard, |
137 | .d_flag = D_OTHER | D_MPSAFE | | 137 | .d_flag = D_OTHER | D_MPSAFE |
138 | }; | | 138 | }; |
139 | | | 139 | |
140 | /* For ease of reading. */ | | 140 | /* For ease of reading. */ |
141 | __strong_alias(fd_putvnode,fd_putfile) | | 141 | __strong_alias(fd_putvnode,fd_putfile) |
142 | __strong_alias(fd_putsock,fd_putfile) | | 142 | __strong_alias(fd_putsock,fd_putfile) |
143 | | | 143 | |
144 | /* | | 144 | /* |
145 | * Initialize the descriptor system. | | 145 | * Initialize the descriptor system. |
146 | */ | | 146 | */ |
147 | void | | 147 | void |
148 | fd_sys_init(void) | | 148 | fd_sys_init(void) |
149 | { | | 149 | { |
150 | static struct sysctllog *clog; | | 150 | static struct sysctllog *clog; |
151 | | | 151 | |
152 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); | | 152 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); |
153 | | | 153 | |
154 | LIST_INIT(&filehead); | | 154 | LIST_INIT(&filehead); |
155 | | | 155 | |
156 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, | | 156 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, |
157 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); | | 157 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); |
158 | KASSERT(file_cache != NULL); | | 158 | KASSERT(file_cache != NULL); |
159 | | | 159 | |
160 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, | | 160 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, |
161 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, | | 161 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, |
162 | NULL); | | 162 | NULL); |
163 | KASSERT(fdfile_cache != NULL); | | 163 | KASSERT(fdfile_cache != NULL); |
164 | | | 164 | |
165 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, | | 165 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, |
166 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, | | 166 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, |
167 | NULL); | | 167 | NULL); |
168 | KASSERT(filedesc_cache != NULL); | | 168 | KASSERT(filedesc_cache != NULL); |
169 | | | 169 | |
170 | sysctl_createv(&clog, 0, NULL, NULL, | | 170 | sysctl_createv(&clog, 0, NULL, NULL, |
171 | CTLFLAG_PERMANENT, | | 171 | CTLFLAG_PERMANENT, |
172 | CTLTYPE_STRUCT, "file", | | 172 | CTLTYPE_STRUCT, "file", |
173 | SYSCTL_DESCR("System open file table"), | | 173 | SYSCTL_DESCR("System open file table"), |
174 | sysctl_kern_file, 0, NULL, 0, | | 174 | sysctl_kern_file, 0, NULL, 0, |
175 | CTL_KERN, KERN_FILE, CTL_EOL); | | 175 | CTL_KERN, KERN_FILE, CTL_EOL); |
176 | sysctl_createv(&clog, 0, NULL, NULL, | | 176 | sysctl_createv(&clog, 0, NULL, NULL, |
177 | CTLFLAG_PERMANENT, | | 177 | CTLFLAG_PERMANENT, |
178 | CTLTYPE_STRUCT, "file2", | | 178 | CTLTYPE_STRUCT, "file2", |
179 | SYSCTL_DESCR("System open file table"), | | 179 | SYSCTL_DESCR("System open file table"), |
180 | sysctl_kern_file2, 0, NULL, 0, | | 180 | sysctl_kern_file2, 0, NULL, 0, |
181 | CTL_KERN, KERN_FILE2, CTL_EOL); | | 181 | CTL_KERN, KERN_FILE2, CTL_EOL); |
182 | } | | 182 | } |
183 | | | 183 | |
184 | static bool | | 184 | static bool |
185 | fd_isused(filedesc_t *fdp, unsigned fd) | | 185 | fd_isused(filedesc_t *fdp, unsigned fd) |
186 | { | | 186 | { |
187 | u_int off = fd >> NDENTRYSHIFT; | | 187 | u_int off = fd >> NDENTRYSHIFT; |
188 | | | 188 | |
189 | KASSERT(fd < atomic_load_consume(&fdp->fd_dt)->dt_nfiles); | | 189 | KASSERT(fd < atomic_load_consume(&fdp->fd_dt)->dt_nfiles); |
190 | | | 190 | |
191 | return (fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) != 0; | | 191 | return (fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) != 0; |
192 | } | | 192 | } |
193 | | | 193 | |
194 | /* | | 194 | /* |
195 | * Verify that the bitmaps match the descriptor table. | | 195 | * Verify that the bitmaps match the descriptor table. |
196 | */ | | 196 | */ |
197 | static inline void | | 197 | static inline void |
198 | fd_checkmaps(filedesc_t *fdp) | | 198 | fd_checkmaps(filedesc_t *fdp) |
199 | { | | 199 | { |
200 | #ifdef DEBUG | | 200 | #ifdef DEBUG |
201 | fdtab_t *dt; | | 201 | fdtab_t *dt; |
202 | u_int fd; | | 202 | u_int fd; |
203 | | | 203 | |
204 | KASSERT(fdp->fd_refcnt <= 1 || mutex_owned(&fdp->fd_lock)); | | 204 | KASSERT(fdp->fd_refcnt <= 1 || mutex_owned(&fdp->fd_lock)); |
205 | | | 205 | |
206 | dt = fdp->fd_dt; | | 206 | dt = fdp->fd_dt; |
207 | if (fdp->fd_refcnt == -1) { | | 207 | if (fdp->fd_refcnt == -1) { |
208 | /* | | 208 | /* |
209 | * fd_free tears down the table without maintaining its bitmap. | | 209 | * fd_free tears down the table without maintaining its bitmap. |
210 | */ | | 210 | */ |
211 | return; | | 211 | return; |
212 | } | | 212 | } |
213 | for (fd = 0; fd < dt->dt_nfiles; fd++) { | | 213 | for (fd = 0; fd < dt->dt_nfiles; fd++) { |
214 | if (fd < NDFDFILE) { | | 214 | if (fd < NDFDFILE) { |
215 | KASSERT(dt->dt_ff[fd] == | | 215 | KASSERT(dt->dt_ff[fd] == |
216 | (fdfile_t *)fdp->fd_dfdfile[fd]); | | 216 | (fdfile_t *)fdp->fd_dfdfile[fd]); |
217 | } | | 217 | } |
218 | if (dt->dt_ff[fd] == NULL) { | | 218 | if (dt->dt_ff[fd] == NULL) { |
219 | KASSERT(!fd_isused(fdp, fd)); | | 219 | KASSERT(!fd_isused(fdp, fd)); |
220 | } else if (dt->dt_ff[fd]->ff_file != NULL) { | | 220 | } else if (dt->dt_ff[fd]->ff_file != NULL) { |
221 | KASSERT(fd_isused(fdp, fd)); | | 221 | KASSERT(fd_isused(fdp, fd)); |
222 | } | | 222 | } |
223 | } | | 223 | } |
224 | #endif | | 224 | #endif |
225 | } | | 225 | } |
226 | | | 226 | |
227 | static int | | 227 | static int |
228 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) | | 228 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) |
229 | { | | 229 | { |
230 | int i, off, maxoff; | | 230 | int i, off, maxoff; |
231 | uint32_t sub; | | 231 | uint32_t sub; |
232 | | | 232 | |
233 | KASSERT(mutex_owned(&fdp->fd_lock)); | | 233 | KASSERT(mutex_owned(&fdp->fd_lock)); |
234 | | | 234 | |
235 | fd_checkmaps(fdp); | | 235 | fd_checkmaps(fdp); |
236 | | | 236 | |
237 | if (want > bits) | | 237 | if (want > bits) |
238 | return -1; | | 238 | return -1; |
239 | | | 239 | |
240 | off = want >> NDENTRYSHIFT; | | 240 | off = want >> NDENTRYSHIFT; |
241 | i = want & NDENTRYMASK; | | 241 | i = want & NDENTRYMASK; |
242 | if (i) { | | 242 | if (i) { |
243 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); | | 243 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); |
244 | if (sub != ~0) | | 244 | if (sub != ~0) |
245 | goto found; | | 245 | goto found; |
246 | off++; | | 246 | off++; |
247 | } | | 247 | } |
248 | | | 248 | |
249 | maxoff = NDLOSLOTS(bits); | | 249 | maxoff = NDLOSLOTS(bits); |
250 | while (off < maxoff) { | | 250 | while (off < maxoff) { |
251 | if ((sub = bitmap[off]) != ~0) | | 251 | if ((sub = bitmap[off]) != ~0) |
252 | goto found; | | 252 | goto found; |
253 | off++; | | 253 | off++; |
254 | } | | 254 | } |
255 | | | 255 | |
256 | return -1; | | 256 | return -1; |
257 | | | 257 | |
258 | found: | | 258 | found: |
259 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; | | 259 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; |
260 | } | | 260 | } |
261 | | | 261 | |
262 | static int | | 262 | static int |
263 | fd_last_set(filedesc_t *fd, int last) | | 263 | fd_last_set(filedesc_t *fd, int last) |
264 | { | | 264 | { |
265 | int off, i; | | 265 | int off, i; |
266 | fdfile_t **ff = fd->fd_dt->dt_ff; | | 266 | fdfile_t **ff = fd->fd_dt->dt_ff; |
267 | uint32_t *bitmap = fd->fd_lomap; | | 267 | uint32_t *bitmap = fd->fd_lomap; |
268 | | | 268 | |
269 | KASSERT(mutex_owned(&fd->fd_lock)); | | 269 | KASSERT(mutex_owned(&fd->fd_lock)); |
270 | | | 270 | |
271 | fd_checkmaps(fd); | | 271 | fd_checkmaps(fd); |
272 | | | 272 | |
273 | off = (last - 1) >> NDENTRYSHIFT; | | 273 | off = (last - 1) >> NDENTRYSHIFT; |
274 | | | 274 | |
275 | while (off >= 0 && !bitmap[off]) | | 275 | while (off >= 0 && !bitmap[off]) |
276 | off--; | | 276 | off--; |
277 | | | 277 | |
278 | if (off < 0) | | 278 | if (off < 0) |
279 | return -1; | | 279 | return -1; |
280 | | | 280 | |
281 | i = ((off + 1) << NDENTRYSHIFT) - 1; | | 281 | i = ((off + 1) << NDENTRYSHIFT) - 1; |
282 | if (i >= last) | | 282 | if (i >= last) |
283 | i = last - 1; | | 283 | i = last - 1; |
284 | | | 284 | |
285 | /* XXX should use bitmap */ | | 285 | /* XXX should use bitmap */ |
286 | while (i > 0 && (ff[i] == NULL || !ff[i]->ff_allocated)) | | 286 | while (i > 0 && (ff[i] == NULL || !ff[i]->ff_allocated)) |
287 | i--; | | 287 | i--; |
288 | | | 288 | |
289 | return i; | | 289 | return i; |
290 | } | | 290 | } |
291 | | | 291 | |
292 | static inline void | | 292 | static inline void |
293 | fd_used(filedesc_t *fdp, unsigned fd) | | 293 | fd_used(filedesc_t *fdp, unsigned fd) |
294 | { | | 294 | { |
295 | u_int off = fd >> NDENTRYSHIFT; | | 295 | u_int off = fd >> NDENTRYSHIFT; |
296 | fdfile_t *ff; | | 296 | fdfile_t *ff; |
297 | | | 297 | |
298 | ff = fdp->fd_dt->dt_ff[fd]; | | 298 | ff = fdp->fd_dt->dt_ff[fd]; |
299 | | | 299 | |
300 | KASSERT(mutex_owned(&fdp->fd_lock)); | | 300 | KASSERT(mutex_owned(&fdp->fd_lock)); |
301 | KASSERT((fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) == 0); | | 301 | KASSERT((fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) == 0); |
302 | KASSERT(ff != NULL); | | 302 | KASSERT(ff != NULL); |
303 | KASSERT(ff->ff_file == NULL); | | 303 | KASSERT(ff->ff_file == NULL); |
304 | KASSERT(!ff->ff_allocated); | | 304 | KASSERT(!ff->ff_allocated); |
305 | | | 305 | |
306 | ff->ff_allocated = true; | | 306 | ff->ff_allocated = true; |
307 | fdp->fd_lomap[off] |= 1U << (fd & NDENTRYMASK); | | 307 | fdp->fd_lomap[off] |= 1U << (fd & NDENTRYMASK); |
308 | if (__predict_false(fdp->fd_lomap[off] == ~0)) { | | 308 | if (__predict_false(fdp->fd_lomap[off] == ~0)) { |
309 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | | 309 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & |
310 | (1U << (off & NDENTRYMASK))) == 0); | | 310 | (1U << (off & NDENTRYMASK))) == 0); |
311 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1U << (off & NDENTRYMASK); | | 311 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1U << (off & NDENTRYMASK); |
312 | } | | 312 | } |
313 | | | 313 | |
314 | if ((int)fd > fdp->fd_lastfile) { | | 314 | if ((int)fd > fdp->fd_lastfile) { |
315 | fdp->fd_lastfile = fd; | | 315 | fdp->fd_lastfile = fd; |
316 | } | | 316 | } |
317 | | | 317 | |
318 | fd_checkmaps(fdp); | | 318 | fd_checkmaps(fdp); |
319 | } | | 319 | } |
320 | | | 320 | |
321 | static inline void | | 321 | static inline void |
322 | fd_unused(filedesc_t *fdp, unsigned fd) | | 322 | fd_unused(filedesc_t *fdp, unsigned fd) |
323 | { | | 323 | { |
324 | u_int off = fd >> NDENTRYSHIFT; | | 324 | u_int off = fd >> NDENTRYSHIFT; |
325 | fdfile_t *ff; | | 325 | fdfile_t *ff; |
326 | | | 326 | |
327 | ff = fdp->fd_dt->dt_ff[fd]; | | 327 | ff = fdp->fd_dt->dt_ff[fd]; |
328 | | | 328 | |
329 | KASSERT(mutex_owned(&fdp->fd_lock)); | | 329 | KASSERT(mutex_owned(&fdp->fd_lock)); |
330 | KASSERT(ff != NULL); | | 330 | KASSERT(ff != NULL); |
331 | KASSERT(ff->ff_file == NULL); | | 331 | KASSERT(ff->ff_file == NULL); |
332 | KASSERT(ff->ff_allocated); | | 332 | KASSERT(ff->ff_allocated); |
333 | | | 333 | |
334 | if (fd < fdp->fd_freefile) { | | 334 | if (fd < fdp->fd_freefile) { |
335 | fdp->fd_freefile = fd; | | 335 | fdp->fd_freefile = fd; |
336 | } | | 336 | } |
337 | | | 337 | |
338 | if (fdp->fd_lomap[off] == ~0) { | | 338 | if (fdp->fd_lomap[off] == ~0) { |
339 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | | 339 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & |
340 | (1U << (off & NDENTRYMASK))) != 0); | | 340 | (1U << (off & NDENTRYMASK))) != 0); |
341 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | | 341 | fdp->fd_himap[off >> NDENTRYSHIFT] &= |
342 | ~(1U << (off & NDENTRYMASK)); | | 342 | ~(1U << (off & NDENTRYMASK)); |
343 | } | | 343 | } |
344 | KASSERT((fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) != 0); | | 344 | KASSERT((fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) != 0); |
345 | fdp->fd_lomap[off] &= ~(1U << (fd & NDENTRYMASK)); | | 345 | fdp->fd_lomap[off] &= ~(1U << (fd & NDENTRYMASK)); |
346 | ff->ff_allocated = false; | | 346 | ff->ff_allocated = false; |
347 | | | 347 | |
348 | KASSERT(fd <= fdp->fd_lastfile); | | 348 | KASSERT(fd <= fdp->fd_lastfile); |
349 | if (fd == fdp->fd_lastfile) { | | 349 | if (fd == fdp->fd_lastfile) { |
350 | fdp->fd_lastfile = fd_last_set(fdp, fd); | | 350 | fdp->fd_lastfile = fd_last_set(fdp, fd); |
351 | } | | 351 | } |
352 | fd_checkmaps(fdp); | | 352 | fd_checkmaps(fdp); |
353 | } | | 353 | } |
354 | | | 354 | |
355 | /* | | 355 | /* |
356 | * Look up the file structure corresponding to a file descriptor | | 356 | * Look up the file structure corresponding to a file descriptor |
357 | * and return the file, holding a reference on the descriptor. | | 357 | * and return the file, holding a reference on the descriptor. |
358 | */ | | 358 | */ |
359 | file_t * | | 359 | file_t * |
360 | fd_getfile(unsigned fd) | | 360 | fd_getfile(unsigned fd) |
361 | { | | 361 | { |
362 | filedesc_t *fdp; | | 362 | filedesc_t *fdp; |
363 | fdfile_t *ff; | | 363 | fdfile_t *ff; |
364 | file_t *fp; | | 364 | file_t *fp; |
365 | fdtab_t *dt; | | 365 | fdtab_t *dt; |
366 | | | 366 | |
367 | /* | | 367 | /* |
368 | * Look up the fdfile structure representing this descriptor. | | 368 | * Look up the fdfile structure representing this descriptor. |
369 | * We are doing this unlocked. See fd_tryexpand(). | | 369 | * We are doing this unlocked. See fd_tryexpand(). |
370 | */ | | 370 | */ |
371 | fdp = curlwp->l_fd; | | 371 | fdp = curlwp->l_fd; |
372 | dt = atomic_load_consume(&fdp->fd_dt); | | 372 | dt = atomic_load_consume(&fdp->fd_dt); |
373 | if (__predict_false(fd >= dt->dt_nfiles)) { | | 373 | if (__predict_false(fd >= dt->dt_nfiles)) { |
374 | return NULL; | | 374 | return NULL; |
375 | } | | 375 | } |
376 | ff = dt->dt_ff[fd]; | | 376 | ff = dt->dt_ff[fd]; |
377 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 377 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
378 | if (__predict_false(ff == NULL)) { | | 378 | if (__predict_false(ff == NULL)) { |
379 | return NULL; | | 379 | return NULL; |
380 | } | | 380 | } |
381 | | | 381 | |
382 | /* Now get a reference to the descriptor. */ | | 382 | /* Now get a reference to the descriptor. */ |
383 | if (fdp->fd_refcnt == 1) { | | 383 | if (fdp->fd_refcnt == 1) { |
384 | /* | | 384 | /* |
385 | * Single threaded: don't need to worry about concurrent | | 385 | * Single threaded: don't need to worry about concurrent |
386 | * access (other than earlier calls to kqueue, which may | | 386 | * access (other than earlier calls to kqueue, which may |
387 | * hold a reference to the descriptor). | | 387 | * hold a reference to the descriptor). |
388 | */ | | 388 | */ |
389 | ff->ff_refcnt++; | | 389 | ff->ff_refcnt++; |
390 | } else { | | 390 | } else { |
391 | /* | | 391 | /* |
392 | * Multi threaded: issue a memory barrier to ensure that we | | 392 | * Multi threaded: issue a memory barrier to ensure that we |
393 | * acquire the file pointer _after_ adding a reference. If | | 393 | * acquire the file pointer _after_ adding a reference. If |
394 | * no memory barrier, we could fetch a stale pointer. | | 394 | * no memory barrier, we could fetch a stale pointer. |
395 | */ | | 395 | */ |
396 | atomic_inc_uint(&ff->ff_refcnt); | | 396 | atomic_inc_uint(&ff->ff_refcnt); |
397 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 397 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
398 | membar_enter(); | | 398 | membar_enter(); |
399 | #endif | | 399 | #endif |
400 | } | | 400 | } |
401 | | | 401 | |
402 | /* | | 402 | /* |
403 | * If the file is not open or is being closed then put the | | 403 | * If the file is not open or is being closed then put the |
404 | * reference back. | | 404 | * reference back. |
405 | */ | | 405 | */ |
406 | fp = atomic_load_consume(&ff->ff_file); | | 406 | fp = atomic_load_consume(&ff->ff_file); |
407 | if (__predict_true(fp != NULL)) { | | 407 | if (__predict_true(fp != NULL)) { |
408 | return fp; | | 408 | return fp; |
409 | } | | 409 | } |
410 | fd_putfile(fd); | | 410 | fd_putfile(fd); |
411 | return NULL; | | 411 | return NULL; |
412 | } | | 412 | } |
413 | | | 413 | |
414 | /* | | 414 | /* |
415 | * Release a reference to a file descriptor acquired with fd_getfile(). | | 415 | * Release a reference to a file descriptor acquired with fd_getfile(). |
416 | */ | | 416 | */ |
417 | void | | 417 | void |
418 | fd_putfile(unsigned fd) | | 418 | fd_putfile(unsigned fd) |
419 | { | | 419 | { |
420 | filedesc_t *fdp; | | 420 | filedesc_t *fdp; |
421 | fdfile_t *ff; | | 421 | fdfile_t *ff; |
422 | u_int u, v; | | 422 | u_int u, v; |
423 | | | 423 | |
424 | fdp = curlwp->l_fd; | | 424 | fdp = curlwp->l_fd; |
425 | KASSERT(fd < atomic_load_consume(&fdp->fd_dt)->dt_nfiles); | | 425 | KASSERT(fd < atomic_load_consume(&fdp->fd_dt)->dt_nfiles); |
426 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; | | 426 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; |
427 | | | 427 | |
428 | KASSERT(ff != NULL); | | 428 | KASSERT(ff != NULL); |
429 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | | 429 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); |
430 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 430 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
431 | | | 431 | |
432 | if (fdp->fd_refcnt == 1) { | | 432 | if (fdp->fd_refcnt == 1) { |
433 | /* | | 433 | /* |
434 | * Single threaded: don't need to worry about concurrent | | 434 | * Single threaded: don't need to worry about concurrent |
435 | * access (other than earlier calls to kqueue, which may | | 435 | * access (other than earlier calls to kqueue, which may |
436 | * hold a reference to the descriptor). | | 436 | * hold a reference to the descriptor). |
437 | */ | | 437 | */ |
438 | if (__predict_false((ff->ff_refcnt & FR_CLOSING) != 0)) { | | 438 | if (__predict_false((ff->ff_refcnt & FR_CLOSING) != 0)) { |
439 | fd_close(fd); | | 439 | fd_close(fd); |
440 | return; | | 440 | return; |
441 | } | | 441 | } |
442 | ff->ff_refcnt--; | | 442 | ff->ff_refcnt--; |
443 | return; | | 443 | return; |
444 | } | | 444 | } |
445 | | | 445 | |
446 | /* | | 446 | /* |
447 | * Ensure that any use of the file is complete and globally | | 447 | * Ensure that any use of the file is complete and globally |
448 | * visible before dropping the final reference. If no membar, | | 448 | * visible before dropping the final reference. If no membar, |
449 | * the current CPU could still access memory associated with | | 449 | * the current CPU could still access memory associated with |
450 | * the file after it has been freed or recycled by another | | 450 | * the file after it has been freed or recycled by another |
451 | * CPU. | | 451 | * CPU. |
452 | */ | | 452 | */ |
453 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 453 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
454 | membar_exit(); | | 454 | membar_exit(); |
455 | #endif | | 455 | #endif |
456 | | | 456 | |
457 | /* | | 457 | /* |
458 | * Be optimistic and start out with the assumption that no other | | 458 | * Be optimistic and start out with the assumption that no other |
459 | * threads are trying to close the descriptor. If the CAS fails, | | 459 | * threads are trying to close the descriptor. If the CAS fails, |
460 | * we lost a race and/or it's being closed. | | 460 | * we lost a race and/or it's being closed. |
461 | */ | | 461 | */ |
462 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | | 462 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { |
463 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | | 463 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); |
464 | if (__predict_true(u == v)) { | | 464 | if (__predict_true(u == v)) { |
465 | return; | | 465 | return; |
466 | } | | 466 | } |
467 | if (__predict_false((v & FR_CLOSING) != 0)) { | | 467 | if (__predict_false((v & FR_CLOSING) != 0)) { |
468 | break; | | 468 | break; |
469 | } | | 469 | } |
470 | } | | 470 | } |
471 | | | 471 | |
472 | /* Another thread is waiting to close the file: join it. */ | | 472 | /* Another thread is waiting to close the file: join it. */ |
473 | (void)fd_close(fd); | | 473 | (void)fd_close(fd); |
474 | } | | 474 | } |
475 | | | 475 | |
476 | /* | | 476 | /* |
477 | * Convenience wrapper around fd_getfile() that returns reference | | 477 | * Convenience wrapper around fd_getfile() that returns reference |
478 | * to a vnode. | | 478 | * to a vnode. |
479 | */ | | 479 | */ |
480 | int | | 480 | int |
481 | fd_getvnode(unsigned fd, file_t **fpp) | | 481 | fd_getvnode(unsigned fd, file_t **fpp) |
482 | { | | 482 | { |
483 | vnode_t *vp; | | 483 | vnode_t *vp; |
484 | file_t *fp; | | 484 | file_t *fp; |
485 | | | 485 | |
486 | fp = fd_getfile(fd); | | 486 | fp = fd_getfile(fd); |
487 | if (__predict_false(fp == NULL)) { | | 487 | if (__predict_false(fp == NULL)) { |
488 | return EBADF; | | 488 | return EBADF; |
489 | } | | 489 | } |
490 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { | | 490 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { |
491 | fd_putfile(fd); | | 491 | fd_putfile(fd); |
492 | return EINVAL; | | 492 | return EINVAL; |
493 | } | | 493 | } |
494 | vp = fp->f_vnode; | | 494 | vp = fp->f_vnode; |
495 | if (__predict_false(vp->v_type == VBAD)) { | | 495 | if (__predict_false(vp->v_type == VBAD)) { |
496 | /* XXX Is this case really necessary? */ | | 496 | /* XXX Is this case really necessary? */ |
497 | fd_putfile(fd); | | 497 | fd_putfile(fd); |
498 | return EBADF; | | 498 | return EBADF; |
499 | } | | 499 | } |
500 | *fpp = fp; | | 500 | *fpp = fp; |
501 | return 0; | | 501 | return 0; |
502 | } | | 502 | } |
503 | | | 503 | |
504 | /* | | 504 | /* |
505 | * Convenience wrapper around fd_getfile() that returns reference | | 505 | * Convenience wrapper around fd_getfile() that returns reference |
506 | * to a socket. | | 506 | * to a socket. |
507 | */ | | 507 | */ |
508 | int | | 508 | int |
509 | fd_getsock1(unsigned fd, struct socket **sop, file_t **fp) | | 509 | fd_getsock1(unsigned fd, struct socket **sop, file_t **fp) |
510 | { | | 510 | { |
511 | *fp = fd_getfile(fd); | | 511 | *fp = fd_getfile(fd); |
512 | if (__predict_false(*fp == NULL)) { | | 512 | if (__predict_false(*fp == NULL)) { |
513 | return EBADF; | | 513 | return EBADF; |
514 | } | | 514 | } |
515 | if (__predict_false((*fp)->f_type != DTYPE_SOCKET)) { | | 515 | if (__predict_false((*fp)->f_type != DTYPE_SOCKET)) { |
516 | fd_putfile(fd); | | 516 | fd_putfile(fd); |
517 | return ENOTSOCK; | | 517 | return ENOTSOCK; |
518 | } | | 518 | } |
519 | *sop = (*fp)->f_socket; | | 519 | *sop = (*fp)->f_socket; |
520 | return 0; | | 520 | return 0; |
521 | } | | 521 | } |
522 | | | 522 | |
523 | int | | 523 | int |
524 | fd_getsock(unsigned fd, struct socket **sop) | | 524 | fd_getsock(unsigned fd, struct socket **sop) |
525 | { | | 525 | { |
526 | file_t *fp; | | 526 | file_t *fp; |
527 | return fd_getsock1(fd, sop, &fp); | | 527 | return fd_getsock1(fd, sop, &fp); |
528 | } | | 528 | } |
529 | | | 529 | |
530 | /* | | 530 | /* |
531 | * Look up the file structure corresponding to a file descriptor | | 531 | * Look up the file structure corresponding to a file descriptor |
532 | * and return it with a reference held on the file, not the | | 532 | * and return it with a reference held on the file, not the |
533 | * descriptor. | | 533 | * descriptor. |
534 | * | | 534 | * |
535 | * This is heavyweight and only used when accessing descriptors | | 535 | * This is heavyweight and only used when accessing descriptors |
536 | * from a foreign process. The caller must ensure that `p' does | | 536 | * from a foreign process. The caller must ensure that `p' does |
537 | * not exit or fork across this call. | | 537 | * not exit or fork across this call. |
538 | * | | 538 | * |
539 | * To release the file (not descriptor) reference, use closef(). | | 539 | * To release the file (not descriptor) reference, use closef(). |
540 | */ | | 540 | */ |
541 | file_t * | | 541 | file_t * |
542 | fd_getfile2(proc_t *p, unsigned fd) | | 542 | fd_getfile2(proc_t *p, unsigned fd) |
543 | { | | 543 | { |
544 | filedesc_t *fdp; | | 544 | filedesc_t *fdp; |
545 | fdfile_t *ff; | | 545 | fdfile_t *ff; |
546 | file_t *fp; | | 546 | file_t *fp; |
547 | fdtab_t *dt; | | 547 | fdtab_t *dt; |
548 | | | 548 | |
549 | fdp = p->p_fd; | | 549 | fdp = p->p_fd; |
550 | mutex_enter(&fdp->fd_lock); | | 550 | mutex_enter(&fdp->fd_lock); |
551 | dt = fdp->fd_dt; | | 551 | dt = fdp->fd_dt; |
552 | if (fd >= dt->dt_nfiles) { | | 552 | if (fd >= dt->dt_nfiles) { |
553 | mutex_exit(&fdp->fd_lock); | | 553 | mutex_exit(&fdp->fd_lock); |
554 | return NULL; | | 554 | return NULL; |
555 | } | | 555 | } |
556 | if ((ff = dt->dt_ff[fd]) == NULL) { | | 556 | if ((ff = dt->dt_ff[fd]) == NULL) { |
557 | mutex_exit(&fdp->fd_lock); | | 557 | mutex_exit(&fdp->fd_lock); |
558 | return NULL; | | 558 | return NULL; |
559 | } | | 559 | } |
560 | if ((fp = atomic_load_consume(&ff->ff_file)) == NULL) { | | 560 | if ((fp = atomic_load_consume(&ff->ff_file)) == NULL) { |
561 | mutex_exit(&fdp->fd_lock); | | 561 | mutex_exit(&fdp->fd_lock); |
562 | return NULL; | | 562 | return NULL; |
563 | } | | 563 | } |
564 | mutex_enter(&fp->f_lock); | | 564 | mutex_enter(&fp->f_lock); |
565 | fp->f_count++; | | 565 | fp->f_count++; |
566 | mutex_exit(&fp->f_lock); | | 566 | mutex_exit(&fp->f_lock); |
567 | mutex_exit(&fdp->fd_lock); | | 567 | mutex_exit(&fdp->fd_lock); |
568 | | | 568 | |
569 | return fp; | | 569 | return fp; |
570 | } | | 570 | } |
571 | | | 571 | |
572 | /* | | 572 | /* |
573 | * Internal form of close. Must be called with a reference to the | | 573 | * Internal form of close. Must be called with a reference to the |
574 | * descriptor, and will drop the reference. When all descriptor | | 574 | * descriptor, and will drop the reference. When all descriptor |
575 | * references are dropped, releases the descriptor slot and a single | | 575 | * references are dropped, releases the descriptor slot and a single |
576 | * reference to the file structure. | | 576 | * reference to the file structure. |
577 | */ | | 577 | */ |
578 | int | | 578 | int |
579 | fd_close(unsigned fd) | | 579 | fd_close(unsigned fd) |
580 | { | | 580 | { |
581 | struct flock lf; | | 581 | struct flock lf; |
582 | filedesc_t *fdp; | | 582 | filedesc_t *fdp; |
583 | fdfile_t *ff; | | 583 | fdfile_t *ff; |
584 | file_t *fp; | | 584 | file_t *fp; |
585 | proc_t *p; | | 585 | proc_t *p; |
586 | lwp_t *l; | | 586 | lwp_t *l; |
587 | u_int refcnt; | | 587 | u_int refcnt; |
588 | | | 588 | |
589 | l = curlwp; | | 589 | l = curlwp; |
590 | p = l->l_proc; | | 590 | p = l->l_proc; |
591 | fdp = l->l_fd; | | 591 | fdp = l->l_fd; |
592 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; | | 592 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; |
593 | | | 593 | |
594 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 594 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
595 | | | 595 | |
596 | mutex_enter(&fdp->fd_lock); | | 596 | mutex_enter(&fdp->fd_lock); |
597 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | | 597 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); |
598 | fp = atomic_load_consume(&ff->ff_file); | | 598 | fp = atomic_load_consume(&ff->ff_file); |
599 | if (__predict_false(fp == NULL)) { | | 599 | if (__predict_false(fp == NULL)) { |
600 | /* | | 600 | /* |
601 | * Another user of the file is already closing, and is | | 601 | * Another user of the file is already closing, and is |
602 | * waiting for other users of the file to drain. Release | | 602 | * waiting for other users of the file to drain. Release |
603 | * our reference, and wake up the closer. | | 603 | * our reference, and wake up the closer. |
604 | */ | | 604 | */ |
605 | atomic_dec_uint(&ff->ff_refcnt); | | 605 | atomic_dec_uint(&ff->ff_refcnt); |
606 | cv_broadcast(&ff->ff_closing); | | 606 | cv_broadcast(&ff->ff_closing); |
607 | mutex_exit(&fdp->fd_lock); | | 607 | mutex_exit(&fdp->fd_lock); |
608 | | | 608 | |
609 | /* | | 609 | /* |
610 | * An application error, so pretend that the descriptor | | 610 | * An application error, so pretend that the descriptor |
611 | * was already closed. We can't safely wait for it to | | 611 | * was already closed. We can't safely wait for it to |
612 | * be closed without potentially deadlocking. | | 612 | * be closed without potentially deadlocking. |
613 | */ | | 613 | */ |
614 | return (EBADF); | | 614 | return (EBADF); |
615 | } | | 615 | } |
616 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); | | 616 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); |
617 | | | 617 | |
618 | /* | | 618 | /* |
619 | * There may be multiple users of this file within the process. | | 619 | * There may be multiple users of this file within the process. |
620 | * Notify existing and new users that the file is closing. This | | 620 | * Notify existing and new users that the file is closing. This |
621 | * will prevent them from adding additional uses to this file | | 621 | * will prevent them from adding additional uses to this file |
622 | * while we are closing it. | | 622 | * while we are closing it. |
623 | */ | | 623 | */ |
624 | ff->ff_file = NULL; | | 624 | ff->ff_file = NULL; |
625 | ff->ff_exclose = false; | | 625 | ff->ff_exclose = false; |
626 | | | 626 | |
627 | /* | | 627 | /* |
628 | * We expect the caller to hold a descriptor reference - drop it. | | 628 | * We expect the caller to hold a descriptor reference - drop it. |
629 | * The reference count may increase beyond zero at this point due | | 629 | * The reference count may increase beyond zero at this point due |
630 | * to an erroneous descriptor reference by an application, but | | 630 | * to an erroneous descriptor reference by an application, but |
631 | * fd_getfile() will notice that the file is being closed and drop | | 631 | * fd_getfile() will notice that the file is being closed and drop |
632 | * the reference again. | | 632 | * the reference again. |
633 | */ | | 633 | */ |
634 | if (fdp->fd_refcnt == 1) { | | 634 | if (fdp->fd_refcnt == 1) { |
635 | /* Single threaded. */ | | 635 | /* Single threaded. */ |
636 | refcnt = --(ff->ff_refcnt); | | 636 | refcnt = --(ff->ff_refcnt); |
637 | } else { | | 637 | } else { |
638 | /* Multi threaded. */ | | 638 | /* Multi threaded. */ |
639 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 639 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
640 | membar_producer(); | | 640 | membar_producer(); |
641 | #endif | | 641 | #endif |
642 | refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); | | 642 | refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); |
643 | } | | 643 | } |
644 | if (__predict_false(refcnt != 0)) { | | 644 | if (__predict_false(refcnt != 0)) { |
645 | /* | | 645 | /* |
646 | * Wait for other references to drain. This is typically | | 646 | * Wait for other references to drain. This is typically |
647 | * an application error - the descriptor is being closed | | 647 | * an application error - the descriptor is being closed |
648 | * while still in use. | | 648 | * while still in use. |
649 | * (Or just a threaded application trying to unblock its | | 649 | * (Or just a threaded application trying to unblock its |
650 | * thread that sleeps in (say) accept()). | | 650 | * thread that sleeps in (say) accept()). |
651 | */ | | 651 | */ |
652 | atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); | | 652 | atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); |
653 | | | 653 | |
654 | /* | | 654 | /* |
655 | * Remove any knotes attached to the file. A knote | | 655 | * Remove any knotes attached to the file. A knote |
656 | * attached to the descriptor can hold references on it. | | 656 | * attached to the descriptor can hold references on it. |
657 | */ | | 657 | */ |
658 | mutex_exit(&fdp->fd_lock); | | 658 | mutex_exit(&fdp->fd_lock); |
659 | if (!SLIST_EMPTY(&ff->ff_knlist)) { | | 659 | if (!SLIST_EMPTY(&ff->ff_knlist)) { |
660 | knote_fdclose(fd); | | 660 | knote_fdclose(fd); |
661 | } | | 661 | } |
662 | | | 662 | |
663 | /* | | 663 | /* |
664 | * Since the file system code doesn't know which fd | | 664 | * Since the file system code doesn't know which fd |
665 | * each request came from (think dup()), we have to | | 665 | * each request came from (think dup()), we have to |
666 | * ask it to return ERESTART for any long-term blocks. | | 666 | * ask it to return ERESTART for any long-term blocks. |
667 | * The re-entry through read/write/etc will detect the | | 667 | * The re-entry through read/write/etc will detect the |
668 | * closed fd and return EBADF. | | 668 | * closed fd and return EBADF. |
669 | * Blocked partial writes may return a short length. | | 669 | * Blocked partial writes may return a short length. |
670 | */ | | 670 | */ |
671 | (*fp->f_ops->fo_restart)(fp); | | 671 | (*fp->f_ops->fo_restart)(fp); |
672 | mutex_enter(&fdp->fd_lock); | | 672 | mutex_enter(&fdp->fd_lock); |
673 | | | 673 | |
674 | /* | | 674 | /* |
675 | * We need to see the count drop to zero at least once, | | 675 | * We need to see the count drop to zero at least once, |
676 | * in order to ensure that all pre-existing references | | 676 | * in order to ensure that all pre-existing references |
677 | * have been drained. New references past this point are | | 677 | * have been drained. New references past this point are |
678 | * of no interest. | | 678 | * of no interest. |
679 | * XXX (dsl) this may need to call fo_restart() after a | | 679 | * XXX (dsl) this may need to call fo_restart() after a |
680 | * timeout to guarantee that all the system calls exit. | | 680 | * timeout to guarantee that all the system calls exit. |
681 | */ | | 681 | */ |
682 | while ((ff->ff_refcnt & FR_MASK) != 0) { | | 682 | while ((ff->ff_refcnt & FR_MASK) != 0) { |
683 | cv_wait(&ff->ff_closing, &fdp->fd_lock); | | 683 | cv_wait(&ff->ff_closing, &fdp->fd_lock); |
684 | } | | 684 | } |
685 | atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); | | 685 | atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); |
686 | } else { | | 686 | } else { |
687 | /* If no references, there must be no knotes. */ | | 687 | /* If no references, there must be no knotes. */ |
688 | KASSERT(SLIST_EMPTY(&ff->ff_knlist)); | | 688 | KASSERT(SLIST_EMPTY(&ff->ff_knlist)); |
689 | } | | 689 | } |
690 | | | 690 | |
691 | /* | | 691 | /* |
692 | * POSIX record locking dictates that any close releases ALL | | 692 | * POSIX record locking dictates that any close releases ALL |
693 | * locks owned by this process. This is handled by setting | | 693 | * locks owned by this process. This is handled by setting |
694 | * a flag in the unlock to free ONLY locks obeying POSIX | | 694 | * a flag in the unlock to free ONLY locks obeying POSIX |
695 | * semantics, and not to free BSD-style file locks. | | 695 | * semantics, and not to free BSD-style file locks. |
696 | * If the descriptor was in a message, POSIX-style locks | | 696 | * If the descriptor was in a message, POSIX-style locks |
697 | * aren't passed with the descriptor. | | 697 | * aren't passed with the descriptor. |
698 | */ | | 698 | */ |
699 | if (__predict_false((p->p_flag & PK_ADVLOCK) != 0 && | | 699 | if (__predict_false((p->p_flag & PK_ADVLOCK) != 0 && |
700 | fp->f_type == DTYPE_VNODE)) { | | 700 | fp->f_type == DTYPE_VNODE)) { |
701 | lf.l_whence = SEEK_SET; | | 701 | lf.l_whence = SEEK_SET; |
702 | lf.l_start = 0; | | 702 | lf.l_start = 0; |
703 | lf.l_len = 0; | | 703 | lf.l_len = 0; |
704 | lf.l_type = F_UNLCK; | | 704 | lf.l_type = F_UNLCK; |
705 | mutex_exit(&fdp->fd_lock); | | 705 | mutex_exit(&fdp->fd_lock); |
706 | (void)VOP_ADVLOCK(fp->f_vnode, p, F_UNLCK, &lf, F_POSIX); | | 706 | (void)VOP_ADVLOCK(fp->f_vnode, p, F_UNLCK, &lf, F_POSIX); |
707 | mutex_enter(&fdp->fd_lock); | | 707 | mutex_enter(&fdp->fd_lock); |
708 | } | | 708 | } |
709 | | | 709 | |
710 | /* Free descriptor slot. */ | | 710 | /* Free descriptor slot. */ |
711 | fd_unused(fdp, fd); | | 711 | fd_unused(fdp, fd); |
712 | mutex_exit(&fdp->fd_lock); | | 712 | mutex_exit(&fdp->fd_lock); |
713 | | | 713 | |
714 | /* Now drop reference to the file itself. */ | | 714 | /* Now drop reference to the file itself. */ |
715 | return closef(fp); | | 715 | return closef(fp); |
716 | } | | 716 | } |
717 | | | 717 | |
718 | /* | | 718 | /* |
719 | * Duplicate a file descriptor. | | 719 | * Duplicate a file descriptor. |
720 | */ | | 720 | */ |
721 | int | | 721 | int |
722 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) | | 722 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) |
723 | { | | 723 | { |
724 | proc_t *p = curproc; | | 724 | proc_t *p = curproc; |
725 | fdtab_t *dt; | | 725 | fdtab_t *dt; |
726 | int error; | | 726 | int error; |
727 | | | 727 | |
728 | while ((error = fd_alloc(p, minfd, newp)) != 0) { | | 728 | while ((error = fd_alloc(p, minfd, newp)) != 0) { |
729 | if (error != ENOSPC) { | | 729 | if (error != ENOSPC) { |
730 | return error; | | 730 | return error; |
731 | } | | 731 | } |
732 | fd_tryexpand(p); | | 732 | fd_tryexpand(p); |
733 | } | | 733 | } |
734 | | | 734 | |
735 | dt = atomic_load_consume(&curlwp->l_fd->fd_dt); | | 735 | dt = atomic_load_consume(&curlwp->l_fd->fd_dt); |
736 | dt->dt_ff[*newp]->ff_exclose = exclose; | | 736 | dt->dt_ff[*newp]->ff_exclose = exclose; |
737 | fd_affix(p, fp, *newp); | | 737 | fd_affix(p, fp, *newp); |
738 | return 0; | | 738 | return 0; |
739 | } | | 739 | } |
740 | | | 740 | |
741 | /* | | 741 | /* |
742 | * dup2 operation. | | 742 | * dup2 operation. |
743 | */ | | 743 | */ |
744 | int | | 744 | int |
745 | fd_dup2(file_t *fp, unsigned newfd, int flags) | | 745 | fd_dup2(file_t *fp, unsigned newfd, int flags) |
746 | { | | 746 | { |
747 | filedesc_t *fdp = curlwp->l_fd; | | 747 | filedesc_t *fdp = curlwp->l_fd; |
748 | fdfile_t *ff; | | 748 | fdfile_t *ff; |
749 | fdtab_t *dt; | | 749 | fdtab_t *dt; |
750 | | | 750 | |
751 | if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE)) | | 751 | if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE)) |
752 | return EINVAL; | | 752 | return EINVAL; |
753 | /* | | 753 | /* |
754 | * Ensure there are enough slots in the descriptor table, | | 754 | * Ensure there are enough slots in the descriptor table, |
755 | * and allocate an fdfile_t up front in case we need it. | | 755 | * and allocate an fdfile_t up front in case we need it. |
756 | */ | | 756 | */ |
757 | while (newfd >= atomic_load_consume(&fdp->fd_dt)->dt_nfiles) { | | 757 | while (newfd >= atomic_load_consume(&fdp->fd_dt)->dt_nfiles) { |
758 | fd_tryexpand(curproc); | | 758 | fd_tryexpand(curproc); |
759 | } | | 759 | } |
760 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); | | 760 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); |
761 | | | 761 | |
762 | /* | | 762 | /* |
763 | * If there is already a file open, close it. If the file is | | 763 | * If there is already a file open, close it. If the file is |
764 | * half open, wait for it to be constructed before closing it. | | 764 | * half open, wait for it to be constructed before closing it. |
765 | * XXX Potential for deadlock here? | | 765 | * XXX Potential for deadlock here? |
766 | */ | | 766 | */ |
767 | mutex_enter(&fdp->fd_lock); | | 767 | mutex_enter(&fdp->fd_lock); |
768 | while (fd_isused(fdp, newfd)) { | | 768 | while (fd_isused(fdp, newfd)) { |
769 | mutex_exit(&fdp->fd_lock); | | 769 | mutex_exit(&fdp->fd_lock); |
770 | if (fd_getfile(newfd) != NULL) { | | 770 | if (fd_getfile(newfd) != NULL) { |
771 | (void)fd_close(newfd); | | 771 | (void)fd_close(newfd); |
772 | } else { | | 772 | } else { |
773 | /* | | 773 | /* |
774 | * Crummy, but unlikely to happen. | | 774 | * Crummy, but unlikely to happen. |
775 | * Can occur if we interrupt another | | 775 | * Can occur if we interrupt another |
776 | * thread while it is opening a file. | | 776 | * thread while it is opening a file. |
777 | */ | | 777 | */ |
778 | kpause("dup2", false, 1, NULL); | | 778 | kpause("dup2", false, 1, NULL); |
779 | } | | 779 | } |
780 | mutex_enter(&fdp->fd_lock); | | 780 | mutex_enter(&fdp->fd_lock); |
781 | } | | 781 | } |
782 | dt = fdp->fd_dt; | | 782 | dt = fdp->fd_dt; |
783 | if (dt->dt_ff[newfd] == NULL) { | | 783 | if (dt->dt_ff[newfd] == NULL) { |
784 | KASSERT(newfd >= NDFDFILE); | | 784 | KASSERT(newfd >= NDFDFILE); |
785 | dt->dt_ff[newfd] = ff; | | 785 | dt->dt_ff[newfd] = ff; |
786 | ff = NULL; | | 786 | ff = NULL; |
787 | } | | 787 | } |
788 | fd_used(fdp, newfd); | | 788 | fd_used(fdp, newfd); |
789 | mutex_exit(&fdp->fd_lock); | | 789 | mutex_exit(&fdp->fd_lock); |
790 | | | 790 | |
791 | dt->dt_ff[newfd]->ff_exclose = (flags & O_CLOEXEC) != 0; | | 791 | dt->dt_ff[newfd]->ff_exclose = (flags & O_CLOEXEC) != 0; |
792 | fp->f_flag |= flags & (FNONBLOCK|FNOSIGPIPE); | | 792 | fp->f_flag |= flags & (FNONBLOCK|FNOSIGPIPE); |
793 | /* Slot is now allocated. Insert copy of the file. */ | | 793 | /* Slot is now allocated. Insert copy of the file. */ |
794 | fd_affix(curproc, fp, newfd); | | 794 | fd_affix(curproc, fp, newfd); |
795 | if (ff != NULL) { | | 795 | if (ff != NULL) { |
796 | pool_cache_put(fdfile_cache, ff); | | 796 | pool_cache_put(fdfile_cache, ff); |
797 | } | | 797 | } |
798 | return 0; | | 798 | return 0; |
799 | } | | 799 | } |
800 | | | 800 | |
801 | /* | | 801 | /* |
802 | * Drop reference to a file structure. | | 802 | * Drop reference to a file structure. |
803 | */ | | 803 | */ |
804 | int | | 804 | int |
805 | closef(file_t *fp) | | 805 | closef(file_t *fp) |
806 | { | | 806 | { |
807 | struct flock lf; | | 807 | struct flock lf; |
808 | int error; | | 808 | int error; |
809 | | | 809 | |
810 | /* | | 810 | /* |
811 | * Drop reference. If referenced elsewhere it's still open | | 811 | * Drop reference. If referenced elsewhere it's still open |
812 | * and we have nothing more to do. | | 812 | * and we have nothing more to do. |
813 | */ | | 813 | */ |
814 | mutex_enter(&fp->f_lock); | | 814 | mutex_enter(&fp->f_lock); |
815 | KASSERT(fp->f_count > 0); | | 815 | KASSERT(fp->f_count > 0); |
816 | if (--fp->f_count > 0) { | | 816 | if (--fp->f_count > 0) { |
817 | mutex_exit(&fp->f_lock); | | 817 | mutex_exit(&fp->f_lock); |
818 | return 0; | | 818 | return 0; |
819 | } | | 819 | } |
820 | KASSERT(fp->f_count == 0); | | 820 | KASSERT(fp->f_count == 0); |
821 | mutex_exit(&fp->f_lock); | | 821 | mutex_exit(&fp->f_lock); |
822 | | | 822 | |
823 | /* We held the last reference - release locks, close and free. */ | | 823 | /* We held the last reference - release locks, close and free. */ |
824 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { | | 824 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { |
825 | lf.l_whence = SEEK_SET; | | 825 | lf.l_whence = SEEK_SET; |
826 | lf.l_start = 0; | | 826 | lf.l_start = 0; |
827 | lf.l_len = 0; | | 827 | lf.l_len = 0; |
828 | lf.l_type = F_UNLCK; | | 828 | lf.l_type = F_UNLCK; |
829 | (void)VOP_ADVLOCK(fp->f_vnode, fp, F_UNLCK, &lf, F_FLOCK); | | 829 | (void)VOP_ADVLOCK(fp->f_vnode, fp, F_UNLCK, &lf, F_FLOCK); |
830 | } | | 830 | } |
831 | if (fp->f_ops != NULL) { | | 831 | if (fp->f_ops != NULL) { |
832 | error = (*fp->f_ops->fo_close)(fp); | | 832 | error = (*fp->f_ops->fo_close)(fp); |
833 | } else { | | 833 | } else { |
834 | error = 0; | | 834 | error = 0; |
835 | } | | 835 | } |
836 | KASSERT(fp->f_count == 0); | | 836 | KASSERT(fp->f_count == 0); |
837 | KASSERT(fp->f_cred != NULL); | | 837 | KASSERT(fp->f_cred != NULL); |
838 | pool_cache_put(file_cache, fp); | | 838 | pool_cache_put(file_cache, fp); |
839 | | | 839 | |
840 | return error; | | 840 | return error; |
841 | } | | 841 | } |
842 | | | 842 | |
843 | /* | | 843 | /* |
844 | * Allocate a file descriptor for the process. | | 844 | * Allocate a file descriptor for the process. |
845 | */ | | 845 | */ |
846 | int | | 846 | int |
847 | fd_alloc(proc_t *p, int want, int *result) | | 847 | fd_alloc(proc_t *p, int want, int *result) |
848 | { | | 848 | { |
849 | filedesc_t *fdp = p->p_fd; | | 849 | filedesc_t *fdp = p->p_fd; |
850 | int i, lim, last, error, hi; | | 850 | int i, lim, last, error, hi; |
851 | u_int off; | | 851 | u_int off; |
852 | fdtab_t *dt; | | 852 | fdtab_t *dt; |
853 | | | 853 | |
854 | KASSERT(p == curproc || p == &proc0); | | 854 | KASSERT(p == curproc || p == &proc0); |
855 | | | 855 | |
856 | /* | | 856 | /* |
857 | * Search for a free descriptor starting at the higher | | 857 | * Search for a free descriptor starting at the higher |
858 | * of want or fd_freefile. | | 858 | * of want or fd_freefile. |
859 | */ | | 859 | */ |
860 | mutex_enter(&fdp->fd_lock); | | 860 | mutex_enter(&fdp->fd_lock); |
861 | fd_checkmaps(fdp); | | 861 | fd_checkmaps(fdp); |
862 | dt = fdp->fd_dt; | | 862 | dt = fdp->fd_dt; |
863 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | | 863 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); |
864 | lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); | | 864 | lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); |
865 | last = uimin(dt->dt_nfiles, lim); | | 865 | last = uimin(dt->dt_nfiles, lim); |
866 | for (;;) { | | 866 | for (;;) { |
867 | if ((i = want) < fdp->fd_freefile) | | 867 | if ((i = want) < fdp->fd_freefile) |
868 | i = fdp->fd_freefile; | | 868 | i = fdp->fd_freefile; |
869 | off = i >> NDENTRYSHIFT; | | 869 | off = i >> NDENTRYSHIFT; |
870 | hi = fd_next_zero(fdp, fdp->fd_himap, off, | | 870 | hi = fd_next_zero(fdp, fdp->fd_himap, off, |
871 | (last + NDENTRIES - 1) >> NDENTRYSHIFT); | | 871 | (last + NDENTRIES - 1) >> NDENTRYSHIFT); |
872 | if (hi == -1) | | 872 | if (hi == -1) |
873 | break; | | 873 | break; |
874 | i = fd_next_zero(fdp, &fdp->fd_lomap[hi], | | 874 | i = fd_next_zero(fdp, &fdp->fd_lomap[hi], |
875 | hi > off ? 0 : i & NDENTRYMASK, NDENTRIES); | | 875 | hi > off ? 0 : i & NDENTRYMASK, NDENTRIES); |
876 | if (i == -1) { | | 876 | if (i == -1) { |
877 | /* | | 877 | /* |
878 | * Free file descriptor in this block was | | 878 | * Free file descriptor in this block was |
879 | * below want, try again with higher want. | | 879 | * below want, try again with higher want. |
880 | */ | | 880 | */ |
881 | want = (hi + 1) << NDENTRYSHIFT; | | 881 | want = (hi + 1) << NDENTRYSHIFT; |
882 | continue; | | 882 | continue; |
883 | } | | 883 | } |
884 | i += (hi << NDENTRYSHIFT); | | 884 | i += (hi << NDENTRYSHIFT); |
885 | if (i >= last) { | | 885 | if (i >= last) { |
886 | break; | | 886 | break; |
887 | } | | 887 | } |
888 | if (dt->dt_ff[i] == NULL) { | | 888 | if (dt->dt_ff[i] == NULL) { |
889 | KASSERT(i >= NDFDFILE); | | 889 | KASSERT(i >= NDFDFILE); |
890 | dt->dt_ff[i] = pool_cache_get(fdfile_cache, PR_WAITOK); | | 890 | dt->dt_ff[i] = pool_cache_get(fdfile_cache, PR_WAITOK); |
891 | } | | 891 | } |
892 | KASSERT(dt->dt_ff[i]->ff_file == NULL); | | 892 | KASSERT(dt->dt_ff[i]->ff_file == NULL); |
893 | fd_used(fdp, i); | | 893 | fd_used(fdp, i); |
894 | if (want <= fdp->fd_freefile) { | | 894 | if (want <= fdp->fd_freefile) { |
895 | fdp->fd_freefile = i; | | 895 | fdp->fd_freefile = i; |
896 | } | | 896 | } |
897 | *result = i; | | 897 | *result = i; |
898 | KASSERT(i >= NDFDFILE || | | 898 | KASSERT(i >= NDFDFILE || |
899 | dt->dt_ff[i] == (fdfile_t *)fdp->fd_dfdfile[i]); | | 899 | dt->dt_ff[i] == (fdfile_t *)fdp->fd_dfdfile[i]); |
900 | fd_checkmaps(fdp); | | 900 | fd_checkmaps(fdp); |
901 | mutex_exit(&fdp->fd_lock); | | 901 | mutex_exit(&fdp->fd_lock); |
902 | return 0; | | 902 | return 0; |
903 | } | | 903 | } |
904 | | | 904 | |
905 | /* No space in current array. Let the caller expand and retry. */ | | 905 | /* No space in current array. Let the caller expand and retry. */ |
906 | error = (dt->dt_nfiles >= lim) ? EMFILE : ENOSPC; | | 906 | error = (dt->dt_nfiles >= lim) ? EMFILE : ENOSPC; |
907 | mutex_exit(&fdp->fd_lock); | | 907 | mutex_exit(&fdp->fd_lock); |
908 | return error; | | 908 | return error; |
909 | } | | 909 | } |
910 | | | 910 | |
911 | /* | | 911 | /* |
912 | * Allocate memory for a descriptor table. | | 912 | * Allocate memory for a descriptor table. |
913 | */ | | 913 | */ |
914 | static fdtab_t * | | 914 | static fdtab_t * |
915 | fd_dtab_alloc(int n) | | 915 | fd_dtab_alloc(int n) |
916 | { | | 916 | { |
917 | fdtab_t *dt; | | 917 | fdtab_t *dt; |
918 | size_t sz; | | 918 | size_t sz; |
919 | | | 919 | |
920 | KASSERT(n > NDFILE); | | 920 | KASSERT(n > NDFILE); |
921 | | | 921 | |
922 | sz = sizeof(*dt) + (n - NDFILE) * sizeof(dt->dt_ff[0]); | | 922 | sz = sizeof(*dt) + (n - NDFILE) * sizeof(dt->dt_ff[0]); |
923 | dt = kmem_alloc(sz, KM_SLEEP); | | 923 | dt = kmem_alloc(sz, KM_SLEEP); |
924 | #ifdef DIAGNOSTIC | | 924 | #ifdef DIAGNOSTIC |
925 | memset(dt, 0xff, sz); | | 925 | memset(dt, 0xff, sz); |
926 | #endif | | 926 | #endif |
927 | dt->dt_nfiles = n; | | 927 | dt->dt_nfiles = n; |
928 | dt->dt_link = NULL; | | 928 | dt->dt_link = NULL; |
929 | return dt; | | 929 | return dt; |
930 | } | | 930 | } |
931 | | | 931 | |
932 | /* | | 932 | /* |
933 | * Free a descriptor table, and all tables linked for deferred free. | | 933 | * Free a descriptor table, and all tables linked for deferred free. |
934 | */ | | 934 | */ |
935 | static void | | 935 | static void |
936 | fd_dtab_free(fdtab_t *dt) | | 936 | fd_dtab_free(fdtab_t *dt) |
937 | { | | 937 | { |
938 | fdtab_t *next; | | 938 | fdtab_t *next; |
939 | size_t sz; | | 939 | size_t sz; |
940 | | | 940 | |
941 | do { | | 941 | do { |
942 | next = dt->dt_link; | | 942 | next = dt->dt_link; |
943 | KASSERT(dt->dt_nfiles > NDFILE); | | 943 | KASSERT(dt->dt_nfiles > NDFILE); |
944 | sz = sizeof(*dt) + | | 944 | sz = sizeof(*dt) + |
945 | (dt->dt_nfiles - NDFILE) * sizeof(dt->dt_ff[0]); | | 945 | (dt->dt_nfiles - NDFILE) * sizeof(dt->dt_ff[0]); |
946 | #ifdef DIAGNOSTIC | | 946 | #ifdef DIAGNOSTIC |
947 | memset(dt, 0xff, sz); | | 947 | memset(dt, 0xff, sz); |
948 | #endif | | 948 | #endif |
949 | kmem_free(dt, sz); | | 949 | kmem_free(dt, sz); |
950 | dt = next; | | 950 | dt = next; |
951 | } while (dt != NULL); | | 951 | } while (dt != NULL); |
952 | } | | 952 | } |
953 | | | 953 | |
954 | /* | | 954 | /* |
955 | * Allocate descriptor bitmap. | | 955 | * Allocate descriptor bitmap. |
956 | */ | | 956 | */ |
957 | static void | | 957 | static void |
958 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) | | 958 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) |
959 | { | | 959 | { |
960 | uint8_t *ptr; | | 960 | uint8_t *ptr; |
961 | size_t szlo, szhi; | | 961 | size_t szlo, szhi; |
962 | | | 962 | |
963 | KASSERT(n > NDENTRIES); | | 963 | KASSERT(n > NDENTRIES); |
964 | | | 964 | |
965 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | | 965 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); |
966 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | | 966 | szhi = NDHISLOTS(n) * sizeof(uint32_t); |
967 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); | | 967 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); |
968 | *lo = (uint32_t *)ptr; | | 968 | *lo = (uint32_t *)ptr; |
969 | *hi = (uint32_t *)(ptr + szlo); | | 969 | *hi = (uint32_t *)(ptr + szlo); |
970 | } | | 970 | } |
971 | | | 971 | |
972 | /* | | 972 | /* |
973 | * Free descriptor bitmap. | | 973 | * Free descriptor bitmap. |
974 | */ | | 974 | */ |
975 | static void | | 975 | static void |
976 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) | | 976 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) |
977 | { | | 977 | { |
978 | size_t szlo, szhi; | | 978 | size_t szlo, szhi; |
979 | | | 979 | |
980 | KASSERT(n > NDENTRIES); | | 980 | KASSERT(n > NDENTRIES); |
981 | | | 981 | |
982 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | | 982 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); |
983 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | | 983 | szhi = NDHISLOTS(n) * sizeof(uint32_t); |
984 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); | | 984 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); |
985 | kmem_free(lo, szlo + szhi); | | 985 | kmem_free(lo, szlo + szhi); |
986 | } | | 986 | } |
987 | | | 987 | |
988 | /* | | 988 | /* |
989 | * Expand a process' descriptor table. | | 989 | * Expand a process' descriptor table. |
990 | */ | | 990 | */ |
991 | void | | 991 | void |
992 | fd_tryexpand(proc_t *p) | | 992 | fd_tryexpand(proc_t *p) |
993 | { | | 993 | { |
994 | filedesc_t *fdp; | | 994 | filedesc_t *fdp; |
995 | int i, numfiles, oldnfiles; | | 995 | int i, numfiles, oldnfiles; |
996 | fdtab_t *newdt, *dt; | | 996 | fdtab_t *newdt, *dt; |
997 | uint32_t *newhimap, *newlomap; | | 997 | uint32_t *newhimap, *newlomap; |
998 | | | 998 | |
999 | KASSERT(p == curproc || p == &proc0); | | 999 | KASSERT(p == curproc || p == &proc0); |
1000 | | | 1000 | |
1001 | fdp = p->p_fd; | | 1001 | fdp = p->p_fd; |
1002 | newhimap = NULL; | | 1002 | newhimap = NULL; |
1003 | newlomap = NULL; | | 1003 | newlomap = NULL; |
1004 | oldnfiles = atomic_load_consume(&fdp->fd_dt)->dt_nfiles; | | 1004 | oldnfiles = atomic_load_consume(&fdp->fd_dt)->dt_nfiles; |
1005 | | | 1005 | |
1006 | if (oldnfiles < NDEXTENT) | | 1006 | if (oldnfiles < NDEXTENT) |
1007 | numfiles = NDEXTENT; | | 1007 | numfiles = NDEXTENT; |
1008 | else | | 1008 | else |
1009 | numfiles = 2 * oldnfiles; | | 1009 | numfiles = 2 * oldnfiles; |
1010 | | | 1010 | |
1011 | newdt = fd_dtab_alloc(numfiles); | | 1011 | newdt = fd_dtab_alloc(numfiles); |
1012 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | | 1012 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { |
1013 | fd_map_alloc(numfiles, &newlomap, &newhimap); | | 1013 | fd_map_alloc(numfiles, &newlomap, &newhimap); |
1014 | } | | 1014 | } |
1015 | | | 1015 | |
1016 | mutex_enter(&fdp->fd_lock); | | 1016 | mutex_enter(&fdp->fd_lock); |
1017 | dt = fdp->fd_dt; | | 1017 | dt = fdp->fd_dt; |
1018 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | | 1018 | KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); |
1019 | if (dt->dt_nfiles != oldnfiles) { | | 1019 | if (dt->dt_nfiles != oldnfiles) { |
1020 | /* fdp changed; caller must retry */ | | 1020 | /* fdp changed; caller must retry */ |
1021 | mutex_exit(&fdp->fd_lock); | | 1021 | mutex_exit(&fdp->fd_lock); |
1022 | fd_dtab_free(newdt); | | 1022 | fd_dtab_free(newdt); |
1023 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | | 1023 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { |
1024 | fd_map_free(numfiles, newlomap, newhimap); | | 1024 | fd_map_free(numfiles, newlomap, newhimap); |
1025 | } | | 1025 | } |
1026 | return; | | 1026 | return; |
1027 | } | | 1027 | } |
1028 | | | 1028 | |
1029 | /* Copy the existing descriptor table and zero the new portion. */ | | 1029 | /* Copy the existing descriptor table and zero the new portion. */ |
1030 | i = sizeof(fdfile_t *) * oldnfiles; | | 1030 | i = sizeof(fdfile_t *) * oldnfiles; |
1031 | memcpy(newdt->dt_ff, dt->dt_ff, i); | | 1031 | memcpy(newdt->dt_ff, dt->dt_ff, i); |
1032 | memset((uint8_t *)newdt->dt_ff + i, 0, | | 1032 | memset((uint8_t *)newdt->dt_ff + i, 0, |
1033 | numfiles * sizeof(fdfile_t *) - i); | | 1033 | numfiles * sizeof(fdfile_t *) - i); |
1034 | | | 1034 | |
1035 | /* | | 1035 | /* |
1036 | * Link old descriptor array into list to be discarded. We defer | | 1036 | * Link old descriptor array into list to be discarded. We defer |
1037 | * freeing until the last reference to the descriptor table goes | | 1037 | * freeing until the last reference to the descriptor table goes |
1038 | * away (usually process exit). This allows us to do lockless | | 1038 | * away (usually process exit). This allows us to do lockless |
1039 | * lookups in fd_getfile(). | | 1039 | * lookups in fd_getfile(). |
1040 | */ | | 1040 | */ |
1041 | if (oldnfiles > NDFILE) { | | 1041 | if (oldnfiles > NDFILE) { |
1042 | if (fdp->fd_refcnt > 1) { | | 1042 | if (fdp->fd_refcnt > 1) { |
1043 | newdt->dt_link = dt; | | 1043 | newdt->dt_link = dt; |
1044 | } else { | | 1044 | } else { |
1045 | fd_dtab_free(dt); | | 1045 | fd_dtab_free(dt); |
1046 | } | | 1046 | } |
1047 | } | | 1047 | } |
1048 | | | 1048 | |
1049 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | | 1049 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { |
1050 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); | | 1050 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); |
1051 | memcpy(newhimap, fdp->fd_himap, i); | | 1051 | memcpy(newhimap, fdp->fd_himap, i); |
1052 | memset((uint8_t *)newhimap + i, 0, | | 1052 | memset((uint8_t *)newhimap + i, 0, |
1053 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); | | 1053 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); |
1054 | | | 1054 | |
1055 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); | | 1055 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); |
1056 | memcpy(newlomap, fdp->fd_lomap, i); | | 1056 | memcpy(newlomap, fdp->fd_lomap, i); |
1057 | memset((uint8_t *)newlomap + i, 0, | | 1057 | memset((uint8_t *)newlomap + i, 0, |
1058 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); | | 1058 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); |
1059 | | | 1059 | |
1060 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { | | 1060 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { |
1061 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); | | 1061 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); |
1062 | } | | 1062 | } |
1063 | fdp->fd_himap = newhimap; | | 1063 | fdp->fd_himap = newhimap; |
1064 | fdp->fd_lomap = newlomap; | | 1064 | fdp->fd_lomap = newlomap; |
1065 | } | | 1065 | } |
1066 | | | 1066 | |
1067 | /* | | 1067 | /* |
1068 | * All other modifications must become globally visible before | | 1068 | * All other modifications must become globally visible before |
1069 | * the change to fd_dt. See fd_getfile(). | | 1069 | * the change to fd_dt. See fd_getfile(). |
1070 | */ | | 1070 | */ |
1071 | atomic_store_release(&fdp->fd_dt, newdt); | | 1071 | atomic_store_release(&fdp->fd_dt, newdt); |
1072 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | | 1072 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); |
| @@ -1313,1052 +1313,1052 @@ fd_init(filedesc_t *fdp) | | | @@ -1313,1052 +1313,1052 @@ fd_init(filedesc_t *fdp) |
1313 | return fdp; | | 1313 | return fdp; |
1314 | } | | 1314 | } |
1315 | | | 1315 | |
1316 | /* | | 1316 | /* |
1317 | * Initialize a file descriptor table. | | 1317 | * Initialize a file descriptor table. |
1318 | */ | | 1318 | */ |
1319 | static int | | 1319 | static int |
1320 | filedesc_ctor(void *arg, void *obj, int flag) | | 1320 | filedesc_ctor(void *arg, void *obj, int flag) |
1321 | { | | 1321 | { |
1322 | filedesc_t *fdp = obj; | | 1322 | filedesc_t *fdp = obj; |
1323 | fdfile_t **ffp; | | 1323 | fdfile_t **ffp; |
1324 | int i; | | 1324 | int i; |
1325 | | | 1325 | |
1326 | memset(fdp, 0, sizeof(*fdp)); | | 1326 | memset(fdp, 0, sizeof(*fdp)); |
1327 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); | | 1327 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); |
1328 | fdp->fd_lastfile = -1; | | 1328 | fdp->fd_lastfile = -1; |
1329 | fdp->fd_lastkqfile = -1; | | 1329 | fdp->fd_lastkqfile = -1; |
1330 | fdp->fd_dt = &fdp->fd_dtbuiltin; | | 1330 | fdp->fd_dt = &fdp->fd_dtbuiltin; |
1331 | fdp->fd_dtbuiltin.dt_nfiles = NDFILE; | | 1331 | fdp->fd_dtbuiltin.dt_nfiles = NDFILE; |
1332 | fdp->fd_himap = fdp->fd_dhimap; | | 1332 | fdp->fd_himap = fdp->fd_dhimap; |
1333 | fdp->fd_lomap = fdp->fd_dlomap; | | 1333 | fdp->fd_lomap = fdp->fd_dlomap; |
1334 | | | 1334 | |
1335 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); | | 1335 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); |
1336 | for (i = 0, ffp = fdp->fd_dt->dt_ff; i < NDFDFILE; i++, ffp++) { | | 1336 | for (i = 0, ffp = fdp->fd_dt->dt_ff; i < NDFDFILE; i++, ffp++) { |
1337 | *ffp = (fdfile_t *)fdp->fd_dfdfile[i]; | | 1337 | *ffp = (fdfile_t *)fdp->fd_dfdfile[i]; |
1338 | (void)fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); | | 1338 | (void)fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); |
1339 | } | | 1339 | } |
1340 | | | 1340 | |
1341 | return 0; | | 1341 | return 0; |
1342 | } | | 1342 | } |
1343 | | | 1343 | |
1344 | static void | | 1344 | static void |
1345 | filedesc_dtor(void *arg, void *obj) | | 1345 | filedesc_dtor(void *arg, void *obj) |
1346 | { | | 1346 | { |
1347 | filedesc_t *fdp = obj; | | 1347 | filedesc_t *fdp = obj; |
1348 | int i; | | 1348 | int i; |
1349 | | | 1349 | |
1350 | for (i = 0; i < NDFDFILE; i++) { | | 1350 | for (i = 0; i < NDFDFILE; i++) { |
1351 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); | | 1351 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); |
1352 | } | | 1352 | } |
1353 | | | 1353 | |
1354 | mutex_destroy(&fdp->fd_lock); | | 1354 | mutex_destroy(&fdp->fd_lock); |
1355 | } | | 1355 | } |
1356 | | | 1356 | |
1357 | /* | | 1357 | /* |
1358 | * Make p share curproc's filedesc structure. | | 1358 | * Make p share curproc's filedesc structure. |
1359 | */ | | 1359 | */ |
1360 | void | | 1360 | void |
1361 | fd_share(struct proc *p) | | 1361 | fd_share(struct proc *p) |
1362 | { | | 1362 | { |
1363 | filedesc_t *fdp; | | 1363 | filedesc_t *fdp; |
1364 | | | 1364 | |
1365 | fdp = curlwp->l_fd; | | 1365 | fdp = curlwp->l_fd; |
1366 | p->p_fd = fdp; | | 1366 | p->p_fd = fdp; |
1367 | atomic_inc_uint(&fdp->fd_refcnt); | | 1367 | atomic_inc_uint(&fdp->fd_refcnt); |
1368 | } | | 1368 | } |
1369 | | | 1369 | |
1370 | /* | | 1370 | /* |
1371 | * Acquire a hold on a filedesc structure. | | 1371 | * Acquire a hold on a filedesc structure. |
1372 | */ | | 1372 | */ |
1373 | void | | 1373 | void |
1374 | fd_hold(lwp_t *l) | | 1374 | fd_hold(lwp_t *l) |
1375 | { | | 1375 | { |
1376 | filedesc_t *fdp = l->l_fd; | | 1376 | filedesc_t *fdp = l->l_fd; |
1377 | | | 1377 | |
1378 | atomic_inc_uint(&fdp->fd_refcnt); | | 1378 | atomic_inc_uint(&fdp->fd_refcnt); |
1379 | } | | 1379 | } |
1380 | | | 1380 | |
1381 | /* | | 1381 | /* |
1382 | * Copy a filedesc structure. | | 1382 | * Copy a filedesc structure. |
1383 | */ | | 1383 | */ |
1384 | filedesc_t * | | 1384 | filedesc_t * |
1385 | fd_copy(void) | | 1385 | fd_copy(void) |
1386 | { | | 1386 | { |
1387 | filedesc_t *newfdp, *fdp; | | 1387 | filedesc_t *newfdp, *fdp; |
1388 | fdfile_t *ff, **ffp, **nffp, *ff2; | | 1388 | fdfile_t *ff, **ffp, **nffp, *ff2; |
1389 | int i, j, numfiles, lastfile, newlast; | | 1389 | int i, j, numfiles, lastfile, newlast; |
1390 | file_t *fp; | | 1390 | file_t *fp; |
1391 | fdtab_t *newdt; | | 1391 | fdtab_t *newdt; |
1392 | | | 1392 | |
1393 | fdp = curproc->p_fd; | | 1393 | fdp = curproc->p_fd; |
1394 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); | | 1394 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); |
1395 | newfdp->fd_refcnt = 1; | | 1395 | newfdp->fd_refcnt = 1; |
1396 | | | 1396 | |
1397 | #ifdef DIAGNOSTIC | | 1397 | #ifdef DIAGNOSTIC |
1398 | KASSERT(newfdp->fd_lastfile == -1); | | 1398 | KASSERT(newfdp->fd_lastfile == -1); |
1399 | KASSERT(newfdp->fd_lastkqfile == -1); | | 1399 | KASSERT(newfdp->fd_lastkqfile == -1); |
1400 | KASSERT(newfdp->fd_knhash == NULL); | | 1400 | KASSERT(newfdp->fd_knhash == NULL); |
1401 | KASSERT(newfdp->fd_freefile == 0); | | 1401 | KASSERT(newfdp->fd_freefile == 0); |
1402 | KASSERT(newfdp->fd_exclose == false); | | 1402 | KASSERT(newfdp->fd_exclose == false); |
1403 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | | 1403 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); |
1404 | KASSERT(newfdp->fd_dtbuiltin.dt_nfiles == NDFILE); | | 1404 | KASSERT(newfdp->fd_dtbuiltin.dt_nfiles == NDFILE); |
1405 | for (i = 0; i < NDFDFILE; i++) { | | 1405 | for (i = 0; i < NDFDFILE; i++) { |
1406 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == | | 1406 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == |
1407 | (fdfile_t *)&newfdp->fd_dfdfile[i]); | | 1407 | (fdfile_t *)&newfdp->fd_dfdfile[i]); |
1408 | } | | 1408 | } |
1409 | for (i = NDFDFILE; i < NDFILE; i++) { | | 1409 | for (i = NDFDFILE; i < NDFILE; i++) { |
1410 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == NULL); | | 1410 | KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == NULL); |
1411 | } | | 1411 | } |
1412 | #endif /* DIAGNOSTIC */ | | 1412 | #endif /* DIAGNOSTIC */ |
1413 | | | 1413 | |
1414 | mutex_enter(&fdp->fd_lock); | | 1414 | mutex_enter(&fdp->fd_lock); |
1415 | fd_checkmaps(fdp); | | 1415 | fd_checkmaps(fdp); |
1416 | numfiles = fdp->fd_dt->dt_nfiles; | | 1416 | numfiles = fdp->fd_dt->dt_nfiles; |
1417 | lastfile = fdp->fd_lastfile; | | 1417 | lastfile = fdp->fd_lastfile; |
1418 | | | 1418 | |
1419 | /* | | 1419 | /* |
1420 | * If the number of open files fits in the internal arrays | | 1420 | * If the number of open files fits in the internal arrays |
1421 | * of the open file structure, use them, otherwise allocate | | 1421 | * of the open file structure, use them, otherwise allocate |
1422 | * additional memory for the number of descriptors currently | | 1422 | * additional memory for the number of descriptors currently |
1423 | * in use. | | 1423 | * in use. |
1424 | */ | | 1424 | */ |
1425 | if (lastfile < NDFILE) { | | 1425 | if (lastfile < NDFILE) { |
1426 | i = NDFILE; | | 1426 | i = NDFILE; |
1427 | newdt = newfdp->fd_dt; | | 1427 | newdt = newfdp->fd_dt; |
1428 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); | | 1428 | KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); |
1429 | } else { | | 1429 | } else { |
1430 | /* | | 1430 | /* |
1431 | * Compute the smallest multiple of NDEXTENT needed | | 1431 | * Compute the smallest multiple of NDEXTENT needed |
1432 | * for the file descriptors currently in use, | | 1432 | * for the file descriptors currently in use, |
1433 | * allowing the table to shrink. | | 1433 | * allowing the table to shrink. |
1434 | */ | | 1434 | */ |
1435 | i = numfiles; | | 1435 | i = numfiles; |
1436 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { | | 1436 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { |
1437 | i /= 2; | | 1437 | i /= 2; |
1438 | } | | 1438 | } |
1439 | KASSERT(i > NDFILE); | | 1439 | KASSERT(i > NDFILE); |
1440 | newdt = fd_dtab_alloc(i); | | 1440 | newdt = fd_dtab_alloc(i); |
1441 | newfdp->fd_dt = newdt; | | 1441 | newfdp->fd_dt = newdt; |
1442 | memcpy(newdt->dt_ff, newfdp->fd_dtbuiltin.dt_ff, | | 1442 | memcpy(newdt->dt_ff, newfdp->fd_dtbuiltin.dt_ff, |
1443 | NDFDFILE * sizeof(fdfile_t **)); | | 1443 | NDFDFILE * sizeof(fdfile_t **)); |
1444 | memset(newdt->dt_ff + NDFDFILE, 0, | | 1444 | memset(newdt->dt_ff + NDFDFILE, 0, |
1445 | (i - NDFDFILE) * sizeof(fdfile_t **)); | | 1445 | (i - NDFDFILE) * sizeof(fdfile_t **)); |
1446 | } | | 1446 | } |
1447 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { | | 1447 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { |
1448 | newfdp->fd_himap = newfdp->fd_dhimap; | | 1448 | newfdp->fd_himap = newfdp->fd_dhimap; |
1449 | newfdp->fd_lomap = newfdp->fd_dlomap; | | 1449 | newfdp->fd_lomap = newfdp->fd_dlomap; |
1450 | } else { | | 1450 | } else { |
1451 | fd_map_alloc(i, &newfdp->fd_lomap, &newfdp->fd_himap); | | 1451 | fd_map_alloc(i, &newfdp->fd_lomap, &newfdp->fd_himap); |
1452 | KASSERT(i >= NDENTRIES * NDENTRIES); | | 1452 | KASSERT(i >= NDENTRIES * NDENTRIES); |
1453 | memset(newfdp->fd_himap, 0, NDHISLOTS(i)*sizeof(uint32_t)); | | 1453 | memset(newfdp->fd_himap, 0, NDHISLOTS(i)*sizeof(uint32_t)); |
1454 | memset(newfdp->fd_lomap, 0, NDLOSLOTS(i)*sizeof(uint32_t)); | | 1454 | memset(newfdp->fd_lomap, 0, NDLOSLOTS(i)*sizeof(uint32_t)); |
1455 | } | | 1455 | } |
1456 | newfdp->fd_freefile = fdp->fd_freefile; | | 1456 | newfdp->fd_freefile = fdp->fd_freefile; |
1457 | newfdp->fd_exclose = fdp->fd_exclose; | | 1457 | newfdp->fd_exclose = fdp->fd_exclose; |
1458 | | | 1458 | |
1459 | ffp = fdp->fd_dt->dt_ff; | | 1459 | ffp = fdp->fd_dt->dt_ff; |
1460 | nffp = newdt->dt_ff; | | 1460 | nffp = newdt->dt_ff; |
1461 | newlast = -1; | | 1461 | newlast = -1; |
1462 | for (i = 0; i <= lastfile; i++, ffp++, nffp++) { | | 1462 | for (i = 0; i <= lastfile; i++, ffp++, nffp++) { |
1463 | KASSERT(i >= NDFDFILE || | | 1463 | KASSERT(i >= NDFDFILE || |
1464 | *nffp == (fdfile_t *)newfdp->fd_dfdfile[i]); | | 1464 | *nffp == (fdfile_t *)newfdp->fd_dfdfile[i]); |
1465 | ff = *ffp; | | 1465 | ff = *ffp; |
1466 | if (ff == NULL || | | 1466 | if (ff == NULL || |
1467 | (fp = atomic_load_consume(&ff->ff_file)) == NULL) { | | 1467 | (fp = atomic_load_consume(&ff->ff_file)) == NULL) { |
1468 | /* Descriptor unused, or descriptor half open. */ | | 1468 | /* Descriptor unused, or descriptor half open. */ |
1469 | KASSERT(!fd_isused(newfdp, i)); | | 1469 | KASSERT(!fd_isused(newfdp, i)); |
1470 | continue; | | 1470 | continue; |
1471 | } | | 1471 | } |
1472 | if (__predict_false(fp->f_type == DTYPE_KQUEUE)) { | | 1472 | if (__predict_false(fp->f_type == DTYPE_KQUEUE)) { |
1473 | /* kqueue descriptors cannot be copied. */ | | 1473 | /* kqueue descriptors cannot be copied. */ |
1474 | if (i < newfdp->fd_freefile) { | | 1474 | if (i < newfdp->fd_freefile) { |
1475 | newfdp->fd_freefile = i; | | 1475 | newfdp->fd_freefile = i; |
1476 | } | | 1476 | } |
1477 | continue; | | 1477 | continue; |
1478 | } | | 1478 | } |
1479 | /* It's active: add a reference to the file. */ | | 1479 | /* It's active: add a reference to the file. */ |
1480 | mutex_enter(&fp->f_lock); | | 1480 | mutex_enter(&fp->f_lock); |
1481 | fp->f_count++; | | 1481 | fp->f_count++; |
1482 | mutex_exit(&fp->f_lock); | | 1482 | mutex_exit(&fp->f_lock); |
1483 | | | 1483 | |
1484 | /* Allocate an fdfile_t to represent it. */ | | 1484 | /* Allocate an fdfile_t to represent it. */ |
1485 | if (i >= NDFDFILE) { | | 1485 | if (i >= NDFDFILE) { |
1486 | ff2 = pool_cache_get(fdfile_cache, PR_WAITOK); | | 1486 | ff2 = pool_cache_get(fdfile_cache, PR_WAITOK); |
1487 | *nffp = ff2; | | 1487 | *nffp = ff2; |
1488 | } else { | | 1488 | } else { |
1489 | ff2 = newdt->dt_ff[i]; | | 1489 | ff2 = newdt->dt_ff[i]; |
1490 | } | | 1490 | } |
1491 | ff2->ff_file = fp; | | 1491 | ff2->ff_file = fp; |
1492 | ff2->ff_exclose = ff->ff_exclose; | | 1492 | ff2->ff_exclose = ff->ff_exclose; |
1493 | ff2->ff_allocated = true; | | 1493 | ff2->ff_allocated = true; |
1494 | | | 1494 | |
1495 | /* Fix up bitmaps. */ | | 1495 | /* Fix up bitmaps. */ |
1496 | j = i >> NDENTRYSHIFT; | | 1496 | j = i >> NDENTRYSHIFT; |
1497 | KASSERT((newfdp->fd_lomap[j] & (1U << (i & NDENTRYMASK))) == 0); | | 1497 | KASSERT((newfdp->fd_lomap[j] & (1U << (i & NDENTRYMASK))) == 0); |
1498 | newfdp->fd_lomap[j] |= 1U << (i & NDENTRYMASK); | | 1498 | newfdp->fd_lomap[j] |= 1U << (i & NDENTRYMASK); |
1499 | if (__predict_false(newfdp->fd_lomap[j] == ~0)) { | | 1499 | if (__predict_false(newfdp->fd_lomap[j] == ~0)) { |
1500 | KASSERT((newfdp->fd_himap[j >> NDENTRYSHIFT] & | | 1500 | KASSERT((newfdp->fd_himap[j >> NDENTRYSHIFT] & |
1501 | (1U << (j & NDENTRYMASK))) == 0); | | 1501 | (1U << (j & NDENTRYMASK))) == 0); |
1502 | newfdp->fd_himap[j >> NDENTRYSHIFT] |= | | 1502 | newfdp->fd_himap[j >> NDENTRYSHIFT] |= |
1503 | 1U << (j & NDENTRYMASK); | | 1503 | 1U << (j & NDENTRYMASK); |
1504 | } | | 1504 | } |
1505 | newlast = i; | | 1505 | newlast = i; |
1506 | } | | 1506 | } |
1507 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); | | 1507 | KASSERT(newdt->dt_ff[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); |
1508 | newfdp->fd_lastfile = newlast; | | 1508 | newfdp->fd_lastfile = newlast; |
1509 | fd_checkmaps(newfdp); | | 1509 | fd_checkmaps(newfdp); |
1510 | mutex_exit(&fdp->fd_lock); | | 1510 | mutex_exit(&fdp->fd_lock); |
1511 | | | 1511 | |
1512 | return newfdp; | | 1512 | return newfdp; |
1513 | } | | 1513 | } |
1514 | | | 1514 | |
1515 | /* | | 1515 | /* |
1516 | * Release a filedesc structure. | | 1516 | * Release a filedesc structure. |
1517 | */ | | 1517 | */ |
1518 | void | | 1518 | void |
1519 | fd_free(void) | | 1519 | fd_free(void) |
1520 | { | | 1520 | { |
1521 | fdfile_t *ff; | | 1521 | fdfile_t *ff; |
1522 | file_t *fp; | | 1522 | file_t *fp; |
1523 | int fd, nf; | | 1523 | int fd, nf; |
1524 | fdtab_t *dt; | | 1524 | fdtab_t *dt; |
1525 | lwp_t * const l = curlwp; | | 1525 | lwp_t * const l = curlwp; |
1526 | filedesc_t * const fdp = l->l_fd; | | 1526 | filedesc_t * const fdp = l->l_fd; |
1527 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; | | 1527 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; |
1528 | | | 1528 | |
1529 | KASSERT(atomic_load_consume(&fdp->fd_dt)->dt_ff[0] == | | 1529 | KASSERT(atomic_load_consume(&fdp->fd_dt)->dt_ff[0] == |
1530 | (fdfile_t *)fdp->fd_dfdfile[0]); | | 1530 | (fdfile_t *)fdp->fd_dfdfile[0]); |
1531 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | | 1531 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); |
1532 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | | 1532 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); |
1533 | | | 1533 | |
1534 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 1534 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
1535 | membar_exit(); | | 1535 | membar_exit(); |
1536 | #endif | | 1536 | #endif |
1537 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | | 1537 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) |
1538 | return; | | 1538 | return; |
1539 | | | 1539 | |
1540 | /* | | 1540 | /* |
1541 | * Close any files that the process holds open. | | 1541 | * Close any files that the process holds open. |
1542 | */ | | 1542 | */ |
1543 | dt = fdp->fd_dt; | | 1543 | dt = fdp->fd_dt; |
1544 | fd_checkmaps(fdp); | | 1544 | fd_checkmaps(fdp); |
1545 | #ifdef DEBUG | | 1545 | #ifdef DEBUG |
1546 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ | | 1546 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ |
1547 | #endif | | 1547 | #endif |
1548 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { | | 1548 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { |
1549 | ff = dt->dt_ff[fd]; | | 1549 | ff = dt->dt_ff[fd]; |
1550 | KASSERT(fd >= NDFDFILE || | | 1550 | KASSERT(fd >= NDFDFILE || |
1551 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 1551 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
1552 | if (ff == NULL) | | 1552 | if (ff == NULL) |
1553 | continue; | | 1553 | continue; |
1554 | if ((fp = atomic_load_consume(&ff->ff_file)) != NULL) { | | 1554 | if ((fp = atomic_load_consume(&ff->ff_file)) != NULL) { |
1555 | /* | | 1555 | /* |
1556 | * Must use fd_close() here if there is | | 1556 | * Must use fd_close() here if there is |
1557 | * a reference from kqueue or we might have posix | | 1557 | * a reference from kqueue or we might have posix |
1558 | * advisory locks. | | 1558 | * advisory locks. |
1559 | */ | | 1559 | */ |
1560 | if (__predict_true(ff->ff_refcnt == 0) && | | 1560 | if (__predict_true(ff->ff_refcnt == 0) && |
1561 | (noadvlock || fp->f_type != DTYPE_VNODE)) { | | 1561 | (noadvlock || fp->f_type != DTYPE_VNODE)) { |
1562 | ff->ff_file = NULL; | | 1562 | ff->ff_file = NULL; |
1563 | ff->ff_exclose = false; | | 1563 | ff->ff_exclose = false; |
1564 | ff->ff_allocated = false; | | 1564 | ff->ff_allocated = false; |
1565 | closef(fp); | | 1565 | closef(fp); |
1566 | } else { | | 1566 | } else { |
1567 | ff->ff_refcnt++; | | 1567 | ff->ff_refcnt++; |
1568 | fd_close(fd); | | 1568 | fd_close(fd); |
1569 | } | | 1569 | } |
1570 | } | | 1570 | } |
1571 | KASSERT(ff->ff_refcnt == 0); | | 1571 | KASSERT(ff->ff_refcnt == 0); |
1572 | KASSERT(ff->ff_file == NULL); | | 1572 | KASSERT(ff->ff_file == NULL); |
1573 | KASSERT(!ff->ff_exclose); | | 1573 | KASSERT(!ff->ff_exclose); |
1574 | KASSERT(!ff->ff_allocated); | | 1574 | KASSERT(!ff->ff_allocated); |
1575 | if (fd >= NDFDFILE) { | | 1575 | if (fd >= NDFDFILE) { |
1576 | pool_cache_put(fdfile_cache, ff); | | 1576 | pool_cache_put(fdfile_cache, ff); |
1577 | dt->dt_ff[fd] = NULL; | | 1577 | dt->dt_ff[fd] = NULL; |
1578 | } | | 1578 | } |
1579 | } | | 1579 | } |
1580 | | | 1580 | |
1581 | /* | | 1581 | /* |
1582 | * Clean out the descriptor table for the next user and return | | 1582 | * Clean out the descriptor table for the next user and return |
1583 | * to the cache. | | 1583 | * to the cache. |
1584 | */ | | 1584 | */ |
1585 | if (__predict_false(dt != &fdp->fd_dtbuiltin)) { | | 1585 | if (__predict_false(dt != &fdp->fd_dtbuiltin)) { |
1586 | fd_dtab_free(fdp->fd_dt); | | 1586 | fd_dtab_free(fdp->fd_dt); |
1587 | /* Otherwise, done above. */ | | 1587 | /* Otherwise, done above. */ |
1588 | memset(&fdp->fd_dtbuiltin.dt_ff[NDFDFILE], 0, | | 1588 | memset(&fdp->fd_dtbuiltin.dt_ff[NDFDFILE], 0, |
1589 | (NDFILE - NDFDFILE) * sizeof(fdp->fd_dtbuiltin.dt_ff[0])); | | 1589 | (NDFILE - NDFDFILE) * sizeof(fdp->fd_dtbuiltin.dt_ff[0])); |
1590 | fdp->fd_dt = &fdp->fd_dtbuiltin; | | 1590 | fdp->fd_dt = &fdp->fd_dtbuiltin; |
1591 | } | | 1591 | } |
1592 | if (__predict_false(NDHISLOTS(nf) > NDHISLOTS(NDFILE))) { | | 1592 | if (__predict_false(NDHISLOTS(nf) > NDHISLOTS(NDFILE))) { |
1593 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); | | 1593 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); |
1594 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); | | 1594 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); |
1595 | fd_map_free(nf, fdp->fd_lomap, fdp->fd_himap); | | 1595 | fd_map_free(nf, fdp->fd_lomap, fdp->fd_himap); |
1596 | } | | 1596 | } |
1597 | if (__predict_false(fdp->fd_knhash != NULL)) { | | 1597 | if (__predict_false(fdp->fd_knhash != NULL)) { |
1598 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); | | 1598 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); |
1599 | fdp->fd_knhash = NULL; | | 1599 | fdp->fd_knhash = NULL; |
1600 | fdp->fd_knhashmask = 0; | | 1600 | fdp->fd_knhashmask = 0; |
1601 | } else { | | 1601 | } else { |
1602 | KASSERT(fdp->fd_knhashmask == 0); | | 1602 | KASSERT(fdp->fd_knhashmask == 0); |
1603 | } | | 1603 | } |
1604 | fdp->fd_dt = &fdp->fd_dtbuiltin; | | 1604 | fdp->fd_dt = &fdp->fd_dtbuiltin; |
1605 | fdp->fd_lastkqfile = -1; | | 1605 | fdp->fd_lastkqfile = -1; |
1606 | fdp->fd_lastfile = -1; | | 1606 | fdp->fd_lastfile = -1; |
1607 | fdp->fd_freefile = 0; | | 1607 | fdp->fd_freefile = 0; |
1608 | fdp->fd_exclose = false; | | 1608 | fdp->fd_exclose = false; |
1609 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - | | 1609 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - |
1610 | offsetof(filedesc_t, fd_startzero)); | | 1610 | offsetof(filedesc_t, fd_startzero)); |
1611 | fdp->fd_himap = fdp->fd_dhimap; | | 1611 | fdp->fd_himap = fdp->fd_dhimap; |
1612 | fdp->fd_lomap = fdp->fd_dlomap; | | 1612 | fdp->fd_lomap = fdp->fd_dlomap; |
1613 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | | 1613 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); |
1614 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | | 1614 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); |
1615 | KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); | | 1615 | KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); |
1616 | #ifdef DEBUG | | 1616 | #ifdef DEBUG |
1617 | fdp->fd_refcnt = 0; /* see fd_checkmaps */ | | 1617 | fdp->fd_refcnt = 0; /* see fd_checkmaps */ |
1618 | #endif | | 1618 | #endif |
1619 | fd_checkmaps(fdp); | | 1619 | fd_checkmaps(fdp); |
1620 | pool_cache_put(filedesc_cache, fdp); | | 1620 | pool_cache_put(filedesc_cache, fdp); |
1621 | } | | 1621 | } |
1622 | | | 1622 | |
1623 | /* | | 1623 | /* |
1624 | * File Descriptor pseudo-device driver (/dev/fd/). | | 1624 | * File Descriptor pseudo-device driver (/dev/fd/). |
1625 | * | | 1625 | * |
1626 | * Opening minor device N dup()s the file (if any) connected to file | | 1626 | * Opening minor device N dup()s the file (if any) connected to file |
1627 | * descriptor N belonging to the calling process. Note that this driver | | 1627 | * descriptor N belonging to the calling process. Note that this driver |
1628 | * consists of only the ``open()'' routine, because all subsequent | | 1628 | * consists of only the ``open()'' routine, because all subsequent |
1629 | * references to this file will be direct to the other driver. | | 1629 | * references to this file will be direct to the other driver. |
1630 | */ | | 1630 | */ |
1631 | static int | | 1631 | static int |
1632 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) | | 1632 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) |
1633 | { | | 1633 | { |
1634 | | | 1634 | |
1635 | /* | | 1635 | /* |
1636 | * XXX Kludge: set dupfd to contain the value of the | | 1636 | * XXX Kludge: set dupfd to contain the value of the |
1637 | * the file descriptor being sought for duplication. The error | | 1637 | * the file descriptor being sought for duplication. The error |
1638 | * return ensures that the vnode for this device will be released | | 1638 | * return ensures that the vnode for this device will be released |
1639 | * by vn_open. Open will detect this special error and take the | | 1639 | * by vn_open. Open will detect this special error and take the |
1640 | * actions in fd_dupopen below. Other callers of vn_open or VOP_OPEN | | 1640 | * actions in fd_dupopen below. Other callers of vn_open or VOP_OPEN |
1641 | * will simply report the error. | | 1641 | * will simply report the error. |
1642 | */ | | 1642 | */ |
1643 | l->l_dupfd = minor(dev); /* XXX */ | | 1643 | l->l_dupfd = minor(dev); /* XXX */ |
1644 | return EDUPFD; | | 1644 | return EDUPFD; |
1645 | } | | 1645 | } |
1646 | | | 1646 | |
1647 | /* | | 1647 | /* |
1648 | * Duplicate the specified descriptor to a free descriptor. | | 1648 | * Duplicate the specified descriptor to a free descriptor. |
1649 | */ | | 1649 | */ |
1650 | int | | 1650 | int |
1651 | fd_dupopen(int old, int *newp, int mode, int error) | | 1651 | fd_dupopen(int old, int *newp, int mode, int error) |
1652 | { | | 1652 | { |
1653 | filedesc_t *fdp; | | 1653 | filedesc_t *fdp; |
1654 | fdfile_t *ff; | | 1654 | fdfile_t *ff; |
1655 | file_t *fp; | | 1655 | file_t *fp; |
1656 | fdtab_t *dt; | | 1656 | fdtab_t *dt; |
1657 | | | 1657 | |
1658 | if ((fp = fd_getfile(old)) == NULL) { | | 1658 | if ((fp = fd_getfile(old)) == NULL) { |
1659 | return EBADF; | | 1659 | return EBADF; |
1660 | } | | 1660 | } |
1661 | fdp = curlwp->l_fd; | | 1661 | fdp = curlwp->l_fd; |
1662 | dt = atomic_load_consume(&fdp->fd_dt); | | 1662 | dt = atomic_load_consume(&fdp->fd_dt); |
1663 | ff = dt->dt_ff[old]; | | 1663 | ff = dt->dt_ff[old]; |
1664 | | | 1664 | |
1665 | /* | | 1665 | /* |
1666 | * There are two cases of interest here. | | 1666 | * There are two cases of interest here. |
1667 | * | | 1667 | * |
1668 | * For EDUPFD simply dup (old) to file descriptor | | 1668 | * For EDUPFD simply dup (old) to file descriptor |
1669 | * (new) and return. | | 1669 | * (new) and return. |
1670 | * | | 1670 | * |
1671 | * For EMOVEFD steal away the file structure from (old) and | | 1671 | * For EMOVEFD steal away the file structure from (old) and |
1672 | * store it in (new). (old) is effectively closed by | | 1672 | * store it in (new). (old) is effectively closed by |
1673 | * this operation. | | 1673 | * this operation. |
1674 | * | | 1674 | * |
1675 | * Any other error code is just returned. | | 1675 | * Any other error code is just returned. |
1676 | */ | | 1676 | */ |
1677 | switch (error) { | | 1677 | switch (error) { |
1678 | case EDUPFD: | | 1678 | case EDUPFD: |
1679 | /* | | 1679 | /* |
1680 | * Check that the mode the file is being opened for is a | | 1680 | * Check that the mode the file is being opened for is a |
1681 | * subset of the mode of the existing descriptor. | | 1681 | * subset of the mode of the existing descriptor. |
1682 | */ | | 1682 | */ |
1683 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { | | 1683 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { |
1684 | error = EACCES; | | 1684 | error = EACCES; |
1685 | break; | | 1685 | break; |
1686 | } | | 1686 | } |
1687 | | | 1687 | |
1688 | /* Copy it. */ | | 1688 | /* Copy it. */ |
1689 | error = fd_dup(fp, 0, newp, ff->ff_exclose); | | 1689 | error = fd_dup(fp, 0, newp, ff->ff_exclose); |
1690 | break; | | 1690 | break; |
1691 | | | 1691 | |
1692 | case EMOVEFD: | | 1692 | case EMOVEFD: |
1693 | /* Copy it. */ | | 1693 | /* Copy it. */ |
1694 | error = fd_dup(fp, 0, newp, ff->ff_exclose); | | 1694 | error = fd_dup(fp, 0, newp, ff->ff_exclose); |
1695 | if (error != 0) { | | 1695 | if (error != 0) { |
1696 | break; | | 1696 | break; |
1697 | } | | 1697 | } |
1698 | | | 1698 | |
1699 | /* Steal away the file pointer from 'old'. */ | | 1699 | /* Steal away the file pointer from 'old'. */ |
1700 | (void)fd_close(old); | | 1700 | (void)fd_close(old); |
1701 | return 0; | | 1701 | return 0; |
1702 | } | | 1702 | } |
1703 | | | 1703 | |
1704 | fd_putfile(old); | | 1704 | fd_putfile(old); |
1705 | return error; | | 1705 | return error; |
1706 | } | | 1706 | } |
1707 | | | 1707 | |
1708 | /* | | 1708 | /* |
1709 | * Close open files on exec. | | 1709 | * Close open files on exec. |
1710 | */ | | 1710 | */ |
1711 | void | | 1711 | void |
1712 | fd_closeexec(void) | | 1712 | fd_closeexec(void) |
1713 | { | | 1713 | { |
1714 | proc_t *p; | | 1714 | proc_t *p; |
1715 | filedesc_t *fdp; | | 1715 | filedesc_t *fdp; |
1716 | fdfile_t *ff; | | 1716 | fdfile_t *ff; |
1717 | lwp_t *l; | | 1717 | lwp_t *l; |
1718 | fdtab_t *dt; | | 1718 | fdtab_t *dt; |
1719 | int fd; | | 1719 | int fd; |
1720 | | | 1720 | |
1721 | l = curlwp; | | 1721 | l = curlwp; |
1722 | p = l->l_proc; | | 1722 | p = l->l_proc; |
1723 | fdp = p->p_fd; | | 1723 | fdp = p->p_fd; |
1724 | | | 1724 | |
1725 | if (fdp->fd_refcnt > 1) { | | 1725 | if (fdp->fd_refcnt > 1) { |
1726 | fdp = fd_copy(); | | 1726 | fdp = fd_copy(); |
1727 | fd_free(); | | 1727 | fd_free(); |
1728 | p->p_fd = fdp; | | 1728 | p->p_fd = fdp; |
1729 | l->l_fd = fdp; | | 1729 | l->l_fd = fdp; |
1730 | } | | 1730 | } |
1731 | if (!fdp->fd_exclose) { | | 1731 | if (!fdp->fd_exclose) { |
1732 | return; | | 1732 | return; |
1733 | } | | 1733 | } |
1734 | fdp->fd_exclose = false; | | 1734 | fdp->fd_exclose = false; |
1735 | dt = atomic_load_consume(&fdp->fd_dt); | | 1735 | dt = atomic_load_consume(&fdp->fd_dt); |
1736 | | | 1736 | |
1737 | for (fd = 0; fd <= fdp->fd_lastfile; fd++) { | | 1737 | for (fd = 0; fd <= fdp->fd_lastfile; fd++) { |
1738 | if ((ff = dt->dt_ff[fd]) == NULL) { | | 1738 | if ((ff = dt->dt_ff[fd]) == NULL) { |
1739 | KASSERT(fd >= NDFDFILE); | | 1739 | KASSERT(fd >= NDFDFILE); |
1740 | continue; | | 1740 | continue; |
1741 | } | | 1741 | } |
1742 | KASSERT(fd >= NDFDFILE || | | 1742 | KASSERT(fd >= NDFDFILE || |
1743 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 1743 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
1744 | if (ff->ff_file == NULL) | | 1744 | if (ff->ff_file == NULL) |
1745 | continue; | | 1745 | continue; |
1746 | if (ff->ff_exclose) { | | 1746 | if (ff->ff_exclose) { |
1747 | /* | | 1747 | /* |
1748 | * We need a reference to close the file. | | 1748 | * We need a reference to close the file. |
1749 | * No other threads can see the fdfile_t at | | 1749 | * No other threads can see the fdfile_t at |
1750 | * this point, so don't bother locking. | | 1750 | * this point, so don't bother locking. |
1751 | */ | | 1751 | */ |
1752 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); | | 1752 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); |
1753 | ff->ff_refcnt++; | | 1753 | ff->ff_refcnt++; |
1754 | fd_close(fd); | | 1754 | fd_close(fd); |
1755 | } | | 1755 | } |
1756 | } | | 1756 | } |
1757 | } | | 1757 | } |
1758 | | | 1758 | |
1759 | /* | | 1759 | /* |
1760 | * Sets descriptor owner. If the owner is a process, 'pgid' | | 1760 | * Sets descriptor owner. If the owner is a process, 'pgid' |
1761 | * is set to positive value, process ID. If the owner is process group, | | 1761 | * is set to positive value, process ID. If the owner is process group, |
1762 | * 'pgid' is set to -pg_id. | | 1762 | * 'pgid' is set to -pg_id. |
1763 | */ | | 1763 | */ |
1764 | int | | 1764 | int |
1765 | fsetown(pid_t *pgid, u_long cmd, const void *data) | | 1765 | fsetown(pid_t *pgid, u_long cmd, const void *data) |
1766 | { | | 1766 | { |
1767 | pid_t id = *(const pid_t *)data; | | 1767 | pid_t id = *(const pid_t *)data; |
1768 | int error; | | 1768 | int error; |
1769 | | | 1769 | |
1770 | switch (cmd) { | | 1770 | switch (cmd) { |
1771 | case TIOCSPGRP: | | 1771 | case TIOCSPGRP: |
1772 | if (id < 0) | | 1772 | if (id < 0) |
1773 | return EINVAL; | | 1773 | return EINVAL; |
1774 | id = -id; | | 1774 | id = -id; |
1775 | break; | | 1775 | break; |
1776 | default: | | 1776 | default: |
1777 | break; | | 1777 | break; |
1778 | } | | 1778 | } |
1779 | if (id > 0) { | | 1779 | if (id > 0) { |
1780 | mutex_enter(&proc_lock); | | 1780 | mutex_enter(&proc_lock); |
1781 | error = proc_find(id) ? 0 : ESRCH; | | 1781 | error = proc_find(id) ? 0 : ESRCH; |
1782 | mutex_exit(&proc_lock); | | 1782 | mutex_exit(&proc_lock); |
1783 | } else if (id < 0) { | | 1783 | } else if (id < 0) { |
1784 | error = pgid_in_session(curproc, -id); | | 1784 | error = pgid_in_session(curproc, -id); |
1785 | } else { | | 1785 | } else { |
1786 | error = 0; | | 1786 | error = 0; |
1787 | } | | 1787 | } |
1788 | if (!error) { | | 1788 | if (!error) { |
1789 | *pgid = id; | | 1789 | *pgid = id; |
1790 | } | | 1790 | } |
1791 | return error; | | 1791 | return error; |
1792 | } | | 1792 | } |
1793 | | | 1793 | |
1794 | void | | 1794 | void |
1795 | fd_set_exclose(struct lwp *l, int fd, bool exclose) | | 1795 | fd_set_exclose(struct lwp *l, int fd, bool exclose) |
1796 | { | | 1796 | { |
1797 | filedesc_t *fdp = l->l_fd; | | 1797 | filedesc_t *fdp = l->l_fd; |
1798 | fdfile_t *ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; | | 1798 | fdfile_t *ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; |
1799 | | | 1799 | |
1800 | ff->ff_exclose = exclose; | | 1800 | ff->ff_exclose = exclose; |
1801 | if (exclose) | | 1801 | if (exclose) |
1802 | fdp->fd_exclose = true; | | 1802 | fdp->fd_exclose = true; |
1803 | } | | 1803 | } |
1804 | | | 1804 | |
1805 | /* | | 1805 | /* |
1806 | * Return descriptor owner information. If the value is positive, | | 1806 | * Return descriptor owner information. If the value is positive, |
1807 | * it's process ID. If it's negative, it's process group ID and | | 1807 | * it's process ID. If it's negative, it's process group ID and |
1808 | * needs the sign removed before use. | | 1808 | * needs the sign removed before use. |
1809 | */ | | 1809 | */ |
1810 | int | | 1810 | int |
1811 | fgetown(pid_t pgid, u_long cmd, void *data) | | 1811 | fgetown(pid_t pgid, u_long cmd, void *data) |
1812 | { | | 1812 | { |
1813 | | | 1813 | |
1814 | switch (cmd) { | | 1814 | switch (cmd) { |
1815 | case TIOCGPGRP: | | 1815 | case TIOCGPGRP: |
1816 | *(int *)data = -pgid; | | 1816 | *(int *)data = -pgid; |
1817 | break; | | 1817 | break; |
1818 | default: | | 1818 | default: |
1819 | *(int *)data = pgid; | | 1819 | *(int *)data = pgid; |
1820 | break; | | 1820 | break; |
1821 | } | | 1821 | } |
1822 | return 0; | | 1822 | return 0; |
1823 | } | | 1823 | } |
1824 | | | 1824 | |
1825 | /* | | 1825 | /* |
1826 | * Send signal to descriptor owner, either process or process group. | | 1826 | * Send signal to descriptor owner, either process or process group. |
1827 | */ | | 1827 | */ |
1828 | void | | 1828 | void |
1829 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) | | 1829 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) |
1830 | { | | 1830 | { |
1831 | ksiginfo_t ksi; | | 1831 | ksiginfo_t ksi; |
1832 | | | 1832 | |
1833 | KASSERT(!cpu_intr_p()); | | 1833 | KASSERT(!cpu_intr_p()); |
1834 | | | 1834 | |
1835 | if (pgid == 0) { | | 1835 | if (pgid == 0) { |
1836 | return; | | 1836 | return; |
1837 | } | | 1837 | } |
1838 | | | 1838 | |
1839 | KSI_INIT(&ksi); | | 1839 | KSI_INIT(&ksi); |
1840 | ksi.ksi_signo = signo; | | 1840 | ksi.ksi_signo = signo; |
1841 | ksi.ksi_code = code; | | 1841 | ksi.ksi_code = code; |
1842 | ksi.ksi_band = band; | | 1842 | ksi.ksi_band = band; |
1843 | | | 1843 | |
1844 | mutex_enter(&proc_lock); | | 1844 | mutex_enter(&proc_lock); |
1845 | if (pgid > 0) { | | 1845 | if (pgid > 0) { |
1846 | struct proc *p1; | | 1846 | struct proc *p1; |
1847 | | | 1847 | |
1848 | p1 = proc_find(pgid); | | 1848 | p1 = proc_find(pgid); |
1849 | if (p1 != NULL) { | | 1849 | if (p1 != NULL) { |
1850 | kpsignal(p1, &ksi, fdescdata); | | 1850 | kpsignal(p1, &ksi, fdescdata); |
1851 | } | | 1851 | } |
1852 | } else { | | 1852 | } else { |
1853 | struct pgrp *pgrp; | | 1853 | struct pgrp *pgrp; |
1854 | | | 1854 | |
1855 | KASSERT(pgid < 0); | | 1855 | KASSERT(pgid < 0); |
1856 | pgrp = pgrp_find(-pgid); | | 1856 | pgrp = pgrp_find(-pgid); |
1857 | if (pgrp != NULL) { | | 1857 | if (pgrp != NULL) { |
1858 | kpgsignal(pgrp, &ksi, fdescdata, 0); | | 1858 | kpgsignal(pgrp, &ksi, fdescdata, 0); |
1859 | } | | 1859 | } |
1860 | } | | 1860 | } |
1861 | mutex_exit(&proc_lock); | | 1861 | mutex_exit(&proc_lock); |
1862 | } | | 1862 | } |
1863 | | | 1863 | |
1864 | int | | 1864 | int |
1865 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, | | 1865 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, |
1866 | void *data) | | 1866 | void *data) |
1867 | { | | 1867 | { |
1868 | fdfile_t *ff; | | 1868 | fdfile_t *ff; |
1869 | filedesc_t *fdp; | | 1869 | filedesc_t *fdp; |
1870 | | | 1870 | |
1871 | fp->f_flag = flag & FMASK; | | 1871 | fp->f_flag = flag & FMASK; |
1872 | fdp = curproc->p_fd; | | 1872 | fdp = curproc->p_fd; |
1873 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; | | 1873 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; |
1874 | KASSERT(ff != NULL); | | 1874 | KASSERT(ff != NULL); |
1875 | ff->ff_exclose = (flag & O_CLOEXEC) != 0; | | 1875 | ff->ff_exclose = (flag & O_CLOEXEC) != 0; |
1876 | fp->f_type = DTYPE_MISC; | | 1876 | fp->f_type = DTYPE_MISC; |
1877 | fp->f_ops = fops; | | 1877 | fp->f_ops = fops; |
1878 | fp->f_data = data; | | 1878 | fp->f_data = data; |
1879 | curlwp->l_dupfd = fd; | | 1879 | curlwp->l_dupfd = fd; |
1880 | fd_affix(curproc, fp, fd); | | 1880 | fd_affix(curproc, fp, fd); |
1881 | | | 1881 | |
1882 | return EMOVEFD; | | 1882 | return EMOVEFD; |
1883 | } | | 1883 | } |
1884 | | | 1884 | |
1885 | int | | 1885 | int |
1886 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) | | 1886 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) |
1887 | { | | 1887 | { |
1888 | | | 1888 | |
1889 | if (cmd == F_SETFL) | | 1889 | if (cmd == F_SETFL) |
1890 | return 0; | | 1890 | return 0; |
1891 | | | 1891 | |
1892 | return EOPNOTSUPP; | | 1892 | return EOPNOTSUPP; |
1893 | } | | 1893 | } |
1894 | | | 1894 | |
1895 | int | | 1895 | int |
1896 | fnullop_poll(file_t *fp, int which) | | 1896 | fnullop_poll(file_t *fp, int which) |
1897 | { | | 1897 | { |
1898 | | | 1898 | |
1899 | return 0; | | 1899 | return 0; |
1900 | } | | 1900 | } |
1901 | | | 1901 | |
1902 | int | | 1902 | int |
1903 | fnullop_kqfilter(file_t *fp, struct knote *kn) | | 1903 | fnullop_kqfilter(file_t *fp, struct knote *kn) |
1904 | { | | 1904 | { |
1905 | | | 1905 | |
1906 | return EOPNOTSUPP; | | 1906 | return EOPNOTSUPP; |
1907 | } | | 1907 | } |
1908 | | | 1908 | |
1909 | void | | 1909 | void |
1910 | fnullop_restart(file_t *fp) | | 1910 | fnullop_restart(file_t *fp) |
1911 | { | | 1911 | { |
1912 | | | 1912 | |
1913 | } | | 1913 | } |
1914 | | | 1914 | |
1915 | int | | 1915 | int |
1916 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, | | 1916 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, |
1917 | kauth_cred_t cred, int flags) | | 1917 | kauth_cred_t cred, int flags) |
1918 | { | | 1918 | { |
1919 | | | 1919 | |
1920 | return EOPNOTSUPP; | | 1920 | return EOPNOTSUPP; |
1921 | } | | 1921 | } |
1922 | | | 1922 | |
1923 | int | | 1923 | int |
1924 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, | | 1924 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, |
1925 | kauth_cred_t cred, int flags) | | 1925 | kauth_cred_t cred, int flags) |
1926 | { | | 1926 | { |
1927 | | | 1927 | |
1928 | return EOPNOTSUPP; | | 1928 | return EOPNOTSUPP; |
1929 | } | | 1929 | } |
1930 | | | 1930 | |
1931 | int | | 1931 | int |
1932 | fbadop_ioctl(file_t *fp, u_long com, void *data) | | 1932 | fbadop_ioctl(file_t *fp, u_long com, void *data) |
1933 | { | | 1933 | { |
1934 | | | 1934 | |
1935 | return EOPNOTSUPP; | | 1935 | return EOPNOTSUPP; |
1936 | } | | 1936 | } |
1937 | | | 1937 | |
1938 | int | | 1938 | int |
1939 | fbadop_stat(file_t *fp, struct stat *sb) | | 1939 | fbadop_stat(file_t *fp, struct stat *sb) |
1940 | { | | 1940 | { |
1941 | | | 1941 | |
1942 | return EOPNOTSUPP; | | 1942 | return EOPNOTSUPP; |
1943 | } | | 1943 | } |
1944 | | | 1944 | |
1945 | int | | 1945 | int |
1946 | fbadop_close(file_t *fp) | | 1946 | fbadop_close(file_t *fp) |
1947 | { | | 1947 | { |
1948 | | | 1948 | |
1949 | return EOPNOTSUPP; | | 1949 | return EOPNOTSUPP; |
1950 | } | | 1950 | } |
1951 | | | 1951 | |
1952 | /* | | 1952 | /* |
1953 | * sysctl routines pertaining to file descriptors | | 1953 | * sysctl routines pertaining to file descriptors |
1954 | */ | | 1954 | */ |
1955 | | | 1955 | |
/* Initialized in sysctl_init() for now... */
extern kmutex_t sysctl_file_marker_lock;
/*
 * Generation number for the kern.file / kern.file2 (BYFILE) walks.
 * A file whose f_marker equals the current value has already been
 * visited during the walk in progress.  The value is bumped at the end
 * of each walk; when it wraps to 0 all f_marker fields are cleared and
 * it is bumped once more, so 0 is never used as a live generation.
 */
static u_int sysctl_file_marker = 1;
1959 | | | 1959 | |
1960 | /* | | 1960 | /* |
1961 | * Expects to be called with proc_lock and sysctl_file_marker_lock locked. | | 1961 | * Expects to be called with proc_lock and sysctl_file_marker_lock locked. |
1962 | */ | | 1962 | */ |
1963 | static void | | 1963 | static void |
1964 | sysctl_file_marker_reset(void) | | 1964 | sysctl_file_marker_reset(void) |
1965 | { | | 1965 | { |
1966 | struct proc *p; | | 1966 | struct proc *p; |
1967 | | | 1967 | |
1968 | PROCLIST_FOREACH(p, &allproc) { | | 1968 | PROCLIST_FOREACH(p, &allproc) { |
1969 | struct filedesc *fd = p->p_fd; | | 1969 | struct filedesc *fd = p->p_fd; |
1970 | fdtab_t *dt; | | 1970 | fdtab_t *dt; |
1971 | u_int i; | | 1971 | u_int i; |
1972 | | | 1972 | |
1973 | mutex_enter(&fd->fd_lock); | | 1973 | mutex_enter(&fd->fd_lock); |
1974 | dt = fd->fd_dt; | | 1974 | dt = fd->fd_dt; |
1975 | for (i = 0; i < dt->dt_nfiles; i++) { | | 1975 | for (i = 0; i < dt->dt_nfiles; i++) { |
1976 | struct file *fp; | | 1976 | struct file *fp; |
1977 | fdfile_t *ff; | | 1977 | fdfile_t *ff; |
1978 | | | 1978 | |
1979 | if ((ff = dt->dt_ff[i]) == NULL) { | | 1979 | if ((ff = dt->dt_ff[i]) == NULL) { |
1980 | continue; | | 1980 | continue; |
1981 | } | | 1981 | } |
1982 | if ((fp = atomic_load_consume(&ff->ff_file)) == NULL) { | | 1982 | if ((fp = atomic_load_consume(&ff->ff_file)) == NULL) { |
1983 | continue; | | 1983 | continue; |
1984 | } | | 1984 | } |
1985 | fp->f_marker = 0; | | 1985 | fp->f_marker = 0; |
1986 | } | | 1986 | } |
1987 | mutex_exit(&fd->fd_lock); | | 1987 | mutex_exit(&fd->fd_lock); |
1988 | } | | 1988 | } |
1989 | } | | 1989 | } |
1990 | | | 1990 | |
/*
 * sysctl helper routine for kern.file pseudo-subtree.
 *
 * Output layout in the caller's buffer: one 'struct filelist' (a copy
 * of filehead, or all zeroes when get_expose_address(curproc) is
 * false) followed by one 'struct file' image per open file, each
 * produced by fill_file().
 *
 * With oldp == NULL only a size estimate is stored in *oldlenp.
 * Otherwise *oldlenp receives the number of bytes written.  Returns 0,
 * ENOMEM when the buffer cannot hold another 'struct file', or the
 * error from sysctl_copyout().
 *
 * The sysctl lock is released for the duration of the walk and taken
 * again before returning.
 */
static int
sysctl_kern_file(SYSCTLFN_ARGS)
{
	const bool allowaddr = get_expose_address(curproc);
	struct filelist flist;
	int error;
	size_t buflen;
	struct file *fp, fbuf;
	char *start, *where;
	struct proc *p;

	start = where = oldp;
	buflen = *oldlenp;

	if (where == NULL) {
		/*
		 * overestimate by 10 files
		 */
		*oldlenp = sizeof(filehead) + (nfiles + 10) *
		    sizeof(struct file);
		return 0;
	}

	/*
	 * first sysctl_copyout filehead
	 */
	if (buflen < sizeof(filehead)) {
		/* Not even room for the header: report nothing, succeed. */
		*oldlenp = 0;
		return 0;
	}
	sysctl_unlock();
	if (allowaddr) {
		memcpy(&flist, &filehead, sizeof(flist));
	} else {
		memset(&flist, 0, sizeof(flist));
	}
	error = sysctl_copyout(l, &flist, where, sizeof(flist));
	if (error) {
		sysctl_relock();
		return error;
	}
	buflen -= sizeof(flist);
	where += sizeof(flist);

	/*
	 * followed by an array of file structures
	 */
	mutex_enter(&sysctl_file_marker_lock);
	mutex_enter(&proc_lock);
	PROCLIST_FOREACH(p, &allproc) {
		struct filedesc *fd;
		fdtab_t *dt;
		u_int i;

		if (p->p_stat == SIDL) {
			/* skip embryonic processes */
			continue;
		}
		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES),
		    NULL, NULL);
		mutex_exit(p->p_lock);
		if (error != 0) {
			/*
			 * Don't leak kauth retval if we're silently
			 * skipping this entry.
			 */
			error = 0;
			continue;
		}

		/*
		 * Grab a hold on the process.
		 */
		if (!rw_tryenter(&p->p_reflock, RW_READER)) {
			/* Could not get the hold without blocking: skip. */
			continue;
		}
		/* proc_lock is dropped while this process's table is walked. */
		mutex_exit(&proc_lock);

		fd = p->p_fd;
		mutex_enter(&fd->fd_lock);
		dt = fd->fd_dt;
		for (i = 0; i < dt->dt_nfiles; i++) {
			fdfile_t *ff;

			if ((ff = dt->dt_ff[i]) == NULL) {
				continue;
			}
			if ((fp = atomic_load_consume(&ff->ff_file)) == NULL) {
				continue;
			}

			mutex_enter(&fp->f_lock);

			/*
			 * Skip files with no references and files already
			 * reported during this walk (same file reachable
			 * through more than one descriptor).
			 */
			if ((fp->f_count == 0) ||
			    (fp->f_marker == sysctl_file_marker)) {
				mutex_exit(&fp->f_lock);
				continue;
			}

			/* Check that we have enough space. */
			if (buflen < sizeof(struct file)) {
				*oldlenp = where - start;
				mutex_exit(&fp->f_lock);
				error = ENOMEM;
				break;
			}

			/* Snapshot under f_lock, copy out after dropping it. */
			fill_file(&fbuf, fp);
			mutex_exit(&fp->f_lock);
			/* NOTE: fd_lock is still held across this copyout. */
			error = sysctl_copyout(l, &fbuf, where, sizeof(fbuf));
			if (error) {
				break;
			}
			buflen -= sizeof(struct file);
			where += sizeof(struct file);

			/* Remember that this file has been reported. */
			fp->f_marker = sysctl_file_marker;
		}
		mutex_exit(&fd->fd_lock);

		/*
		 * Release reference to process.
		 */
		mutex_enter(&proc_lock);
		rw_exit(&p->p_reflock);

		/* An inner-loop failure ends the whole walk. */
		if (error)
			break;
	}

	/* Start a new generation so the next walk sees all files as new. */
	sysctl_file_marker++;
	/* Reset all markers if wrapped. */
	if (sysctl_file_marker == 0) {
		sysctl_file_marker_reset();
		sysctl_file_marker++;
	}

	mutex_exit(&proc_lock);
	mutex_exit(&sysctl_file_marker_lock);

	*oldlenp = where - start;
	sysctl_relock();
	return error;
}
2141 | | | 2141 | |
2142 | /* | | 2142 | /* |
2143 | * sysctl helper function for kern.file2 | | 2143 | * sysctl helper function for kern.file2 |
2144 | */ | | 2144 | */ |
2145 | static int | | 2145 | static int |
2146 | sysctl_kern_file2(SYSCTLFN_ARGS) | | 2146 | sysctl_kern_file2(SYSCTLFN_ARGS) |
2147 | { | | 2147 | { |
2148 | struct proc *p; | | 2148 | struct proc *p; |
2149 | struct file *fp; | | 2149 | struct file *fp; |
2150 | struct filedesc *fd; | | 2150 | struct filedesc *fd; |
2151 | struct kinfo_file kf; | | 2151 | struct kinfo_file kf; |
2152 | char *dp; | | 2152 | char *dp; |
2153 | u_int i, op; | | 2153 | u_int i, op; |
2154 | size_t len, needed, elem_size, out_size; | | 2154 | size_t len, needed, elem_size, out_size; |
2155 | int error, arg, elem_count; | | 2155 | int error, arg, elem_count; |
2156 | fdfile_t *ff; | | 2156 | fdfile_t *ff; |
2157 | fdtab_t *dt; | | 2157 | fdtab_t *dt; |
2158 | | | 2158 | |
2159 | if (namelen == 1 && name[0] == CTL_QUERY) | | 2159 | if (namelen == 1 && name[0] == CTL_QUERY) |
2160 | return sysctl_query(SYSCTLFN_CALL(rnode)); | | 2160 | return sysctl_query(SYSCTLFN_CALL(rnode)); |
2161 | | | 2161 | |
2162 | if (namelen != 4) | | 2162 | if (namelen != 4) |
2163 | return EINVAL; | | 2163 | return EINVAL; |
2164 | | | 2164 | |
2165 | error = 0; | | 2165 | error = 0; |
2166 | dp = oldp; | | 2166 | dp = oldp; |
2167 | len = (oldp != NULL) ? *oldlenp : 0; | | 2167 | len = (oldp != NULL) ? *oldlenp : 0; |
2168 | op = name[0]; | | 2168 | op = name[0]; |
2169 | arg = name[1]; | | 2169 | arg = name[1]; |
2170 | elem_size = name[2]; | | 2170 | elem_size = name[2]; |
2171 | elem_count = name[3]; | | 2171 | elem_count = name[3]; |
2172 | out_size = MIN(sizeof(kf), elem_size); | | 2172 | out_size = MIN(sizeof(kf), elem_size); |
2173 | needed = 0; | | 2173 | needed = 0; |
2174 | | | 2174 | |
2175 | if (elem_size < 1 || elem_count < 0) | | 2175 | if (elem_size < 1 || elem_count < 0) |
2176 | return EINVAL; | | 2176 | return EINVAL; |
2177 | | | 2177 | |
2178 | switch (op) { | | 2178 | switch (op) { |
2179 | case KERN_FILE_BYFILE: | | 2179 | case KERN_FILE_BYFILE: |
2180 | case KERN_FILE_BYPID: | | 2180 | case KERN_FILE_BYPID: |
2181 | /* | | 2181 | /* |
2182 | * We're traversing the process list in both cases; the BYFILE | | 2182 | * We're traversing the process list in both cases; the BYFILE |
2183 | * case does additional work of keeping track of files already | | 2183 | * case does additional work of keeping track of files already |
2184 | * looked at. | | 2184 | * looked at. |
2185 | */ | | 2185 | */ |
2186 | | | 2186 | |
2187 | /* doesn't use arg so it must be zero */ | | 2187 | /* doesn't use arg so it must be zero */ |
2188 | if ((op == KERN_FILE_BYFILE) && (arg != 0)) | | 2188 | if ((op == KERN_FILE_BYFILE) && (arg != 0)) |
2189 | return EINVAL; | | 2189 | return EINVAL; |
2190 | | | 2190 | |
2191 | if ((op == KERN_FILE_BYPID) && (arg < -1)) | | 2191 | if ((op == KERN_FILE_BYPID) && (arg < -1)) |
2192 | /* -1 means all processes */ | | 2192 | /* -1 means all processes */ |
2193 | return EINVAL; | | 2193 | return EINVAL; |
2194 | | | 2194 | |
2195 | sysctl_unlock(); | | 2195 | sysctl_unlock(); |
2196 | if (op == KERN_FILE_BYFILE) | | 2196 | if (op == KERN_FILE_BYFILE) |
2197 | mutex_enter(&sysctl_file_marker_lock); | | 2197 | mutex_enter(&sysctl_file_marker_lock); |
2198 | mutex_enter(&proc_lock); | | 2198 | mutex_enter(&proc_lock); |
2199 | PROCLIST_FOREACH(p, &allproc) { | | 2199 | PROCLIST_FOREACH(p, &allproc) { |
2200 | if (p->p_stat == SIDL) { | | 2200 | if (p->p_stat == SIDL) { |
2201 | /* skip embryonic processes */ | | 2201 | /* skip embryonic processes */ |
2202 | continue; | | 2202 | continue; |
2203 | } | | 2203 | } |
2204 | if (arg > 0 && p->p_pid != arg) { | | 2204 | if (arg > 0 && p->p_pid != arg) { |
2205 | /* pick only the one we want */ | | 2205 | /* pick only the one we want */ |
2206 | /* XXX want 0 to mean "kernel files" */ | | 2206 | /* XXX want 0 to mean "kernel files" */ |
2207 | continue; | | 2207 | continue; |
2208 | } | | 2208 | } |
2209 | mutex_enter(p->p_lock); | | 2209 | mutex_enter(p->p_lock); |
2210 | error = kauth_authorize_process(l->l_cred, | | 2210 | error = kauth_authorize_process(l->l_cred, |
2211 | KAUTH_PROCESS_CANSEE, p, | | 2211 | KAUTH_PROCESS_CANSEE, p, |
2212 | KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), | | 2212 | KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), |
2213 | NULL, NULL); | | 2213 | NULL, NULL); |
2214 | mutex_exit(p->p_lock); | | 2214 | mutex_exit(p->p_lock); |
2215 | if (error != 0) { | | 2215 | if (error != 0) { |
2216 | /* | | 2216 | /* |
2217 | * Don't leak kauth retval if we're silently | | 2217 | * Don't leak kauth retval if we're silently |
2218 | * skipping this entry. | | 2218 | * skipping this entry. |
2219 | */ | | 2219 | */ |
2220 | error = 0; | | 2220 | error = 0; |
2221 | continue; | | 2221 | continue; |
2222 | } | | 2222 | } |
2223 | | | 2223 | |
2224 | /* | | 2224 | /* |
2225 | * Grab a hold on the process. | | 2225 | * Grab a hold on the process. |
2226 | */ | | 2226 | */ |
2227 | if (!rw_tryenter(&p->p_reflock, RW_READER)) { | | 2227 | if (!rw_tryenter(&p->p_reflock, RW_READER)) { |
2228 | continue; | | 2228 | continue; |
2229 | } | | 2229 | } |
2230 | mutex_exit(&proc_lock); | | 2230 | mutex_exit(&proc_lock); |
2231 | | | 2231 | |
2232 | fd = p->p_fd; | | 2232 | fd = p->p_fd; |
2233 | mutex_enter(&fd->fd_lock); | | 2233 | mutex_enter(&fd->fd_lock); |
2234 | dt = fd->fd_dt; | | 2234 | dt = fd->fd_dt; |
2235 | for (i = 0; i < dt->dt_nfiles; i++) { | | 2235 | for (i = 0; i < dt->dt_nfiles; i++) { |
2236 | if ((ff = dt->dt_ff[i]) == NULL) { | | 2236 | if ((ff = dt->dt_ff[i]) == NULL) { |
2237 | continue; | | 2237 | continue; |
2238 | } | | 2238 | } |
2239 | if ((fp = atomic_load_consume(&ff->ff_file)) == | | 2239 | if ((fp = atomic_load_consume(&ff->ff_file)) == |
2240 | NULL) { | | 2240 | NULL) { |
2241 | continue; | | 2241 | continue; |
2242 | } | | 2242 | } |
2243 | | | 2243 | |
2244 | if ((op == KERN_FILE_BYFILE) && | | 2244 | if ((op == KERN_FILE_BYFILE) && |
2245 | (fp->f_marker == sysctl_file_marker)) { | | 2245 | (fp->f_marker == sysctl_file_marker)) { |
2246 | continue; | | 2246 | continue; |
2247 | } | | 2247 | } |
2248 | if (len >= elem_size && elem_count > 0) { | | 2248 | if (len >= elem_size && elem_count > 0) { |
2249 | mutex_enter(&fp->f_lock); | | 2249 | mutex_enter(&fp->f_lock); |
2250 | fill_file2(&kf, fp, ff, i, p->p_pid); | | 2250 | fill_file2(&kf, fp, ff, i, p->p_pid); |
2251 | mutex_exit(&fp->f_lock); | | 2251 | mutex_exit(&fp->f_lock); |
2252 | mutex_exit(&fd->fd_lock); | | 2252 | mutex_exit(&fd->fd_lock); |
2253 | error = sysctl_copyout(l, | | 2253 | error = sysctl_copyout(l, |
2254 | &kf, dp, out_size); | | 2254 | &kf, dp, out_size); |
2255 | mutex_enter(&fd->fd_lock); | | 2255 | mutex_enter(&fd->fd_lock); |
2256 | if (error) | | 2256 | if (error) |
2257 | break; | | 2257 | break; |
2258 | dp += elem_size; | | 2258 | dp += elem_size; |
2259 | len -= elem_size; | | 2259 | len -= elem_size; |
2260 | } | | 2260 | } |
2261 | if (op == KERN_FILE_BYFILE) | | 2261 | if (op == KERN_FILE_BYFILE) |
2262 | fp->f_marker = sysctl_file_marker; | | 2262 | fp->f_marker = sysctl_file_marker; |
2263 | needed += elem_size; | | 2263 | needed += elem_size; |
2264 | if (elem_count > 0 && elem_count != INT_MAX) | | 2264 | if (elem_count > 0 && elem_count != INT_MAX) |
2265 | elem_count--; | | 2265 | elem_count--; |
2266 | } | | 2266 | } |
2267 | mutex_exit(&fd->fd_lock); | | 2267 | mutex_exit(&fd->fd_lock); |
2268 | | | 2268 | |
2269 | /* | | 2269 | /* |
2270 | * Release reference to process. | | 2270 | * Release reference to process. |
2271 | */ | | 2271 | */ |
2272 | mutex_enter(&proc_lock); | | 2272 | mutex_enter(&proc_lock); |
2273 | rw_exit(&p->p_reflock); | | 2273 | rw_exit(&p->p_reflock); |
2274 | } | | 2274 | } |
2275 | if (op == KERN_FILE_BYFILE) { | | 2275 | if (op == KERN_FILE_BYFILE) { |
2276 | sysctl_file_marker++; | | 2276 | sysctl_file_marker++; |
2277 | | | 2277 | |
2278 | /* Reset all markers if wrapped. */ | | 2278 | /* Reset all markers if wrapped. */ |
2279 | if (sysctl_file_marker == 0) { | | 2279 | if (sysctl_file_marker == 0) { |
2280 | sysctl_file_marker_reset(); | | 2280 | sysctl_file_marker_reset(); |
2281 | sysctl_file_marker++; | | 2281 | sysctl_file_marker++; |
2282 | } | | 2282 | } |
2283 | } | | 2283 | } |
2284 | mutex_exit(&proc_lock); | | 2284 | mutex_exit(&proc_lock); |
2285 | if (op == KERN_FILE_BYFILE) | | 2285 | if (op == KERN_FILE_BYFILE) |
2286 | mutex_exit(&sysctl_file_marker_lock); | | 2286 | mutex_exit(&sysctl_file_marker_lock); |
2287 | sysctl_relock(); | | 2287 | sysctl_relock(); |
2288 | break; | | 2288 | break; |
2289 | default: | | 2289 | default: |
2290 | return EINVAL; | | 2290 | return EINVAL; |
2291 | } | | 2291 | } |
2292 | | | 2292 | |
2293 | if (oldp == NULL) | | 2293 | if (oldp == NULL) |
2294 | needed += KERN_FILESLOP * elem_size; | | 2294 | needed += KERN_FILESLOP * elem_size; |
2295 | *oldlenp = needed; | | 2295 | *oldlenp = needed; |
2296 | | | 2296 | |
2297 | return error; | | 2297 | return error; |
2298 | } | | 2298 | } |
2299 | | | 2299 | |
2300 | static void | | 2300 | static void |
2301 | fill_file(struct file *fp, const struct file *fpsrc) | | 2301 | fill_file(struct file *fp, const struct file *fpsrc) |
2302 | { | | 2302 | { |
2303 | const bool allowaddr = get_expose_address(curproc); | | 2303 | const bool allowaddr = get_expose_address(curproc); |
2304 | | | 2304 | |
2305 | memset(fp, 0, sizeof(*fp)); | | 2305 | memset(fp, 0, sizeof(*fp)); |
2306 | | | 2306 | |
2307 | fp->f_offset = fpsrc->f_offset; | | 2307 | fp->f_offset = fpsrc->f_offset; |
2308 | COND_SET_PTR(fp->f_cred, fpsrc->f_cred, allowaddr); | | 2308 | COND_SET_PTR(fp->f_cred, fpsrc->f_cred, allowaddr); |
2309 | COND_SET_CPTR(fp->f_ops, fpsrc->f_ops, allowaddr); | | 2309 | COND_SET_CPTR(fp->f_ops, fpsrc->f_ops, allowaddr); |
2310 | COND_SET_STRUCT(fp->f_undata, fpsrc->f_undata, allowaddr); | | 2310 | COND_SET_STRUCT(fp->f_undata, fpsrc->f_undata, allowaddr); |
2311 | COND_SET_STRUCT(fp->f_list, fpsrc->f_list, allowaddr); | | 2311 | COND_SET_STRUCT(fp->f_list, fpsrc->f_list, allowaddr); |
2312 | COND_SET_STRUCT(fp->f_lock, fpsrc->f_lock, allowaddr); | | 2312 | memset(&fp->f_lock, 0, sizeof(fp->f_lock)); |
2313 | fp->f_flag = fpsrc->f_flag; | | 2313 | fp->f_flag = fpsrc->f_flag; |
2314 | fp->f_marker = fpsrc->f_marker; | | 2314 | fp->f_marker = fpsrc->f_marker; |
2315 | fp->f_type = fpsrc->f_type; | | 2315 | fp->f_type = fpsrc->f_type; |
2316 | fp->f_advice = fpsrc->f_advice; | | 2316 | fp->f_advice = fpsrc->f_advice; |
2317 | fp->f_count = fpsrc->f_count; | | 2317 | fp->f_count = fpsrc->f_count; |
2318 | fp->f_msgcount = fpsrc->f_msgcount; | | 2318 | fp->f_msgcount = fpsrc->f_msgcount; |
2319 | fp->f_unpcount = fpsrc->f_unpcount; | | 2319 | fp->f_unpcount = fpsrc->f_unpcount; |
2320 | COND_SET_STRUCT(fp->f_unplist, fpsrc->f_unplist, allowaddr); | | 2320 | COND_SET_STRUCT(fp->f_unplist, fpsrc->f_unplist, allowaddr); |
2321 | } | | 2321 | } |
2322 | | | 2322 | |
2323 | static void | | 2323 | static void |
2324 | fill_file2(struct kinfo_file *kp, const file_t *fp, const fdfile_t *ff, | | 2324 | fill_file2(struct kinfo_file *kp, const file_t *fp, const fdfile_t *ff, |
2325 | int i, pid_t pid) | | 2325 | int i, pid_t pid) |
2326 | { | | 2326 | { |
2327 | const bool allowaddr = get_expose_address(curproc); | | 2327 | const bool allowaddr = get_expose_address(curproc); |
2328 | | | 2328 | |
2329 | memset(kp, 0, sizeof(*kp)); | | 2329 | memset(kp, 0, sizeof(*kp)); |
2330 | | | 2330 | |
2331 | COND_SET_VALUE(kp->ki_fileaddr, PTRTOUINT64(fp), allowaddr); | | 2331 | COND_SET_VALUE(kp->ki_fileaddr, PTRTOUINT64(fp), allowaddr); |
2332 | kp->ki_flag = fp->f_flag; | | 2332 | kp->ki_flag = fp->f_flag; |
2333 | kp->ki_iflags = 0; | | 2333 | kp->ki_iflags = 0; |
2334 | kp->ki_ftype = fp->f_type; | | 2334 | kp->ki_ftype = fp->f_type; |
2335 | kp->ki_count = fp->f_count; | | 2335 | kp->ki_count = fp->f_count; |
2336 | kp->ki_msgcount = fp->f_msgcount; | | 2336 | kp->ki_msgcount = fp->f_msgcount; |
2337 | COND_SET_VALUE(kp->ki_fucred, PTRTOUINT64(fp->f_cred), allowaddr); | | 2337 | COND_SET_VALUE(kp->ki_fucred, PTRTOUINT64(fp->f_cred), allowaddr); |
2338 | kp->ki_fuid = kauth_cred_geteuid(fp->f_cred); | | 2338 | kp->ki_fuid = kauth_cred_geteuid(fp->f_cred); |
2339 | kp->ki_fgid = kauth_cred_getegid(fp->f_cred); | | 2339 | kp->ki_fgid = kauth_cred_getegid(fp->f_cred); |
2340 | COND_SET_VALUE(kp->ki_fops, PTRTOUINT64(fp->f_ops), allowaddr); | | 2340 | COND_SET_VALUE(kp->ki_fops, PTRTOUINT64(fp->f_ops), allowaddr); |
2341 | kp->ki_foffset = fp->f_offset; | | 2341 | kp->ki_foffset = fp->f_offset; |
2342 | COND_SET_VALUE(kp->ki_fdata, PTRTOUINT64(fp->f_data), allowaddr); | | 2342 | COND_SET_VALUE(kp->ki_fdata, PTRTOUINT64(fp->f_data), allowaddr); |
2343 | | | 2343 | |
2344 | /* vnode information to glue this file to something */ | | 2344 | /* vnode information to glue this file to something */ |
2345 | if (fp->f_type == DTYPE_VNODE) { | | 2345 | if (fp->f_type == DTYPE_VNODE) { |
2346 | struct vnode *vp = fp->f_vnode; | | 2346 | struct vnode *vp = fp->f_vnode; |
2347 | | | 2347 | |
2348 | COND_SET_VALUE(kp->ki_vun, PTRTOUINT64(vp->v_un.vu_socket), | | 2348 | COND_SET_VALUE(kp->ki_vun, PTRTOUINT64(vp->v_un.vu_socket), |
2349 | allowaddr); | | 2349 | allowaddr); |
2350 | kp->ki_vsize = vp->v_size; | | 2350 | kp->ki_vsize = vp->v_size; |
2351 | kp->ki_vtype = vp->v_type; | | 2351 | kp->ki_vtype = vp->v_type; |
2352 | kp->ki_vtag = vp->v_tag; | | 2352 | kp->ki_vtag = vp->v_tag; |
2353 | COND_SET_VALUE(kp->ki_vdata, PTRTOUINT64(vp->v_data), | | 2353 | COND_SET_VALUE(kp->ki_vdata, PTRTOUINT64(vp->v_data), |
2354 | allowaddr); | | 2354 | allowaddr); |
2355 | } | | 2355 | } |
2356 | | | 2356 | |
2357 | /* process information when retrieved via KERN_FILE_BYPID */ | | 2357 | /* process information when retrieved via KERN_FILE_BYPID */ |
2358 | if (ff != NULL) { | | 2358 | if (ff != NULL) { |
2359 | kp->ki_pid = pid; | | 2359 | kp->ki_pid = pid; |
2360 | kp->ki_fd = i; | | 2360 | kp->ki_fd = i; |
2361 | kp->ki_ofileflags = ff->ff_exclose; | | 2361 | kp->ki_ofileflags = ff->ff_exclose; |
2362 | kp->ki_usecount = ff->ff_refcnt; | | 2362 | kp->ki_usecount = ff->ff_refcnt; |
2363 | } | | 2363 | } |
2364 | } | | 2364 | } |