Pull up following revision(s) (requested by mrg in ticket #577): sys/kern/kern_descrip.c: revision 1.188 sys/kern/uipc_usrreq.c: revision 1.121 sys/sys/fcntl.h: revision 1.35 sys/sys/file.h: revision 1.66 sys/sys/param.h: patch sys/sys/un.h: revision 1.45 completely rework the way that orphaned sockets that are being fdpassed via SCM_RIGHTS messages are dealt with: 1. unp_gc: make this a kthread. 2. unp_detach: do not call unp_gc directly. instead, wake up unp_gc kthread. 3. unp_scan: do not close files here. instead, put them on a global list for unp_gc to close, along with a per-file "deferred close count". if file is already enqueued for close, just increment deferred close count. this eliminates the recursive calls. 4. unp_gc: scan files on global deferred close list. close each file N times, as specified by deferred close count in file. continue processing list until it becomes empty (closing may cause additional files to be queued for close). 5. unp_gc: add additional bit to mark files we are scanning. set during initial scan of global file list that currently clears FMARK/FDEFER. during later scans, never examine / garbage collect descriptors that we have not marked during the earlier scan. do not proceed with this initial scan until all deferred closes have been processed. be careful with locking to ensure no races are introduced between deferred close and file scan. 6. unp_gc: use dummy file_t to mark position in list when scanning. this allows us to drop filelist_lock, which in turn allows us to eliminate kmem_alloc() and safely close files, etc. 7. prohibit transfer of descriptors within SCM_RIGHTS messages if (num_files_in_transit > maxfiles / unp_rights_ratio) 8. fd_allocfile: ensure recycled files don't get scanned. this is 97% work done by andrew doran, with a couple of minor bug fixes and a lot of testing by yours truly.
diff -r1.182.6.3 -r1.182.6.4 src/sys/kern/kern_descrip.c
(snj)
--- src/sys/kern/kern_descrip.c 2009/03/15 20:23:26 1.182.6.3
+++ src/sys/kern/kern_descrip.c 2009/03/18 05:33:23 1.182.6.4
@@ -1,1930 +1,1941 @@ | @@ -1,1930 +1,1941 @@ | |||
1 | /* $NetBSD: kern_descrip.c,v 1.182.6.3 2009/03/15 20:23:26 snj Exp $ */ | 1 | /* $NetBSD: kern_descrip.c,v 1.182.6.4 2009/03/18 05:33:23 snj Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 9 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 10 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 11 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 14 | * documentation and/or other materials provided with the distribution. | |
15 | * | 15 | * | |
16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
26 | * POSSIBILITY OF SUCH DAMAGE. | 26 | * POSSIBILITY OF SUCH DAMAGE. | |
27 | */ | 27 | */ | |
28 | 28 | |||
29 | /* | 29 | /* | |
30 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | 30 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | 31 | * The Regents of the University of California. All rights reserved. | |
32 | * (c) UNIX System Laboratories, Inc. | 32 | * (c) UNIX System Laboratories, Inc. | |
33 | * All or some portions of this file are derived from material licensed | 33 | * All or some portions of this file are derived from material licensed | |
34 | * to the University of California by American Telephone and Telegraph | 34 | * to the University of California by American Telephone and Telegraph | |
35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
36 | * the permission of UNIX System Laboratories, Inc. | 36 | * the permission of UNIX System Laboratories, Inc. | |
37 | * | 37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | 38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | 39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | 40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | 41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | 42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | 43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | 44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | 45 | * documentation and/or other materials provided with the distribution. | |
46 | * 3. Neither the name of the University nor the names of its contributors | 46 | * 3. Neither the name of the University nor the names of its contributors | |
47 | * may be used to endorse or promote products derived from this software | 47 | * may be used to endorse or promote products derived from this software | |
48 | * without specific prior written permission. | 48 | * without specific prior written permission. | |
49 | * | 49 | * | |
50 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 50 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
51 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 51 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
52 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 52 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
53 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 53 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
54 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 54 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
55 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 55 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
56 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 56 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
58 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 58 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
59 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 59 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
60 | * SUCH DAMAGE. | 60 | * SUCH DAMAGE. | |
61 | * | 61 | * | |
62 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | 62 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | |
63 | */ | 63 | */ | |
64 | 64 | |||
65 | /* | 65 | /* | |
66 | * File descriptor management. | 66 | * File descriptor management. | |
67 | */ | 67 | */ | |
68 | 68 | |||
69 | #include <sys/cdefs.h> | 69 | #include <sys/cdefs.h> | |
70 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.182.6.3 2009/03/15 20:23:26 snj Exp $"); | 70 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.182.6.4 2009/03/18 05:33:23 snj Exp $"); | |
71 | 71 | |||
72 | #include <sys/param.h> | 72 | #include <sys/param.h> | |
73 | #include <sys/systm.h> | 73 | #include <sys/systm.h> | |
74 | #include <sys/filedesc.h> | 74 | #include <sys/filedesc.h> | |
75 | #include <sys/kernel.h> | 75 | #include <sys/kernel.h> | |
76 | #include <sys/vnode.h> | 76 | #include <sys/vnode.h> | |
77 | #include <sys/proc.h> | 77 | #include <sys/proc.h> | |
78 | #include <sys/file.h> | 78 | #include <sys/file.h> | |
79 | #include <sys/namei.h> | 79 | #include <sys/namei.h> | |
80 | #include <sys/socket.h> | 80 | #include <sys/socket.h> | |
81 | #include <sys/socketvar.h> | 81 | #include <sys/socketvar.h> | |
82 | #include <sys/stat.h> | 82 | #include <sys/stat.h> | |
83 | #include <sys/ioctl.h> | 83 | #include <sys/ioctl.h> | |
84 | #include <sys/fcntl.h> | 84 | #include <sys/fcntl.h> | |
85 | #include <sys/pool.h> | 85 | #include <sys/pool.h> | |
86 | #include <sys/syslog.h> | 86 | #include <sys/syslog.h> | |
87 | #include <sys/unistd.h> | 87 | #include <sys/unistd.h> | |
88 | #include <sys/resourcevar.h> | 88 | #include <sys/resourcevar.h> | |
89 | #include <sys/conf.h> | 89 | #include <sys/conf.h> | |
90 | #include <sys/event.h> | 90 | #include <sys/event.h> | |
91 | #include <sys/kauth.h> | 91 | #include <sys/kauth.h> | |
92 | #include <sys/atomic.h> | 92 | #include <sys/atomic.h> | |
93 | #include <sys/mount.h> | 93 | #include <sys/mount.h> | |
94 | #include <sys/syscallargs.h> | 94 | #include <sys/syscallargs.h> | |
95 | #include <sys/cpu.h> | 95 | #include <sys/cpu.h> | |
96 | 96 | |||
97 | static int cwdi_ctor(void *, void *, int); | 97 | static int cwdi_ctor(void *, void *, int); | |
98 | static void cwdi_dtor(void *, void *); | 98 | static void cwdi_dtor(void *, void *); | |
99 | static int file_ctor(void *, void *, int); | 99 | static int file_ctor(void *, void *, int); | |
100 | static void file_dtor(void *, void *); | 100 | static void file_dtor(void *, void *); | |
101 | static int fdfile_ctor(void *, void *, int); | 101 | static int fdfile_ctor(void *, void *, int); | |
102 | static void fdfile_dtor(void *, void *); | 102 | static void fdfile_dtor(void *, void *); | |
103 | static int filedesc_ctor(void *, void *, int); | 103 | static int filedesc_ctor(void *, void *, int); | |
104 | static void filedesc_dtor(void *, void *); | 104 | static void filedesc_dtor(void *, void *); | |
105 | static int filedescopen(dev_t, int, int, lwp_t *); | 105 | static int filedescopen(dev_t, int, int, lwp_t *); | |
106 | 106 | |||
107 | kmutex_t filelist_lock; /* lock on filehead */ | 107 | kmutex_t filelist_lock; /* lock on filehead */ | |
108 | struct filelist filehead; /* head of list of open files */ | 108 | struct filelist filehead; /* head of list of open files */ | |
109 | u_int nfiles; /* actual number of open files */ | 109 | u_int nfiles; /* actual number of open files */ | |
110 | 110 | |||
111 | static pool_cache_t cwdi_cache; | 111 | static pool_cache_t cwdi_cache; | |
112 | static pool_cache_t filedesc_cache; | 112 | static pool_cache_t filedesc_cache; | |
113 | static pool_cache_t file_cache; | 113 | static pool_cache_t file_cache; | |
114 | static pool_cache_t fdfile_cache; | 114 | static pool_cache_t fdfile_cache; | |
115 | 115 | |||
116 | const struct cdevsw filedesc_cdevsw = { | 116 | const struct cdevsw filedesc_cdevsw = { | |
117 | filedescopen, noclose, noread, nowrite, noioctl, | 117 | filedescopen, noclose, noread, nowrite, noioctl, | |
118 | nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, | 118 | nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, | |
119 | }; | 119 | }; | |
120 | 120 | |||
121 | /* For ease of reading. */ | 121 | /* For ease of reading. */ | |
122 | __strong_alias(fd_putvnode,fd_putfile) | 122 | __strong_alias(fd_putvnode,fd_putfile) | |
123 | __strong_alias(fd_putsock,fd_putfile) | 123 | __strong_alias(fd_putsock,fd_putfile) | |
124 | 124 | |||
125 | /* | 125 | /* | |
126 | * Initialize the descriptor system. | 126 | * Initialize the descriptor system. | |
127 | */ | 127 | */ | |
128 | void | 128 | void | |
129 | fd_sys_init(void) | 129 | fd_sys_init(void) | |
130 | { | 130 | { | |
131 | 131 | |||
132 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); | 132 | mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); | |
133 | 133 | |||
134 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, | 134 | file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, | |
135 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); | 135 | 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); | |
136 | KASSERT(file_cache != NULL); | 136 | KASSERT(file_cache != NULL); | |
137 | 137 | |||
138 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, | 138 | fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, | |
139 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, | 139 | PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, | |
140 | NULL); | 140 | NULL); | |
141 | KASSERT(fdfile_cache != NULL); | 141 | KASSERT(fdfile_cache != NULL); | |
142 | 142 | |||
143 | cwdi_cache = pool_cache_init(sizeof(struct cwdinfo), coherency_unit, | 143 | cwdi_cache = pool_cache_init(sizeof(struct cwdinfo), coherency_unit, | |
144 | 0, 0, "cwdi", NULL, IPL_NONE, cwdi_ctor, cwdi_dtor, NULL); | 144 | 0, 0, "cwdi", NULL, IPL_NONE, cwdi_ctor, cwdi_dtor, NULL); | |
145 | KASSERT(cwdi_cache != NULL); | 145 | KASSERT(cwdi_cache != NULL); | |
146 | 146 | |||
147 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, | 147 | filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, | |
148 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, | 148 | 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, | |
149 | NULL); | 149 | NULL); | |
150 | KASSERT(filedesc_cache != NULL); | 150 | KASSERT(filedesc_cache != NULL); | |
151 | } | 151 | } | |
152 | 152 | |||
153 | static int | 153 | static int | |
154 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) | 154 | fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) | |
155 | { | 155 | { | |
156 | int i, off, maxoff; | 156 | int i, off, maxoff; | |
157 | uint32_t sub; | 157 | uint32_t sub; | |
158 | 158 | |||
159 | KASSERT(mutex_owned(&fdp->fd_lock)); | 159 | KASSERT(mutex_owned(&fdp->fd_lock)); | |
160 | 160 | |||
161 | if (want > bits) | 161 | if (want > bits) | |
162 | return -1; | 162 | return -1; | |
163 | 163 | |||
164 | off = want >> NDENTRYSHIFT; | 164 | off = want >> NDENTRYSHIFT; | |
165 | i = want & NDENTRYMASK; | 165 | i = want & NDENTRYMASK; | |
166 | if (i) { | 166 | if (i) { | |
167 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); | 167 | sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); | |
168 | if (sub != ~0) | 168 | if (sub != ~0) | |
169 | goto found; | 169 | goto found; | |
170 | off++; | 170 | off++; | |
171 | } | 171 | } | |
172 | 172 | |||
173 | maxoff = NDLOSLOTS(bits); | 173 | maxoff = NDLOSLOTS(bits); | |
174 | while (off < maxoff) { | 174 | while (off < maxoff) { | |
175 | if ((sub = bitmap[off]) != ~0) | 175 | if ((sub = bitmap[off]) != ~0) | |
176 | goto found; | 176 | goto found; | |
177 | off++; | 177 | off++; | |
178 | } | 178 | } | |
179 | 179 | |||
180 | return (-1); | 180 | return (-1); | |
181 | 181 | |||
182 | found: | 182 | found: | |
183 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; | 183 | return (off << NDENTRYSHIFT) + ffs(~sub) - 1; | |
184 | } | 184 | } | |
185 | 185 | |||
186 | static int | 186 | static int | |
187 | fd_last_set(filedesc_t *fd, int last) | 187 | fd_last_set(filedesc_t *fd, int last) | |
188 | { | 188 | { | |
189 | int off, i; | 189 | int off, i; | |
190 | fdfile_t **ofiles = fd->fd_ofiles; | 190 | fdfile_t **ofiles = fd->fd_ofiles; | |
191 | uint32_t *bitmap = fd->fd_lomap; | 191 | uint32_t *bitmap = fd->fd_lomap; | |
192 | 192 | |||
193 | KASSERT(mutex_owned(&fd->fd_lock)); | 193 | KASSERT(mutex_owned(&fd->fd_lock)); | |
194 | 194 | |||
195 | off = (last - 1) >> NDENTRYSHIFT; | 195 | off = (last - 1) >> NDENTRYSHIFT; | |
196 | 196 | |||
197 | while (off >= 0 && !bitmap[off]) | 197 | while (off >= 0 && !bitmap[off]) | |
198 | off--; | 198 | off--; | |
199 | 199 | |||
200 | if (off < 0) | 200 | if (off < 0) | |
201 | return (-1); | 201 | return (-1); | |
202 | 202 | |||
203 | i = ((off + 1) << NDENTRYSHIFT) - 1; | 203 | i = ((off + 1) << NDENTRYSHIFT) - 1; | |
204 | if (i >= last) | 204 | if (i >= last) | |
205 | i = last - 1; | 205 | i = last - 1; | |
206 | 206 | |||
207 | /* XXX should use bitmap */ | 207 | /* XXX should use bitmap */ | |
208 | /* XXXAD does not work for fd_copy() */ | 208 | /* XXXAD does not work for fd_copy() */ | |
209 | while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated)) | 209 | while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated)) | |
210 | i--; | 210 | i--; | |
211 | 211 | |||
212 | return (i); | 212 | return (i); | |
213 | } | 213 | } | |
214 | 214 | |||
215 | void | 215 | void | |
216 | fd_used(filedesc_t *fdp, unsigned fd) | 216 | fd_used(filedesc_t *fdp, unsigned fd) | |
217 | { | 217 | { | |
218 | u_int off = fd >> NDENTRYSHIFT; | 218 | u_int off = fd >> NDENTRYSHIFT; | |
219 | fdfile_t *ff; | 219 | fdfile_t *ff; | |
220 | 220 | |||
221 | ff = fdp->fd_ofiles[fd]; | 221 | ff = fdp->fd_ofiles[fd]; | |
222 | 222 | |||
223 | KASSERT(mutex_owned(&fdp->fd_lock)); | 223 | KASSERT(mutex_owned(&fdp->fd_lock)); | |
224 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); | 224 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); | |
225 | KASSERT(ff != NULL); | 225 | KASSERT(ff != NULL); | |
226 | KASSERT(ff->ff_file == NULL); | 226 | KASSERT(ff->ff_file == NULL); | |
227 | KASSERT(!ff->ff_allocated); | 227 | KASSERT(!ff->ff_allocated); | |
228 | 228 | |||
229 | ff->ff_allocated = 1; | 229 | ff->ff_allocated = 1; | |
230 | fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); | 230 | fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); | |
231 | if (fdp->fd_lomap[off] == ~0) { | 231 | if (fdp->fd_lomap[off] == ~0) { | |
232 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | 232 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | |
233 | (1 << (off & NDENTRYMASK))) == 0); | 233 | (1 << (off & NDENTRYMASK))) == 0); | |
234 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); | 234 | fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); | |
235 | } | 235 | } | |
236 | 236 | |||
237 | if ((int)fd > fdp->fd_lastfile) { | 237 | if ((int)fd > fdp->fd_lastfile) { | |
238 | fdp->fd_lastfile = fd; | 238 | fdp->fd_lastfile = fd; | |
239 | } | 239 | } | |
240 | 240 | |||
241 | if (fd >= NDFDFILE) { | 241 | if (fd >= NDFDFILE) { | |
242 | fdp->fd_nused++; | 242 | fdp->fd_nused++; | |
243 | } else { | 243 | } else { | |
244 | KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 244 | KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
245 | } | 245 | } | |
246 | } | 246 | } | |
247 | 247 | |||
248 | void | 248 | void | |
249 | fd_unused(filedesc_t *fdp, unsigned fd) | 249 | fd_unused(filedesc_t *fdp, unsigned fd) | |
250 | { | 250 | { | |
251 | u_int off = fd >> NDENTRYSHIFT; | 251 | u_int off = fd >> NDENTRYSHIFT; | |
252 | fdfile_t *ff; | 252 | fdfile_t *ff; | |
253 | 253 | |||
254 | ff = fdp->fd_ofiles[fd]; | 254 | ff = fdp->fd_ofiles[fd]; | |
255 | 255 | |||
256 | /* | 256 | /* | |
257 | * Don't assert the lock is held here, as we may be copying | 257 | * Don't assert the lock is held here, as we may be copying | |
258 | * the table during exec() and it is not needed there. | 258 | * the table during exec() and it is not needed there. | |
259 | * procfs and sysctl are locked out by proc::p_reflock. | 259 | * procfs and sysctl are locked out by proc::p_reflock. | |
260 | * | 260 | * | |
261 | * KASSERT(mutex_owned(&fdp->fd_lock)); | 261 | * KASSERT(mutex_owned(&fdp->fd_lock)); | |
262 | */ | 262 | */ | |
263 | KASSERT(ff != NULL); | 263 | KASSERT(ff != NULL); | |
264 | KASSERT(ff->ff_file == NULL); | 264 | KASSERT(ff->ff_file == NULL); | |
265 | KASSERT(ff->ff_allocated); | 265 | KASSERT(ff->ff_allocated); | |
266 | 266 | |||
267 | if (fd < fdp->fd_freefile) { | 267 | if (fd < fdp->fd_freefile) { | |
268 | fdp->fd_freefile = fd; | 268 | fdp->fd_freefile = fd; | |
269 | } | 269 | } | |
270 | 270 | |||
271 | if (fdp->fd_lomap[off] == ~0) { | 271 | if (fdp->fd_lomap[off] == ~0) { | |
272 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | 272 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | |
273 | (1 << (off & NDENTRYMASK))) != 0); | 273 | (1 << (off & NDENTRYMASK))) != 0); | |
274 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | 274 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | |
275 | ~(1 << (off & NDENTRYMASK)); | 275 | ~(1 << (off & NDENTRYMASK)); | |
276 | } | 276 | } | |
277 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | 277 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | |
278 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | 278 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | |
279 | ff->ff_allocated = 0; | 279 | ff->ff_allocated = 0; | |
280 | 280 | |||
281 | KASSERT(fd <= fdp->fd_lastfile); | 281 | KASSERT(fd <= fdp->fd_lastfile); | |
282 | if (fd == fdp->fd_lastfile) { | 282 | if (fd == fdp->fd_lastfile) { | |
283 | fdp->fd_lastfile = fd_last_set(fdp, fd); | 283 | fdp->fd_lastfile = fd_last_set(fdp, fd); | |
284 | } | 284 | } | |
285 | 285 | |||
286 | if (fd >= NDFDFILE) { | 286 | if (fd >= NDFDFILE) { | |
287 | KASSERT(fdp->fd_nused > 0); | 287 | KASSERT(fdp->fd_nused > 0); | |
288 | fdp->fd_nused--; | 288 | fdp->fd_nused--; | |
289 | } else { | 289 | } else { | |
290 | KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 290 | KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
291 | } | 291 | } | |
292 | } | 292 | } | |
293 | 293 | |||
294 | /* | 294 | /* | |
295 | * Custom version of fd_unused() for fd_copy(), where the descriptor | 295 | * Custom version of fd_unused() for fd_copy(), where the descriptor | |
296 | * table is not yet fully initialized. | 296 | * table is not yet fully initialized. | |
297 | */ | 297 | */ | |
298 | static inline void | 298 | static inline void | |
299 | fd_zap(filedesc_t *fdp, unsigned fd) | 299 | fd_zap(filedesc_t *fdp, unsigned fd) | |
300 | { | 300 | { | |
301 | u_int off = fd >> NDENTRYSHIFT; | 301 | u_int off = fd >> NDENTRYSHIFT; | |
302 | 302 | |||
303 | if (fd < fdp->fd_freefile) { | 303 | if (fd < fdp->fd_freefile) { | |
304 | fdp->fd_freefile = fd; | 304 | fdp->fd_freefile = fd; | |
305 | } | 305 | } | |
306 | 306 | |||
307 | if (fdp->fd_lomap[off] == ~0) { | 307 | if (fdp->fd_lomap[off] == ~0) { | |
308 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | 308 | KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & | |
309 | (1 << (off & NDENTRYMASK))) != 0); | 309 | (1 << (off & NDENTRYMASK))) != 0); | |
310 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | 310 | fdp->fd_himap[off >> NDENTRYSHIFT] &= | |
311 | ~(1 << (off & NDENTRYMASK)); | 311 | ~(1 << (off & NDENTRYMASK)); | |
312 | } | 312 | } | |
313 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | 313 | KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); | |
314 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | 314 | fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); | |
315 | } | 315 | } | |
316 | 316 | |||
317 | bool | 317 | bool | |
318 | fd_isused(filedesc_t *fdp, unsigned fd) | 318 | fd_isused(filedesc_t *fdp, unsigned fd) | |
319 | { | 319 | { | |
320 | u_int off = fd >> NDENTRYSHIFT; | 320 | u_int off = fd >> NDENTRYSHIFT; | |
321 | 321 | |||
322 | KASSERT(fd < fdp->fd_nfiles); | 322 | KASSERT(fd < fdp->fd_nfiles); | |
323 | 323 | |||
324 | return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; | 324 | return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; | |
325 | } | 325 | } | |
326 | 326 | |||
327 | /* | 327 | /* | |
328 | * Look up the file structure corresponding to a file descriptor | 328 | * Look up the file structure corresponding to a file descriptor | |
329 | * and return the file, holding a reference on the descriptor. | 329 | * and return the file, holding a reference on the descriptor. | |
330 | */ | 330 | */ | |
331 | inline file_t * | 331 | inline file_t * | |
332 | fd_getfile(unsigned fd) | 332 | fd_getfile(unsigned fd) | |
333 | { | 333 | { | |
334 | filedesc_t *fdp; | 334 | filedesc_t *fdp; | |
335 | fdfile_t *ff; | 335 | fdfile_t *ff; | |
336 | file_t *fp; | 336 | file_t *fp; | |
337 | 337 | |||
338 | fdp = curlwp->l_fd; | 338 | fdp = curlwp->l_fd; | |
339 | 339 | |||
340 | /* | 340 | /* | |
341 | * Look up the fdfile structure representing this descriptor. | 341 | * Look up the fdfile structure representing this descriptor. | |
342 | * Ensure that we see fd_nfiles before fd_ofiles since we | 342 | * Ensure that we see fd_nfiles before fd_ofiles since we | |
343 | * are doing this unlocked. See fd_tryexpand(). | 343 | * are doing this unlocked. See fd_tryexpand(). | |
344 | */ | 344 | */ | |
345 | if (__predict_false(fd >= fdp->fd_nfiles)) { | 345 | if (__predict_false(fd >= fdp->fd_nfiles)) { | |
346 | return NULL; | 346 | return NULL; | |
347 | } | 347 | } | |
348 | membar_consumer(); | 348 | membar_consumer(); | |
349 | ff = fdp->fd_ofiles[fd]; | 349 | ff = fdp->fd_ofiles[fd]; | |
350 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 350 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
351 | if (__predict_false(ff == NULL)) { | 351 | if (__predict_false(ff == NULL)) { | |
352 | return NULL; | 352 | return NULL; | |
353 | } | 353 | } | |
354 | 354 | |||
355 | /* | 355 | /* | |
356 | * Now get a reference to the descriptor. Issue a memory | 356 | * Now get a reference to the descriptor. Issue a memory | |
357 | * barrier to ensure that we acquire the file pointer _after_ | 357 | * barrier to ensure that we acquire the file pointer _after_ | |
358 | * adding a reference. If no memory barrier, we could fetch | 358 | * adding a reference. If no memory barrier, we could fetch | |
359 | * a stale pointer. | 359 | * a stale pointer. | |
360 | */ | 360 | */ | |
361 | atomic_inc_uint(&ff->ff_refcnt); | 361 | atomic_inc_uint(&ff->ff_refcnt); | |
362 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | 362 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | |
363 | membar_enter(); | 363 | membar_enter(); | |
364 | #endif | 364 | #endif | |
365 | 365 | |||
366 | /* | 366 | /* | |
367 | * If the file is not open or is being closed then put the | 367 | * If the file is not open or is being closed then put the | |
368 | * reference back. | 368 | * reference back. | |
369 | */ | 369 | */ | |
370 | fp = ff->ff_file; | 370 | fp = ff->ff_file; | |
371 | if (__predict_true(fp != NULL)) { | 371 | if (__predict_true(fp != NULL)) { | |
372 | return fp; | 372 | return fp; | |
373 | } | 373 | } | |
374 | fd_putfile(fd); | 374 | fd_putfile(fd); | |
375 | return NULL; | 375 | return NULL; | |
376 | } | 376 | } | |
377 | 377 | |||
378 | /* | 378 | /* | |
379 | * Release a reference to a file descriptor acquired with fd_getfile(). | 379 | * Release a reference to a file descriptor acquired with fd_getfile(). | |
380 | */ | 380 | */ | |
381 | void | 381 | void | |
382 | fd_putfile(unsigned fd) | 382 | fd_putfile(unsigned fd) | |
383 | { | 383 | { | |
384 | filedesc_t *fdp; | 384 | filedesc_t *fdp; | |
385 | fdfile_t *ff; | 385 | fdfile_t *ff; | |
386 | u_int u, v; | 386 | u_int u, v; | |
387 | 387 | |||
388 | fdp = curlwp->l_fd; | 388 | fdp = curlwp->l_fd; | |
389 | ff = fdp->fd_ofiles[fd]; | 389 | ff = fdp->fd_ofiles[fd]; | |
390 | 390 | |||
391 | KASSERT(fd < fdp->fd_nfiles); | 391 | KASSERT(fd < fdp->fd_nfiles); | |
392 | KASSERT(ff != NULL); | 392 | KASSERT(ff != NULL); | |
393 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | 393 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | |
394 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 394 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
395 | 395 | |||
396 | /* | 396 | /* | |
397 | * Ensure that any use of the file is complete and globally | 397 | * Ensure that any use of the file is complete and globally | |
398 | * visible before dropping the final reference. If no membar, | 398 | * visible before dropping the final reference. If no membar, | |
399 | * the current CPU could still access memory associated with | 399 | * the current CPU could still access memory associated with | |
400 | * the file after it has been freed or recycled by another | 400 | * the file after it has been freed or recycled by another | |
401 | * CPU. | 401 | * CPU. | |
402 | */ | 402 | */ | |
403 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | 403 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | |
404 | membar_exit(); | 404 | membar_exit(); | |
405 | #endif | 405 | #endif | |
406 | 406 | |||
407 | /* | 407 | /* | |
408 | * Be optimistic and start out with the assumption that no other | 408 | * Be optimistic and start out with the assumption that no other | |
409 | * threads are trying to close the descriptor. If the CAS fails, | 409 | * threads are trying to close the descriptor. If the CAS fails, | |
410 | * we lost a race and/or it's being closed. | 410 | * we lost a race and/or it's being closed. | |
411 | */ | 411 | */ | |
412 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | 412 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | |
413 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | 413 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | |
414 | if (__predict_true(u == v)) { | 414 | if (__predict_true(u == v)) { | |
415 | return; | 415 | return; | |
416 | } | 416 | } | |
417 | if (__predict_false((v & FR_CLOSING) != 0)) { | 417 | if (__predict_false((v & FR_CLOSING) != 0)) { | |
418 | break; | 418 | break; | |
419 | } | 419 | } | |
420 | } | 420 | } | |
421 | 421 | |||
422 | /* Another thread is waiting to close the file: join it. */ | 422 | /* Another thread is waiting to close the file: join it. */ | |
423 | (void)fd_close(fd); | 423 | (void)fd_close(fd); | |
424 | } | 424 | } | |
425 | 425 | |||
426 | /* | 426 | /* | |
427 | * Convenience wrapper around fd_getfile() that returns reference | 427 | * Convenience wrapper around fd_getfile() that returns reference | |
428 | * to a vnode. | 428 | * to a vnode. | |
429 | */ | 429 | */ | |
430 | int | 430 | int | |
431 | fd_getvnode(unsigned fd, file_t **fpp) | 431 | fd_getvnode(unsigned fd, file_t **fpp) | |
432 | { | 432 | { | |
433 | vnode_t *vp; | 433 | vnode_t *vp; | |
434 | file_t *fp; | 434 | file_t *fp; | |
435 | 435 | |||
436 | fp = fd_getfile(fd); | 436 | fp = fd_getfile(fd); | |
437 | if (__predict_false(fp == NULL)) { | 437 | if (__predict_false(fp == NULL)) { | |
438 | return EBADF; | 438 | return EBADF; | |
439 | } | 439 | } | |
440 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { | 440 | if (__predict_false(fp->f_type != DTYPE_VNODE)) { | |
441 | fd_putfile(fd); | 441 | fd_putfile(fd); | |
442 | return EINVAL; | 442 | return EINVAL; | |
443 | } | 443 | } | |
444 | vp = fp->f_data; | 444 | vp = fp->f_data; | |
445 | if (__predict_false(vp->v_type == VBAD)) { | 445 | if (__predict_false(vp->v_type == VBAD)) { | |
446 | /* XXX Is this case really necessary? */ | 446 | /* XXX Is this case really necessary? */ | |
447 | fd_putfile(fd); | 447 | fd_putfile(fd); | |
448 | return EBADF; | 448 | return EBADF; | |
449 | } | 449 | } | |
450 | *fpp = fp; | 450 | *fpp = fp; | |
451 | return 0; | 451 | return 0; | |
452 | } | 452 | } | |
453 | 453 | |||
454 | /* | 454 | /* | |
455 | * Convenience wrapper around fd_getfile() that returns reference | 455 | * Convenience wrapper around fd_getfile() that returns reference | |
456 | * to a socket. | 456 | * to a socket. | |
457 | */ | 457 | */ | |
458 | int | 458 | int | |
459 | fd_getsock(unsigned fd, struct socket **sop) | 459 | fd_getsock(unsigned fd, struct socket **sop) | |
460 | { | 460 | { | |
461 | file_t *fp; | 461 | file_t *fp; | |
462 | 462 | |||
463 | fp = fd_getfile(fd); | 463 | fp = fd_getfile(fd); | |
464 | if (__predict_false(fp == NULL)) { | 464 | if (__predict_false(fp == NULL)) { | |
465 | return EBADF; | 465 | return EBADF; | |
466 | } | 466 | } | |
467 | if (__predict_false(fp->f_type != DTYPE_SOCKET)) { | 467 | if (__predict_false(fp->f_type != DTYPE_SOCKET)) { | |
468 | fd_putfile(fd); | 468 | fd_putfile(fd); | |
469 | return ENOTSOCK; | 469 | return ENOTSOCK; | |
470 | } | 470 | } | |
471 | *sop = fp->f_data; | 471 | *sop = fp->f_data; | |
472 | return 0; | 472 | return 0; | |
473 | } | 473 | } | |
474 | 474 | |||
475 | /* | 475 | /* | |
476 | * Look up the file structure corresponding to a file descriptor | 476 | * Look up the file structure corresponding to a file descriptor | |
477 | * and return it with a reference held on the file, not the | 477 | * and return it with a reference held on the file, not the | |
478 | * descriptor. | 478 | * descriptor. | |
479 | * | 479 | * | |
480 | * This is heavyweight and only used when accessing descriptors | 480 | * This is heavyweight and only used when accessing descriptors | |
481 | * from a foreign process. The caller must ensure that `p' does | 481 | * from a foreign process. The caller must ensure that `p' does | |
482 | * not exit or fork across this call. | 482 | * not exit or fork across this call. | |
483 | * | 483 | * | |
484 | * To release the file (not descriptor) reference, use closef(). | 484 | * To release the file (not descriptor) reference, use closef(). | |
485 | */ | 485 | */ | |
486 | file_t * | 486 | file_t * | |
487 | fd_getfile2(proc_t *p, unsigned fd) | 487 | fd_getfile2(proc_t *p, unsigned fd) | |
488 | { | 488 | { | |
489 | filedesc_t *fdp; | 489 | filedesc_t *fdp; | |
490 | fdfile_t *ff; | 490 | fdfile_t *ff; | |
491 | file_t *fp; | 491 | file_t *fp; | |
492 | 492 | |||
493 | fdp = p->p_fd; | 493 | fdp = p->p_fd; | |
494 | mutex_enter(&fdp->fd_lock); | 494 | mutex_enter(&fdp->fd_lock); | |
495 | if (fd > fdp->fd_nfiles) { | 495 | if (fd > fdp->fd_nfiles) { | |
496 | mutex_exit(&fdp->fd_lock); | 496 | mutex_exit(&fdp->fd_lock); | |
497 | return NULL; | 497 | return NULL; | |
498 | } | 498 | } | |
499 | if ((ff = fdp->fd_ofiles[fd]) == NULL) { | 499 | if ((ff = fdp->fd_ofiles[fd]) == NULL) { | |
500 | mutex_exit(&fdp->fd_lock); | 500 | mutex_exit(&fdp->fd_lock); | |
501 | return NULL; | 501 | return NULL; | |
502 | } | 502 | } | |
503 | mutex_enter(&ff->ff_lock); | 503 | mutex_enter(&ff->ff_lock); | |
504 | if ((fp = ff->ff_file) == NULL) { | 504 | if ((fp = ff->ff_file) == NULL) { | |
505 | mutex_exit(&ff->ff_lock); | 505 | mutex_exit(&ff->ff_lock); | |
506 | mutex_exit(&fdp->fd_lock); | 506 | mutex_exit(&fdp->fd_lock); | |
507 | return NULL; | 507 | return NULL; | |
508 | } | 508 | } | |
509 | mutex_enter(&fp->f_lock); | 509 | mutex_enter(&fp->f_lock); | |
510 | fp->f_count++; | 510 | fp->f_count++; | |
511 | mutex_exit(&fp->f_lock); | 511 | mutex_exit(&fp->f_lock); | |
512 | mutex_exit(&ff->ff_lock); | 512 | mutex_exit(&ff->ff_lock); | |
513 | mutex_exit(&fdp->fd_lock); | 513 | mutex_exit(&fdp->fd_lock); | |
514 | 514 | |||
515 | return fp; | 515 | return fp; | |
516 | } | 516 | } | |
517 | 517 | |||
/*
 * Internal form of close.  Must be called with a reference to the
 * descriptor, and will drop the reference.  When all descriptor
 * references are dropped, releases the descriptor slot and a single
 * reference to the file structure.
 *
 * Operates on the descriptor table of the calling LWP (curlwp->l_fd).
 *
 * Returns EBADF if the slot's file pointer has already been cleared
 * (another thread is closing it); otherwise returns whatever closef()
 * returns for the underlying file.
 */
int
fd_close(unsigned fd)
{
	struct flock lf;
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;
	proc_t *p;
	lwp_t *l;

	l = curlwp;
	p = l->l_proc;
	fdp = l->l_fd;
	ff = fdp->fd_ofiles[fd];

	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);

	mutex_enter(&ff->ff_lock);
	/* The caller's own descriptor reference must be counted. */
	KASSERT((ff->ff_refcnt & FR_MASK) > 0);
	if (ff->ff_file == NULL) {
		/*
		 * Another user of the file is already closing, and is
		 * waiting for other users of the file to drain.  Release
		 * our reference, and wake up the closer.
		 */
		atomic_dec_uint(&ff->ff_refcnt);
		cv_broadcast(&ff->ff_closing);
		mutex_exit(&ff->ff_lock);

		/*
		 * An application error, so pretend that the descriptor
		 * was already closed.  We can't safely wait for it to
		 * be closed without potentially deadlocking.
		 */
		return (EBADF);
	}
	KASSERT((ff->ff_refcnt & FR_CLOSING) == 0);

	/*
	 * There may be multiple users of this file within the process.
	 * Notify existing and new users that the file is closing.  This
	 * will prevent them from adding additional uses to this file
	 * while we are closing it.
	 *
	 * Clearing ff_file is what marks the slot as closing; fp keeps
	 * the file so it can be released at the end.
	 */
	fp = ff->ff_file;
	ff->ff_file = NULL;
	ff->ff_exclose = false;

	/*
	 * We expect the caller to hold a descriptor reference - drop it.
	 * The reference count may increase beyond zero at this point due
	 * to an erroneous descriptor reference by an application, but
	 * fd_getfile() will notice that the file is being closed and drop
	 * the reference again.
	 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_producer();
#endif
	if (__predict_false(atomic_dec_uint_nv(&ff->ff_refcnt) != 0)) {
		/*
		 * Wait for other references to drain.  This is typically
		 * an application error - the descriptor is being closed
		 * while still in use.
		 */
		atomic_or_uint(&ff->ff_refcnt, FR_CLOSING);
		/*
		 * Remove any knotes attached to the file.  A knote
		 * attached to the descriptor can hold references on it.
		 * ff_lock is released around the knote_fdclose() call.
		 */
		if (!SLIST_EMPTY(&ff->ff_knlist)) {
			mutex_exit(&ff->ff_lock);
			knote_fdclose(fd);
			mutex_enter(&ff->ff_lock);
		}
		/*
		 * We need to see the count drop to zero at least once,
		 * in order to ensure that all pre-existing references
		 * have been drained.  New references past this point are
		 * of no interest.
		 */
		while ((ff->ff_refcnt & FR_MASK) != 0) {
			cv_wait(&ff->ff_closing, &ff->ff_lock);
		}
		/* Drained: the slot is no longer in the closing state. */
		atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING);
	} else {
		/* If no references, there must be no knotes. */
		KASSERT(SLIST_EMPTY(&ff->ff_knlist));
	}
	mutex_exit(&ff->ff_lock);

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if ((p->p_flag & PK_ADVLOCK) != 0 && fp->f_type == DTYPE_VNODE) {
		/* Whole-file unlock: start 0, length 0, from SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		(void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX);
	}


	/* Free descriptor slot. */
	mutex_enter(&fdp->fd_lock);
	fd_unused(fdp, fd);
	mutex_exit(&fdp->fd_lock);

	/* Now drop reference to the file itself. */
	return closef(fp);
}
640 | 640 | |||
641 | /* | 641 | /* | |
642 | * Duplicate a file descriptor. | 642 | * Duplicate a file descriptor. | |
643 | */ | 643 | */ | |
644 | int | 644 | int | |
645 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) | 645 | fd_dup(file_t *fp, int minfd, int *newp, bool exclose) | |
646 | { | 646 | { | |
647 | proc_t *p; | 647 | proc_t *p; | |
648 | int error; | 648 | int error; | |
649 | 649 | |||
650 | p = curproc; | 650 | p = curproc; | |
651 | 651 | |||
652 | while ((error = fd_alloc(p, minfd, newp)) != 0) { | 652 | while ((error = fd_alloc(p, minfd, newp)) != 0) { | |
653 | if (error != ENOSPC) { | 653 | if (error != ENOSPC) { | |
654 | return error; | 654 | return error; | |
655 | } | 655 | } | |
656 | fd_tryexpand(p); | 656 | fd_tryexpand(p); | |
657 | } | 657 | } | |
658 | 658 | |||
659 | curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose; | 659 | curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose; | |
660 | fd_affix(p, fp, *newp); | 660 | fd_affix(p, fp, *newp); | |
661 | return 0; | 661 | return 0; | |
662 | } | 662 | } | |
663 | 663 | |||
664 | /* | 664 | /* | |
665 | * dup2 operation. | 665 | * dup2 operation. | |
666 | */ | 666 | */ | |
667 | int | 667 | int | |
668 | fd_dup2(file_t *fp, unsigned new) | 668 | fd_dup2(file_t *fp, unsigned new) | |
669 | { | 669 | { | |
670 | filedesc_t *fdp; | 670 | filedesc_t *fdp; | |
671 | fdfile_t *ff; | 671 | fdfile_t *ff; | |
672 | 672 | |||
673 | fdp = curlwp->l_fd; | 673 | fdp = curlwp->l_fd; | |
674 | 674 | |||
675 | /* | 675 | /* | |
676 | * Ensure there are enough slots in the descriptor table, | 676 | * Ensure there are enough slots in the descriptor table, | |
677 | * and allocate an fdfile_t up front in case we need it. | 677 | * and allocate an fdfile_t up front in case we need it. | |
678 | */ | 678 | */ | |
679 | while (new >= fdp->fd_nfiles) { | 679 | while (new >= fdp->fd_nfiles) { | |
680 | fd_tryexpand(curproc); | 680 | fd_tryexpand(curproc); | |
681 | } | 681 | } | |
682 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); | 682 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); | |
683 | 683 | |||
684 | /* | 684 | /* | |
685 | * If there is already a file open, close it. If the file is | 685 | * If there is already a file open, close it. If the file is | |
686 | * half open, wait for it to be constructed before closing it. | 686 | * half open, wait for it to be constructed before closing it. | |
687 | * XXX Potential for deadlock here? | 687 | * XXX Potential for deadlock here? | |
688 | */ | 688 | */ | |
689 | mutex_enter(&fdp->fd_lock); | 689 | mutex_enter(&fdp->fd_lock); | |
690 | while (fd_isused(fdp, new)) { | 690 | while (fd_isused(fdp, new)) { | |
691 | mutex_exit(&fdp->fd_lock); | 691 | mutex_exit(&fdp->fd_lock); | |
692 | if (fd_getfile(new) != NULL) { | 692 | if (fd_getfile(new) != NULL) { | |
693 | (void)fd_close(new); | 693 | (void)fd_close(new); | |
694 | } else { | 694 | } else { | |
695 | /* XXX Crummy, but unlikely to happen. */ | 695 | /* XXX Crummy, but unlikely to happen. */ | |
696 | kpause("dup2", false, 1, NULL); | 696 | kpause("dup2", false, 1, NULL); | |
697 | } | 697 | } | |
698 | mutex_enter(&fdp->fd_lock); | 698 | mutex_enter(&fdp->fd_lock); | |
699 | } | 699 | } | |
700 | if (fdp->fd_ofiles[new] == NULL) { | 700 | if (fdp->fd_ofiles[new] == NULL) { | |
701 | KASSERT(new >= NDFDFILE); | 701 | KASSERT(new >= NDFDFILE); | |
702 | fdp->fd_ofiles[new] = ff; | 702 | fdp->fd_ofiles[new] = ff; | |
703 | ff = NULL; | 703 | ff = NULL; | |
704 | } | 704 | } | |
705 | fd_used(fdp, new); | 705 | fd_used(fdp, new); | |
706 | mutex_exit(&fdp->fd_lock); | 706 | mutex_exit(&fdp->fd_lock); | |
707 | 707 | |||
708 | /* Slot is now allocated. Insert copy of the file. */ | 708 | /* Slot is now allocated. Insert copy of the file. */ | |
709 | fd_affix(curproc, fp, new); | 709 | fd_affix(curproc, fp, new); | |
710 | if (ff != NULL) { | 710 | if (ff != NULL) { | |
711 | pool_cache_put(fdfile_cache, ff); | 711 | pool_cache_put(fdfile_cache, ff); | |
712 | } | 712 | } | |
713 | return 0; | 713 | return 0; | |
714 | } | 714 | } | |
715 | 715 | |||
716 | /* | 716 | /* | |
717 | * Drop reference to a file structure. | 717 | * Drop reference to a file structure. | |
718 | */ | 718 | */ | |
719 | int | 719 | int | |
720 | closef(file_t *fp) | 720 | closef(file_t *fp) | |
721 | { | 721 | { | |
722 | struct flock lf; | 722 | struct flock lf; | |
723 | int error; | 723 | int error; | |
724 | 724 | |||
725 | /* | 725 | /* | |
726 | * Drop reference. If referenced elsewhere it's still open | 726 | * Drop reference. If referenced elsewhere it's still open | |
727 | * and we have nothing more to do. | 727 | * and we have nothing more to do. | |
728 | */ | 728 | */ | |
729 | mutex_enter(&fp->f_lock); | 729 | mutex_enter(&fp->f_lock); | |
730 | KASSERT(fp->f_count > 0); | 730 | KASSERT(fp->f_count > 0); | |
731 | if (--fp->f_count > 0) { | 731 | if (--fp->f_count > 0) { | |
732 | mutex_exit(&fp->f_lock); | 732 | mutex_exit(&fp->f_lock); | |
733 | return 0; | 733 | return 0; | |
734 | } | 734 | } | |
735 | KASSERT(fp->f_count == 0); | 735 | KASSERT(fp->f_count == 0); | |
736 | mutex_exit(&fp->f_lock); | 736 | mutex_exit(&fp->f_lock); | |
737 | 737 | |||
738 | /* We held the last reference - release locks, close and free. */ | 738 | /* We held the last reference - release locks, close and free. */ | |
739 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { | 739 | if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { | |
740 | lf.l_whence = SEEK_SET; | 740 | lf.l_whence = SEEK_SET; | |
741 | lf.l_start = 0; | 741 | lf.l_start = 0; | |
742 | lf.l_len = 0; | 742 | lf.l_len = 0; | |
743 | lf.l_type = F_UNLCK; | 743 | lf.l_type = F_UNLCK; | |
744 | (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); | 744 | (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); | |
745 | } | 745 | } | |
746 | if (fp->f_ops != NULL) { | 746 | if (fp->f_ops != NULL) { | |
747 | error = (*fp->f_ops->fo_close)(fp); | 747 | error = (*fp->f_ops->fo_close)(fp); | |
748 | } else { | 748 | } else { | |
749 | error = 0; | 749 | error = 0; | |
750 | } | 750 | } | |
751 | ffree(fp); | 751 | ffree(fp); | |
752 | 752 | |||
753 | return error; | 753 | return error; | |
754 | } | 754 | } | |
755 | 755 | |||
/*
 * Allocate a file descriptor for the process.
 *
 * Finds the lowest free descriptor that is >= `want' (and not below
 * the fd_freefile hint), marks it used and stores it in *result.
 * `p' must be the current process or proc0.
 *
 * Returns 0 on success.  If no free descriptor is found within the
 * current table, returns EMFILE when the table has already reached
 * the limit (lesser of RLIMIT_NOFILE and maxfiles), or ENOSPC when
 * the caller may expand the table and retry.
 */
int
fd_alloc(proc_t *p, int want, int *result)
{
	filedesc_t *fdp;
	int i, lim, last, error;
	u_int off, new;
	fdfile_t *ff;

	KASSERT(p == curproc || p == &proc0);

	fdp = p->p_fd;
	/*
	 * Obtain a spare fdfile_t before taking fd_lock; it is returned
	 * to the cache if the chosen slot already has one or if the
	 * search fails.
	 */
	ff = pool_cache_get(fdfile_cache, PR_WAITOK);
	KASSERT(ff->ff_refcnt == 0);
	KASSERT(ff->ff_file == NULL);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.
	 */
	mutex_enter(&fdp->fd_lock);
	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
	/* `last' bounds the search: table size clipped to the limit. */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	last = min(fdp->fd_nfiles, lim);
	for (;;) {
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		/*
		 * Two-level search: fd_himap selects a block of NDENTRIES
		 * descriptors, fd_lomap[new] selects the descriptor
		 * within that block.
		 */
		off = i >> NDENTRYSHIFT;
		new = fd_next_zero(fdp, fdp->fd_himap, off,
		    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
		/* `new' is unsigned; -1 is the "nothing found" result. */
		if (new == -1)
			break;
		/*
		 * Only in the starting block does the search begin at
		 * i's offset; later blocks are searched from bit 0.
		 */
		i = fd_next_zero(fdp, &fdp->fd_lomap[new],
		    new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
		if (i == -1) {
			/*
			 * Free file descriptor in this block was
			 * below want, try again with higher want.
			 */
			want = (new + 1) << NDENTRYSHIFT;
			continue;
		}
		/* Convert block-relative index to a descriptor number. */
		i += (new << NDENTRYSHIFT);
		if (i >= last) {
			break;
		}
		if (fdp->fd_ofiles[i] == NULL) {
			KASSERT(i >= NDFDFILE);
			fdp->fd_ofiles[i] = ff;
		} else {
			/* Slot already has an fdfile_t; spare not needed. */
			pool_cache_put(fdfile_cache, ff);
		}
		KASSERT(fdp->fd_ofiles[i]->ff_file == NULL);
		fd_used(fdp, i);
		if (want <= fdp->fd_freefile) {
			fdp->fd_freefile = i;
		}
		*result = i;
		mutex_exit(&fdp->fd_lock);
		KASSERT(i >= NDFDFILE ||
		    fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]);
		return 0;
	}

	/* No space in current array.  Let the caller expand and retry. */
	error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC;
	mutex_exit(&fdp->fd_lock);
	pool_cache_put(fdfile_cache, ff);
	return error;
}
828 | 828 | |||
829 | /* | 829 | /* | |
830 | * Allocate memory for the open files array. | 830 | * Allocate memory for the open files array. | |
831 | */ | 831 | */ | |
832 | static fdfile_t ** | 832 | static fdfile_t ** | |
833 | fd_ofile_alloc(int n) | 833 | fd_ofile_alloc(int n) | |
834 | { | 834 | { | |
835 | uintptr_t *ptr, sz; | 835 | uintptr_t *ptr, sz; | |
836 | 836 | |||
837 | KASSERT(n > NDFILE); | 837 | KASSERT(n > NDFILE); | |
838 | 838 | |||
839 | sz = (n + 2) * sizeof(uintptr_t); | 839 | sz = (n + 2) * sizeof(uintptr_t); | |
840 | ptr = kmem_alloc((size_t)sz, KM_SLEEP); | 840 | ptr = kmem_alloc((size_t)sz, KM_SLEEP); | |
841 | ptr[1] = sz; | 841 | ptr[1] = sz; | |
842 | 842 | |||
843 | return (fdfile_t **)(ptr + 2); | 843 | return (fdfile_t **)(ptr + 2); | |
844 | } | 844 | } | |
845 | 845 | |||
846 | /* | 846 | /* | |
847 | * Free an open files array. | 847 | * Free an open files array. | |
848 | */ | 848 | */ | |
849 | static void | 849 | static void | |
850 | fd_ofile_free(int n, fdfile_t **of) | 850 | fd_ofile_free(int n, fdfile_t **of) | |
851 | { | 851 | { | |
852 | uintptr_t *ptr, sz; | 852 | uintptr_t *ptr, sz; | |
853 | 853 | |||
854 | KASSERT(n > NDFILE); | 854 | KASSERT(n > NDFILE); | |
855 | 855 | |||
856 | sz = (n + 2) * sizeof(uintptr_t); | 856 | sz = (n + 2) * sizeof(uintptr_t); | |
857 | ptr = (uintptr_t *)of - 2; | 857 | ptr = (uintptr_t *)of - 2; | |
858 | KASSERT(ptr[1] == sz); | 858 | KASSERT(ptr[1] == sz); | |
859 | kmem_free(ptr, sz); | 859 | kmem_free(ptr, sz); | |
860 | } | 860 | } | |
861 | 861 | |||
862 | /* | 862 | /* | |
863 | * Allocate descriptor bitmap. | 863 | * Allocate descriptor bitmap. | |
864 | */ | 864 | */ | |
865 | static void | 865 | static void | |
866 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) | 866 | fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) | |
867 | { | 867 | { | |
868 | uint8_t *ptr; | 868 | uint8_t *ptr; | |
869 | size_t szlo, szhi; | 869 | size_t szlo, szhi; | |
870 | 870 | |||
871 | KASSERT(n > NDENTRIES); | 871 | KASSERT(n > NDENTRIES); | |
872 | 872 | |||
873 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | 873 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | |
874 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | 874 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | |
875 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); | 875 | ptr = kmem_alloc(szlo + szhi, KM_SLEEP); | |
876 | *lo = (uint32_t *)ptr; | 876 | *lo = (uint32_t *)ptr; | |
877 | *hi = (uint32_t *)(ptr + szlo); | 877 | *hi = (uint32_t *)(ptr + szlo); | |
878 | } | 878 | } | |
879 | 879 | |||
880 | /* | 880 | /* | |
881 | * Free descriptor bitmap. | 881 | * Free descriptor bitmap. | |
882 | */ | 882 | */ | |
883 | static void | 883 | static void | |
884 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) | 884 | fd_map_free(int n, uint32_t *lo, uint32_t *hi) | |
885 | { | 885 | { | |
886 | size_t szlo, szhi; | 886 | size_t szlo, szhi; | |
887 | 887 | |||
888 | KASSERT(n > NDENTRIES); | 888 | KASSERT(n > NDENTRIES); | |
889 | 889 | |||
890 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | 890 | szlo = NDLOSLOTS(n) * sizeof(uint32_t); | |
891 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | 891 | szhi = NDHISLOTS(n) * sizeof(uint32_t); | |
892 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); | 892 | KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); | |
893 | kmem_free(lo, szlo + szhi); | 893 | kmem_free(lo, szlo + szhi); | |
894 | } | 894 | } | |
895 | 895 | |||
896 | /* | 896 | /* | |
897 | * Expand a process' descriptor table. | 897 | * Expand a process' descriptor table. | |
898 | */ | 898 | */ | |
899 | void | 899 | void | |
900 | fd_tryexpand(proc_t *p) | 900 | fd_tryexpand(proc_t *p) | |
901 | { | 901 | { | |
902 | filedesc_t *fdp; | 902 | filedesc_t *fdp; | |
903 | int i, numfiles, oldnfiles; | 903 | int i, numfiles, oldnfiles; | |
904 | fdfile_t **newofile; | 904 | fdfile_t **newofile; | |
905 | uint32_t *newhimap, *newlomap; | 905 | uint32_t *newhimap, *newlomap; | |
906 | 906 | |||
907 | KASSERT(p == curproc || p == &proc0); | 907 | KASSERT(p == curproc || p == &proc0); | |
908 | 908 | |||
909 | fdp = p->p_fd; | 909 | fdp = p->p_fd; | |
910 | newhimap = NULL; | 910 | newhimap = NULL; | |
911 | newlomap = NULL; | 911 | newlomap = NULL; | |
912 | oldnfiles = fdp->fd_nfiles; | 912 | oldnfiles = fdp->fd_nfiles; | |
913 | 913 | |||
914 | if (oldnfiles < NDEXTENT) | 914 | if (oldnfiles < NDEXTENT) | |
915 | numfiles = NDEXTENT; | 915 | numfiles = NDEXTENT; | |
916 | else | 916 | else | |
917 | numfiles = 2 * oldnfiles; | 917 | numfiles = 2 * oldnfiles; | |
918 | 918 | |||
919 | newofile = fd_ofile_alloc(numfiles); | 919 | newofile = fd_ofile_alloc(numfiles); | |
920 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 920 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
921 | fd_map_alloc(numfiles, &newlomap, &newhimap); | 921 | fd_map_alloc(numfiles, &newlomap, &newhimap); | |
922 | } | 922 | } | |
923 | 923 | |||
924 | mutex_enter(&fdp->fd_lock); | 924 | mutex_enter(&fdp->fd_lock); | |
925 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 925 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
926 | if (fdp->fd_nfiles != oldnfiles) { | 926 | if (fdp->fd_nfiles != oldnfiles) { | |
927 | /* fdp changed; caller must retry */ | 927 | /* fdp changed; caller must retry */ | |
928 | mutex_exit(&fdp->fd_lock); | 928 | mutex_exit(&fdp->fd_lock); | |
929 | fd_ofile_free(numfiles, newofile); | 929 | fd_ofile_free(numfiles, newofile); | |
930 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 930 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
931 | fd_map_free(numfiles, newlomap, newhimap); | 931 | fd_map_free(numfiles, newlomap, newhimap); | |
932 | } | 932 | } | |
933 | return; | 933 | return; | |
934 | } | 934 | } | |
935 | 935 | |||
936 | /* Copy the existing ofile array and zero the new portion. */ | 936 | /* Copy the existing ofile array and zero the new portion. */ | |
937 | i = sizeof(fdfile_t *) * fdp->fd_nfiles; | 937 | i = sizeof(fdfile_t *) * fdp->fd_nfiles; | |
938 | memcpy(newofile, fdp->fd_ofiles, i); | 938 | memcpy(newofile, fdp->fd_ofiles, i); | |
939 | memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i); | 939 | memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i); | |
940 | 940 | |||
941 | /* | 941 | /* | |
942 | * Link old ofiles array into list to be discarded. We defer | 942 | * Link old ofiles array into list to be discarded. We defer | |
943 | * freeing until process exit if the descriptor table is visible | 943 | * freeing until process exit if the descriptor table is visible | |
944 | * to other threads. | 944 | * to other threads. | |
945 | */ | 945 | */ | |
946 | if (oldnfiles > NDFILE) { | 946 | if (oldnfiles > NDFILE) { | |
947 | if ((fdp->fd_refcnt | p->p_nlwps) > 1) { | 947 | if ((fdp->fd_refcnt | p->p_nlwps) > 1) { | |
948 | fdp->fd_ofiles[-2] = (void *)fdp->fd_discard; | 948 | fdp->fd_ofiles[-2] = (void *)fdp->fd_discard; | |
949 | fdp->fd_discard = fdp->fd_ofiles - 2; | 949 | fdp->fd_discard = fdp->fd_ofiles - 2; | |
950 | } else { | 950 | } else { | |
951 | fd_ofile_free(oldnfiles, fdp->fd_ofiles); | 951 | fd_ofile_free(oldnfiles, fdp->fd_ofiles); | |
952 | } | 952 | } | |
953 | } | 953 | } | |
954 | 954 | |||
955 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | 955 | if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { | |
956 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); | 956 | i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); | |
957 | memcpy(newhimap, fdp->fd_himap, i); | 957 | memcpy(newhimap, fdp->fd_himap, i); | |
958 | memset((uint8_t *)newhimap + i, 0, | 958 | memset((uint8_t *)newhimap + i, 0, | |
959 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); | 959 | NDHISLOTS(numfiles) * sizeof(uint32_t) - i); | |
960 | 960 | |||
961 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); | 961 | i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); | |
962 | memcpy(newlomap, fdp->fd_lomap, i); | 962 | memcpy(newlomap, fdp->fd_lomap, i); | |
963 | memset((uint8_t *)newlomap + i, 0, | 963 | memset((uint8_t *)newlomap + i, 0, | |
964 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); | 964 | NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); | |
965 | 965 | |||
966 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { | 966 | if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { | |
967 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); | 967 | fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); | |
968 | } | 968 | } | |
969 | fdp->fd_himap = newhimap; | 969 | fdp->fd_himap = newhimap; | |
970 | fdp->fd_lomap = newlomap; | 970 | fdp->fd_lomap = newlomap; | |
971 | } | 971 | } | |
972 | 972 | |||
973 | /* | 973 | /* | |
974 | * All other modifications must become globally visible before | 974 | * All other modifications must become globally visible before | |
975 | * the change to fd_nfiles. See fd_getfile(). | 975 | * the change to fd_nfiles. See fd_getfile(). | |
976 | */ | 976 | */ | |
977 | fdp->fd_ofiles = newofile; | 977 | fdp->fd_ofiles = newofile; | |
978 | membar_producer(); | 978 | membar_producer(); | |
979 | fdp->fd_nfiles = numfiles; | 979 | fdp->fd_nfiles = numfiles; | |
980 | mutex_exit(&fdp->fd_lock); | 980 | mutex_exit(&fdp->fd_lock); | |
981 | 981 | |||
982 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 982 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
983 | } | 983 | } | |
984 | 984 | |||
985 | /* | 985 | /* | |
986 | * Create a new open file structure and allocate a file descriptor | 986 | * Create a new open file structure and allocate a file descriptor | |
987 | * for the current process. | 987 | * for the current process. | |
988 | */ | 988 | */ | |
989 | int | 989 | int | |
990 | fd_allocfile(file_t **resultfp, int *resultfd) | 990 | fd_allocfile(file_t **resultfp, int *resultfd) | |
991 | { | 991 | { | |
992 | file_t *fp; | 992 | file_t *fp; | |
993 | proc_t *p; | 993 | proc_t *p; | |
994 | int error; | 994 | int error; | |
995 | 995 | |||
996 | p = curproc; | 996 | p = curproc; | |
997 | 997 | |||
998 | while ((error = fd_alloc(p, 0, resultfd)) != 0) { | 998 | while ((error = fd_alloc(p, 0, resultfd)) != 0) { | |
999 | if (error != ENOSPC) { | 999 | if (error != ENOSPC) { | |
1000 | return error; | 1000 | return error; | |
1001 | } | 1001 | } | |
1002 | fd_tryexpand(p); | 1002 | fd_tryexpand(p); | |
1003 | } | 1003 | } | |
1004 | 1004 | |||
1005 | fp = pool_cache_get(file_cache, PR_WAITOK); | 1005 | fp = pool_cache_get(file_cache, PR_WAITOK); | |
1006 | KASSERT(fp->f_count == 0); | 1006 | KASSERT(fp->f_count == 0); | |
1007 | KASSERT(fp->f_msgcount == 0); | |||
1008 | KASSERT(fp->f_unpcount == 0); | |||
1007 | fp->f_cred = kauth_cred_get(); | 1009 | fp->f_cred = kauth_cred_get(); | |
1008 | kauth_cred_hold(fp->f_cred); | 1010 | kauth_cred_hold(fp->f_cred); | |
1009 | 1011 | |||
1010 | if (__predict_false(atomic_inc_uint_nv(&nfiles) >= maxfiles)) { | 1012 | if (__predict_false(atomic_inc_uint_nv(&nfiles) >= maxfiles)) { | |
1011 | fd_abort(p, fp, *resultfd); | 1013 | fd_abort(p, fp, *resultfd); | |
1012 | tablefull("file", "increase kern.maxfiles or MAXFILES"); | 1014 | tablefull("file", "increase kern.maxfiles or MAXFILES"); | |
1013 | return ENFILE; | 1015 | return ENFILE; | |
1014 | } | 1016 | } | |
1015 | 1017 | |||
1018 | /* | |||
1019 | * Don't allow recycled files to be scanned. | |||
1020 | */ | |||
1021 | if ((fp->f_flag & FSCAN) != 0) { | |||
1022 | mutex_enter(&fp->f_lock); | |||
1023 | atomic_and_uint(&fp->f_flag, ~FSCAN); | |||
1024 | mutex_exit(&fp->f_lock); | |||
1025 | } | |||
1026 | ||||
1016 | fp->f_advice = 0; | 1027 | fp->f_advice = 0; | |
1017 | fp->f_msgcount = 0; | 1028 | fp->f_msgcount = 0; | |
1018 | fp->f_offset = 0; | 1029 | fp->f_offset = 0; | |
1019 | *resultfp = fp; | 1030 | *resultfp = fp; | |
1020 | 1031 | |||
1021 | return 0; | 1032 | return 0; | |
1022 | } | 1033 | } | |
1023 | 1034 | |||
1024 | /* | 1035 | /* | |
1025 | * Successful creation of a new descriptor: make visible to the process. | 1036 | * Successful creation of a new descriptor: make visible to the process. | |
1026 | */ | 1037 | */ | |
1027 | void | 1038 | void | |
1028 | fd_affix(proc_t *p, file_t *fp, unsigned fd) | 1039 | fd_affix(proc_t *p, file_t *fp, unsigned fd) | |
1029 | { | 1040 | { | |
1030 | fdfile_t *ff; | 1041 | fdfile_t *ff; | |
1031 | filedesc_t *fdp; | 1042 | filedesc_t *fdp; | |
1032 | 1043 | |||
1033 | KASSERT(p == curproc || p == &proc0); | 1044 | KASSERT(p == curproc || p == &proc0); | |
1034 | 1045 | |||
1035 | /* Add a reference to the file structure. */ | 1046 | /* Add a reference to the file structure. */ | |
1036 | mutex_enter(&fp->f_lock); | 1047 | mutex_enter(&fp->f_lock); | |
1037 | fp->f_count++; | 1048 | fp->f_count++; | |
1038 | mutex_exit(&fp->f_lock); | 1049 | mutex_exit(&fp->f_lock); | |
1039 | 1050 | |||
1040 | /* | 1051 | /* | |
1041 | * Insert the new file into the descriptor slot. | 1052 | * Insert the new file into the descriptor slot. | |
1042 | * | 1053 | * | |
1043 | * The memory barriers provided by lock activity in this routine | 1054 | * The memory barriers provided by lock activity in this routine | |
1044 | * ensure that any updates to the file structure become globally | 1055 | * ensure that any updates to the file structure become globally | |
1045 | * visible before the file becomes visible to other LWPs in the | 1056 | * visible before the file becomes visible to other LWPs in the | |
1046 | * current process. | 1057 | * current process. | |
1047 | */ | 1058 | */ | |
1048 | fdp = p->p_fd; | 1059 | fdp = p->p_fd; | |
1049 | ff = fdp->fd_ofiles[fd]; | 1060 | ff = fdp->fd_ofiles[fd]; | |
1050 | 1061 | |||
1051 | KASSERT(ff != NULL); | 1062 | KASSERT(ff != NULL); | |
1052 | KASSERT(ff->ff_file == NULL); | 1063 | KASSERT(ff->ff_file == NULL); | |
1053 | KASSERT(ff->ff_allocated); | 1064 | KASSERT(ff->ff_allocated); | |
1054 | KASSERT(fd_isused(fdp, fd)); | 1065 | KASSERT(fd_isused(fdp, fd)); | |
1055 | KASSERT(fd >= NDFDFILE || | 1066 | KASSERT(fd >= NDFDFILE || | |
1056 | fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1067 | fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1057 | 1068 | |||
1058 | /* No need to lock in order to make file initially visible. */ | 1069 | /* No need to lock in order to make file initially visible. */ | |
1059 | ff->ff_file = fp; | 1070 | ff->ff_file = fp; | |
1060 | } | 1071 | } | |
1061 | 1072 | |||
1062 | /* | 1073 | /* | |
1063 | * Abort creation of a new descriptor: free descriptor slot and file. | 1074 | * Abort creation of a new descriptor: free descriptor slot and file. | |
1064 | */ | 1075 | */ | |
1065 | void | 1076 | void | |
1066 | fd_abort(proc_t *p, file_t *fp, unsigned fd) | 1077 | fd_abort(proc_t *p, file_t *fp, unsigned fd) | |
1067 | { | 1078 | { | |
1068 | filedesc_t *fdp; | 1079 | filedesc_t *fdp; | |
1069 | fdfile_t *ff; | 1080 | fdfile_t *ff; | |
1070 | 1081 | |||
1071 | KASSERT(p == curproc || p == &proc0); | 1082 | KASSERT(p == curproc || p == &proc0); | |
1072 | 1083 | |||
1073 | fdp = p->p_fd; | 1084 | fdp = p->p_fd; | |
1074 | ff = fdp->fd_ofiles[fd]; | 1085 | ff = fdp->fd_ofiles[fd]; | |
1075 | 1086 | |||
1076 | KASSERT(fd >= NDFDFILE || | 1087 | KASSERT(fd >= NDFDFILE || | |
1077 | fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1088 | fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1078 | 1089 | |||
1079 | mutex_enter(&fdp->fd_lock); | 1090 | mutex_enter(&fdp->fd_lock); | |
1080 | KASSERT(fd_isused(fdp, fd)); | 1091 | KASSERT(fd_isused(fdp, fd)); | |
1081 | fd_unused(fdp, fd); | 1092 | fd_unused(fdp, fd); | |
1082 | mutex_exit(&fdp->fd_lock); | 1093 | mutex_exit(&fdp->fd_lock); | |
1083 | 1094 | |||
1084 | if (fp != NULL) { | 1095 | if (fp != NULL) { | |
1085 | ffree(fp); | 1096 | ffree(fp); | |
1086 | } | 1097 | } | |
1087 | } | 1098 | } | |
1088 | 1099 | |||
1089 | /* | 1100 | /* | |
1090 | * Free a file structure. | 1101 | * Free a file structure. | |
1091 | */ | 1102 | */ | |
1092 | void | 1103 | void | |
1093 | ffree(file_t *fp) | 1104 | ffree(file_t *fp) | |
1094 | { | 1105 | { | |
1095 | 1106 | |||
1096 | KASSERT(fp->f_count == 0); | 1107 | KASSERT(fp->f_count == 0); | |
1097 | 1108 | |||
1098 | atomic_dec_uint(&nfiles); | 1109 | atomic_dec_uint(&nfiles); | |
1099 | kauth_cred_free(fp->f_cred); | 1110 | kauth_cred_free(fp->f_cred); | |
1100 | pool_cache_put(file_cache, fp); | 1111 | pool_cache_put(file_cache, fp); | |
1101 | } | 1112 | } | |
1102 | 1113 | |||
1103 | /* | 1114 | /* | |
1104 | * Create an initial cwdinfo structure, using the same current and root | 1115 | * Create an initial cwdinfo structure, using the same current and root | |
1105 | * directories as curproc. | 1116 | * directories as curproc. | |
1106 | */ | 1117 | */ | |
1107 | struct cwdinfo * | 1118 | struct cwdinfo * | |
1108 | cwdinit(void) | 1119 | cwdinit(void) | |
1109 | { | 1120 | { | |
1110 | struct cwdinfo *cwdi; | 1121 | struct cwdinfo *cwdi; | |
1111 | struct cwdinfo *copy; | 1122 | struct cwdinfo *copy; | |
1112 | 1123 | |||
1113 | cwdi = pool_cache_get(cwdi_cache, PR_WAITOK); | 1124 | cwdi = pool_cache_get(cwdi_cache, PR_WAITOK); | |
1114 | copy = curproc->p_cwdi; | 1125 | copy = curproc->p_cwdi; | |
1115 | 1126 | |||
1116 | rw_enter(©->cwdi_lock, RW_READER); | 1127 | rw_enter(©->cwdi_lock, RW_READER); | |
1117 | cwdi->cwdi_cdir = copy->cwdi_cdir; | 1128 | cwdi->cwdi_cdir = copy->cwdi_cdir; | |
1118 | if (cwdi->cwdi_cdir) | 1129 | if (cwdi->cwdi_cdir) | |
1119 | VREF(cwdi->cwdi_cdir); | 1130 | VREF(cwdi->cwdi_cdir); | |
1120 | cwdi->cwdi_rdir = copy->cwdi_rdir; | 1131 | cwdi->cwdi_rdir = copy->cwdi_rdir; | |
1121 | if (cwdi->cwdi_rdir) | 1132 | if (cwdi->cwdi_rdir) | |
1122 | VREF(cwdi->cwdi_rdir); | 1133 | VREF(cwdi->cwdi_rdir); | |
1123 | cwdi->cwdi_edir = copy->cwdi_edir; | 1134 | cwdi->cwdi_edir = copy->cwdi_edir; | |
1124 | if (cwdi->cwdi_edir) | 1135 | if (cwdi->cwdi_edir) | |
1125 | VREF(cwdi->cwdi_edir); | 1136 | VREF(cwdi->cwdi_edir); | |
1126 | cwdi->cwdi_cmask = copy->cwdi_cmask; | 1137 | cwdi->cwdi_cmask = copy->cwdi_cmask; | |
1127 | cwdi->cwdi_refcnt = 1; | 1138 | cwdi->cwdi_refcnt = 1; | |
1128 | rw_exit(©->cwdi_lock); | 1139 | rw_exit(©->cwdi_lock); | |
1129 | 1140 | |||
1130 | return (cwdi); | 1141 | return (cwdi); | |
1131 | } | 1142 | } | |
1132 | 1143 | |||
1133 | static int | 1144 | static int | |
1134 | cwdi_ctor(void *arg, void *obj, int flags) | 1145 | cwdi_ctor(void *arg, void *obj, int flags) | |
1135 | { | 1146 | { | |
1136 | struct cwdinfo *cwdi = obj; | 1147 | struct cwdinfo *cwdi = obj; | |
1137 | 1148 | |||
1138 | rw_init(&cwdi->cwdi_lock); | 1149 | rw_init(&cwdi->cwdi_lock); | |
1139 | 1150 | |||
1140 | return 0; | 1151 | return 0; | |
1141 | } | 1152 | } | |
1142 | 1153 | |||
1143 | static void | 1154 | static void | |
1144 | cwdi_dtor(void *arg, void *obj) | 1155 | cwdi_dtor(void *arg, void *obj) | |
1145 | { | 1156 | { | |
1146 | struct cwdinfo *cwdi = obj; | 1157 | struct cwdinfo *cwdi = obj; | |
1147 | 1158 | |||
1148 | rw_destroy(&cwdi->cwdi_lock); | 1159 | rw_destroy(&cwdi->cwdi_lock); | |
1149 | } | 1160 | } | |
1150 | 1161 | |||
1151 | static int | 1162 | static int | |
1152 | file_ctor(void *arg, void *obj, int flags) | 1163 | file_ctor(void *arg, void *obj, int flags) | |
1153 | { | 1164 | { | |
1154 | file_t *fp = obj; | 1165 | file_t *fp = obj; | |
1155 | 1166 | |||
1156 | memset(fp, 0, sizeof(*fp)); | 1167 | memset(fp, 0, sizeof(*fp)); | |
1157 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | 1168 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | |
1158 | 1169 | |||
1159 | mutex_enter(&filelist_lock); | 1170 | mutex_enter(&filelist_lock); | |
1160 | LIST_INSERT_HEAD(&filehead, fp, f_list); | 1171 | LIST_INSERT_HEAD(&filehead, fp, f_list); | |
1161 | mutex_exit(&filelist_lock); | 1172 | mutex_exit(&filelist_lock); | |
1162 | 1173 | |||
1163 | return 0; | 1174 | return 0; | |
1164 | } | 1175 | } | |
1165 | 1176 | |||
1166 | static void | 1177 | static void | |
1167 | file_dtor(void *arg, void *obj) | 1178 | file_dtor(void *arg, void *obj) | |
1168 | { | 1179 | { | |
1169 | file_t *fp = obj; | 1180 | file_t *fp = obj; | |
1170 | 1181 | |||
1171 | mutex_enter(&filelist_lock); | 1182 | mutex_enter(&filelist_lock); | |
1172 | LIST_REMOVE(fp, f_list); | 1183 | LIST_REMOVE(fp, f_list); | |
1173 | mutex_exit(&filelist_lock); | 1184 | mutex_exit(&filelist_lock); | |
1174 | 1185 | |||
1175 | mutex_destroy(&fp->f_lock); | 1186 | mutex_destroy(&fp->f_lock); | |
1176 | } | 1187 | } | |
1177 | 1188 | |||
1178 | static int | 1189 | static int | |
1179 | fdfile_ctor(void *arg, void *obj, int flags) | 1190 | fdfile_ctor(void *arg, void *obj, int flags) | |
1180 | { | 1191 | { | |
1181 | fdfile_t *ff = obj; | 1192 | fdfile_t *ff = obj; | |
1182 | 1193 | |||
1183 | memset(ff, 0, sizeof(*ff)); | 1194 | memset(ff, 0, sizeof(*ff)); | |
1184 | mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE); | 1195 | mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE); | |
1185 | cv_init(&ff->ff_closing, "fdclose"); | 1196 | cv_init(&ff->ff_closing, "fdclose"); | |
1186 | 1197 | |||
1187 | return 0; | 1198 | return 0; | |
1188 | } | 1199 | } | |
1189 | 1200 | |||
1190 | static void | 1201 | static void | |
1191 | fdfile_dtor(void *arg, void *obj) | 1202 | fdfile_dtor(void *arg, void *obj) | |
1192 | { | 1203 | { | |
1193 | fdfile_t *ff = obj; | 1204 | fdfile_t *ff = obj; | |
1194 | 1205 | |||
1195 | mutex_destroy(&ff->ff_lock); | 1206 | mutex_destroy(&ff->ff_lock); | |
1196 | cv_destroy(&ff->ff_closing); | 1207 | cv_destroy(&ff->ff_closing); | |
1197 | } | 1208 | } | |
1198 | 1209 | |||
1199 | file_t * | 1210 | file_t * | |
1200 | fgetdummy(void) | 1211 | fgetdummy(void) | |
1201 | { | 1212 | { | |
1202 | file_t *fp; | 1213 | file_t *fp; | |
1203 | 1214 | |||
1204 | fp = kmem_alloc(sizeof(*fp), KM_SLEEP); | 1215 | fp = kmem_alloc(sizeof(*fp), KM_SLEEP); | |
1205 | if (fp != NULL) { | 1216 | if (fp != NULL) { | |
1206 | memset(fp, 0, sizeof(*fp)); | 1217 | memset(fp, 0, sizeof(*fp)); | |
1207 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | 1218 | mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); | |
1208 | } | 1219 | } | |
1209 | return fp; | 1220 | return fp; | |
1210 | } | 1221 | } | |
1211 | 1222 | |||
1212 | void | 1223 | void | |
1213 | fputdummy(file_t *fp) | 1224 | fputdummy(file_t *fp) | |
1214 | { | 1225 | { | |
1215 | 1226 | |||
1216 | mutex_destroy(&fp->f_lock); | 1227 | mutex_destroy(&fp->f_lock); | |
1217 | kmem_free(fp, sizeof(*fp)); | 1228 | kmem_free(fp, sizeof(*fp)); | |
1218 | } | 1229 | } | |
1219 | 1230 | |||
1220 | /* | 1231 | /* | |
1221 | * Make p2 share the current process's (curproc's) cwdinfo. | 1232 | * Make p2 share the current process's (curproc's) cwdinfo. | |
1222 | */ | 1233 | */ | |
1223 | void | 1234 | void | |
1224 | cwdshare(struct proc *p2) | 1235 | cwdshare(struct proc *p2) | |
1225 | { | 1236 | { | |
1226 | struct cwdinfo *cwdi; | 1237 | struct cwdinfo *cwdi; | |
1227 | 1238 | |||
1228 | cwdi = curproc->p_cwdi; | 1239 | cwdi = curproc->p_cwdi; | |
1229 | 1240 | |||
1230 | atomic_inc_uint(&cwdi->cwdi_refcnt); | 1241 | atomic_inc_uint(&cwdi->cwdi_refcnt); | |
1231 | p2->p_cwdi = cwdi; | 1242 | p2->p_cwdi = cwdi; | |
1232 | } | 1243 | } | |
1233 | 1244 | |||
1234 | /* | 1245 | /* | |
1235 | * Release a cwdinfo structure. | 1246 | * Release a cwdinfo structure. | |
1236 | */ | 1247 | */ | |
1237 | void | 1248 | void | |
1238 | cwdfree(struct cwdinfo *cwdi) | 1249 | cwdfree(struct cwdinfo *cwdi) | |
1239 | { | 1250 | { | |
1240 | 1251 | |||
1241 | if (atomic_dec_uint_nv(&cwdi->cwdi_refcnt) > 0) | 1252 | if (atomic_dec_uint_nv(&cwdi->cwdi_refcnt) > 0) | |
1242 | return; | 1253 | return; | |
1243 | 1254 | |||
1244 | vrele(cwdi->cwdi_cdir); | 1255 | vrele(cwdi->cwdi_cdir); | |
1245 | if (cwdi->cwdi_rdir) | 1256 | if (cwdi->cwdi_rdir) | |
1246 | vrele(cwdi->cwdi_rdir); | 1257 | vrele(cwdi->cwdi_rdir); | |
1247 | if (cwdi->cwdi_edir) | 1258 | if (cwdi->cwdi_edir) | |
1248 | vrele(cwdi->cwdi_edir); | 1259 | vrele(cwdi->cwdi_edir); | |
1249 | pool_cache_put(cwdi_cache, cwdi); | 1260 | pool_cache_put(cwdi_cache, cwdi); | |
1250 | } | 1261 | } | |
1251 | 1262 | |||
1252 | /* | 1263 | /* | |
1253 | * Create an initial filedesc structure. | 1264 | * Create an initial filedesc structure. | |
1254 | */ | 1265 | */ | |
1255 | filedesc_t * | 1266 | filedesc_t * | |
1256 | fd_init(filedesc_t *fdp) | 1267 | fd_init(filedesc_t *fdp) | |
1257 | { | 1268 | { | |
1258 | unsigned fd; | 1269 | unsigned fd; | |
1259 | 1270 | |||
1260 | if (fdp == NULL) { | 1271 | if (fdp == NULL) { | |
1261 | fdp = pool_cache_get(filedesc_cache, PR_WAITOK); | 1272 | fdp = pool_cache_get(filedesc_cache, PR_WAITOK); | |
1262 | } else { | 1273 | } else { | |
1263 | filedesc_ctor(NULL, fdp, PR_WAITOK); | 1274 | filedesc_ctor(NULL, fdp, PR_WAITOK); | |
1264 | } | 1275 | } | |
1265 | 1276 | |||
1266 | fdp->fd_refcnt = 1; | 1277 | fdp->fd_refcnt = 1; | |
1267 | fdp->fd_ofiles = fdp->fd_dfiles; | 1278 | fdp->fd_ofiles = fdp->fd_dfiles; | |
1268 | fdp->fd_nfiles = NDFILE; | 1279 | fdp->fd_nfiles = NDFILE; | |
1269 | fdp->fd_himap = fdp->fd_dhimap; | 1280 | fdp->fd_himap = fdp->fd_dhimap; | |
1270 | fdp->fd_lomap = fdp->fd_dlomap; | 1281 | fdp->fd_lomap = fdp->fd_dlomap; | |
1271 | KASSERT(fdp->fd_lastfile == -1); | 1282 | KASSERT(fdp->fd_lastfile == -1); | |
1272 | KASSERT(fdp->fd_lastkqfile == -1); | 1283 | KASSERT(fdp->fd_lastkqfile == -1); | |
1273 | KASSERT(fdp->fd_knhash == NULL); | 1284 | KASSERT(fdp->fd_knhash == NULL); | |
1274 | 1285 | |||
1275 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - | 1286 | memset(&fdp->fd_startzero, 0, sizeof(*fdp) - | |
1276 | offsetof(filedesc_t, fd_startzero)); | 1287 | offsetof(filedesc_t, fd_startzero)); | |
1277 | for (fd = 0; fd < NDFDFILE; fd++) { | 1288 | for (fd = 0; fd < NDFDFILE; fd++) { | |
1278 | fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd]; | 1289 | fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd]; | |
1279 | } | 1290 | } | |
1280 | 1291 | |||
1281 | return fdp; | 1292 | return fdp; | |
1282 | } | 1293 | } | |
1283 | 1294 | |||
1284 | /* | 1295 | /* | |
1285 | * Initialize a file descriptor table. | 1296 | * Initialize a file descriptor table. | |
1286 | */ | 1297 | */ | |
1287 | static int | 1298 | static int | |
1288 | filedesc_ctor(void *arg, void *obj, int flag) | 1299 | filedesc_ctor(void *arg, void *obj, int flag) | |
1289 | { | 1300 | { | |
1290 | filedesc_t *fdp = obj; | 1301 | filedesc_t *fdp = obj; | |
1291 | int i; | 1302 | int i; | |
1292 | 1303 | |||
1293 | memset(fdp, 0, sizeof(*fdp)); | 1304 | memset(fdp, 0, sizeof(*fdp)); | |
1294 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); | 1305 | mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); | |
1295 | fdp->fd_lastfile = -1; | 1306 | fdp->fd_lastfile = -1; | |
1296 | fdp->fd_lastkqfile = -1; | 1307 | fdp->fd_lastkqfile = -1; | |
1297 | 1308 | |||
1298 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); | 1309 | CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); | |
1299 | for (i = 0; i < NDFDFILE; i++) { | 1310 | for (i = 0; i < NDFDFILE; i++) { | |
1300 | fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); | 1311 | fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); | |
1301 | } | 1312 | } | |
1302 | 1313 | |||
1303 | return 0; | 1314 | return 0; | |
1304 | } | 1315 | } | |
1305 | 1316 | |||
1306 | static void | 1317 | static void | |
1307 | filedesc_dtor(void *arg, void *obj) | 1318 | filedesc_dtor(void *arg, void *obj) | |
1308 | { | 1319 | { | |
1309 | filedesc_t *fdp = obj; | 1320 | filedesc_t *fdp = obj; | |
1310 | int i; | 1321 | int i; | |
1311 | 1322 | |||
1312 | for (i = 0; i < NDFDFILE; i++) { | 1323 | for (i = 0; i < NDFDFILE; i++) { | |
1313 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); | 1324 | fdfile_dtor(NULL, fdp->fd_dfdfile[i]); | |
1314 | } | 1325 | } | |
1315 | 1326 | |||
1316 | mutex_destroy(&fdp->fd_lock); | 1327 | mutex_destroy(&fdp->fd_lock); | |
1317 | } | 1328 | } | |
1318 | 1329 | |||
1319 | /* | 1330 | /* | |
1320 | * Make p2 share the current LWP's (curlwp's) filedesc structure. | 1331 | * Make p2 share the current LWP's (curlwp's) filedesc structure. | |
1321 | */ | 1332 | */ | |
1322 | void | 1333 | void | |
1323 | fd_share(struct proc *p2) | 1334 | fd_share(struct proc *p2) | |
1324 | { | 1335 | { | |
1325 | filedesc_t *fdp; | 1336 | filedesc_t *fdp; | |
1326 | 1337 | |||
1327 | fdp = curlwp->l_fd; | 1338 | fdp = curlwp->l_fd; | |
1328 | p2->p_fd = fdp; | 1339 | p2->p_fd = fdp; | |
1329 | atomic_inc_uint(&fdp->fd_refcnt); | 1340 | atomic_inc_uint(&fdp->fd_refcnt); | |
1330 | } | 1341 | } | |
1331 | 1342 | |||
1332 | /* | 1343 | /* | |
1333 | * Copy a filedesc structure. | 1344 | * Copy a filedesc structure. | |
1334 | */ | 1345 | */ | |
1335 | filedesc_t * | 1346 | filedesc_t * | |
1336 | fd_copy(void) | 1347 | fd_copy(void) | |
1337 | { | 1348 | { | |
1338 | filedesc_t *newfdp, *fdp; | 1349 | filedesc_t *newfdp, *fdp; | |
1339 | fdfile_t *ff, *fflist, **ffp, **nffp, *ff2; | 1350 | fdfile_t *ff, *fflist, **ffp, **nffp, *ff2; | |
1340 | int i, nused, numfiles, lastfile, j, newlast; | 1351 | int i, nused, numfiles, lastfile, j, newlast; | |
1341 | file_t *fp; | 1352 | file_t *fp; | |
1342 | 1353 | |||
1343 | fdp = curproc->p_fd; | 1354 | fdp = curproc->p_fd; | |
1344 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); | 1355 | newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); | |
1345 | newfdp->fd_refcnt = 1; | 1356 | newfdp->fd_refcnt = 1; | |
1346 | 1357 | |||
1347 | KASSERT(newfdp->fd_knhash == NULL); | 1358 | KASSERT(newfdp->fd_knhash == NULL); | |
1348 | KASSERT(newfdp->fd_knhashmask == 0); | 1359 | KASSERT(newfdp->fd_knhashmask == 0); | |
1349 | KASSERT(newfdp->fd_discard == NULL); | 1360 | KASSERT(newfdp->fd_discard == NULL); | |
1350 | 1361 | |||
1351 | for (;;) { | 1362 | for (;;) { | |
1352 | numfiles = fdp->fd_nfiles; | 1363 | numfiles = fdp->fd_nfiles; | |
1353 | lastfile = fdp->fd_lastfile; | 1364 | lastfile = fdp->fd_lastfile; | |
1354 | 1365 | |||
1355 | /* | 1366 | /* | |
1356 | * If the number of open files fits in the internal arrays | 1367 | * If the number of open files fits in the internal arrays | |
1357 | * of the open file structure, use them, otherwise allocate | 1368 | * of the open file structure, use them, otherwise allocate | |
1358 | * additional memory for the number of descriptors currently | 1369 | * additional memory for the number of descriptors currently | |
1359 | * in use. | 1370 | * in use. | |
1360 | */ | 1371 | */ | |
1361 | if (lastfile < NDFILE) { | 1372 | if (lastfile < NDFILE) { | |
1362 | i = NDFILE; | 1373 | i = NDFILE; | |
1363 | newfdp->fd_ofiles = newfdp->fd_dfiles; | 1374 | newfdp->fd_ofiles = newfdp->fd_dfiles; | |
1364 | } else { | 1375 | } else { | |
1365 | /* | 1376 | /* | |
1366 | * Compute the smallest multiple of NDEXTENT needed | 1377 | * Compute the smallest multiple of NDEXTENT needed | |
1367 | * for the file descriptors currently in use, | 1378 | * for the file descriptors currently in use, | |
1368 | * allowing the table to shrink. | 1379 | * allowing the table to shrink. | |
1369 | */ | 1380 | */ | |
1370 | i = numfiles; | 1381 | i = numfiles; | |
1371 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { | 1382 | while (i >= 2 * NDEXTENT && i > lastfile * 2) { | |
1372 | i /= 2; | 1383 | i /= 2; | |
1373 | } | 1384 | } | |
1374 | newfdp->fd_ofiles = fd_ofile_alloc(i); | 1385 | newfdp->fd_ofiles = fd_ofile_alloc(i); | |
1375 | KASSERT(i > NDFILE); | 1386 | KASSERT(i > NDFILE); | |
1376 | } | 1387 | } | |
1377 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { | 1388 | if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { | |
1378 | newfdp->fd_himap = newfdp->fd_dhimap; | 1389 | newfdp->fd_himap = newfdp->fd_dhimap; | |
1379 | newfdp->fd_lomap = newfdp->fd_dlomap; | 1390 | newfdp->fd_lomap = newfdp->fd_dlomap; | |
1380 | } else { | 1391 | } else { | |
1381 | fd_map_alloc(i, &newfdp->fd_lomap, | 1392 | fd_map_alloc(i, &newfdp->fd_lomap, | |
1382 | &newfdp->fd_himap); | 1393 | &newfdp->fd_himap); | |
1383 | } | 1394 | } | |
1384 | 1395 | |||
1385 | /* | 1396 | /* | |
1386 | * Allocate and string together fdfile structures. | 1397 | * Allocate and string together fdfile structures. | |
1387 | * We abuse fdfile_t::ff_file here, but it will be | 1398 | * We abuse fdfile_t::ff_file here, but it will be | |
1388 | * cleared before this routine returns. | 1399 | * cleared before this routine returns. | |
1389 | */ | 1400 | */ | |
1390 | nused = fdp->fd_nused; | 1401 | nused = fdp->fd_nused; | |
1391 | fflist = NULL; | 1402 | fflist = NULL; | |
1392 | for (j = nused; j != 0; j--) { | 1403 | for (j = nused; j != 0; j--) { | |
1393 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); | 1404 | ff = pool_cache_get(fdfile_cache, PR_WAITOK); | |
1394 | ff->ff_file = (void *)fflist; | 1405 | ff->ff_file = (void *)fflist; | |
1395 | fflist = ff; | 1406 | fflist = ff; | |
1396 | } | 1407 | } | |
1397 | 1408 | |||
1398 | mutex_enter(&fdp->fd_lock); | 1409 | mutex_enter(&fdp->fd_lock); | |
1399 | if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused && | 1410 | if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused && | |
1400 | lastfile == fdp->fd_lastfile) { | 1411 | lastfile == fdp->fd_lastfile) { | |
1401 | break; | 1412 | break; | |
1402 | } | 1413 | } | |
1403 | mutex_exit(&fdp->fd_lock); | 1414 | mutex_exit(&fdp->fd_lock); | |
1404 | if (i > NDFILE) { | 1415 | if (i > NDFILE) { | |
1405 | fd_ofile_free(i, newfdp->fd_ofiles); | 1416 | fd_ofile_free(i, newfdp->fd_ofiles); | |
1406 | } | 1417 | } | |
1407 | if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { | 1418 | if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { | |
1408 | fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap); | 1419 | fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap); | |
1409 | } | 1420 | } | |
1410 | while (fflist != NULL) { | 1421 | while (fflist != NULL) { | |
1411 | ff = fflist; | 1422 | ff = fflist; | |
1412 | fflist = (void *)ff->ff_file; | 1423 | fflist = (void *)ff->ff_file; | |
1413 | ff->ff_file = NULL; | 1424 | ff->ff_file = NULL; | |
1414 | pool_cache_put(fdfile_cache, ff); | 1425 | pool_cache_put(fdfile_cache, ff); | |
1415 | } | 1426 | } | |
1416 | } | 1427 | } | |
1417 | 1428 | |||
1418 | newfdp->fd_nfiles = i; | 1429 | newfdp->fd_nfiles = i; | |
1419 | newfdp->fd_freefile = fdp->fd_freefile; | 1430 | newfdp->fd_freefile = fdp->fd_freefile; | |
1420 | newfdp->fd_exclose = fdp->fd_exclose; | 1431 | newfdp->fd_exclose = fdp->fd_exclose; | |
1421 | 1432 | |||
1422 | /* | 1433 | /* | |
1423 | * Clear the entries that will not be copied over. | 1434 | * Clear the entries that will not be copied over. | |
1424 | * Avoid calling memset with 0 size. | 1435 | * Avoid calling memset with 0 size. | |
1425 | */ | 1436 | */ | |
1426 | if (lastfile < (i-1)) { | 1437 | if (lastfile < (i-1)) { | |
1427 | memset(newfdp->fd_ofiles + lastfile + 1, 0, | 1438 | memset(newfdp->fd_ofiles + lastfile + 1, 0, | |
1428 | (i - lastfile - 1) * sizeof(file_t **)); | 1439 | (i - lastfile - 1) * sizeof(file_t **)); | |
1429 | } | 1440 | } | |
1430 | if (i < NDENTRIES * NDENTRIES) { | 1441 | if (i < NDENTRIES * NDENTRIES) { | |
1431 | i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ | 1442 | i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ | |
1432 | } | 1443 | } | |
1433 | memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); | 1444 | memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); | |
1434 | memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); | 1445 | memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); | |
1435 | 1446 | |||
1436 | ffp = fdp->fd_ofiles; | 1447 | ffp = fdp->fd_ofiles; | |
1437 | nffp = newfdp->fd_ofiles; | 1448 | nffp = newfdp->fd_ofiles; | |
1438 | j = imax(lastfile, (NDFDFILE - 1)); | 1449 | j = imax(lastfile, (NDFDFILE - 1)); | |
1439 | newlast = -1; | 1450 | newlast = -1; | |
1440 | KASSERT(j < fdp->fd_nfiles); | 1451 | KASSERT(j < fdp->fd_nfiles); | |
1441 | for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) { | 1452 | for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) { | |
1442 | ff = *ffp; | 1453 | ff = *ffp; | |
1443 | /* Install built-in fdfiles even if unused here. */ | 1454 | /* Install built-in fdfiles even if unused here. */ | |
1444 | if (i < NDFDFILE) { | 1455 | if (i < NDFDFILE) { | |
1445 | ff2 = (fdfile_t *)newfdp->fd_dfdfile[i]; | 1456 | ff2 = (fdfile_t *)newfdp->fd_dfdfile[i]; | |
1446 | } else { | 1457 | } else { | |
1447 | ff2 = NULL; | 1458 | ff2 = NULL; | |
1448 | } | 1459 | } | |
1449 | /* Determine if descriptor is active in parent. */ | 1460 | /* Determine if descriptor is active in parent. */ | |
1450 | if (ff == NULL || !fd_isused(fdp, i)) { | 1461 | if (ff == NULL || !fd_isused(fdp, i)) { | |
1451 | KASSERT(ff != NULL || i >= NDFDFILE); | 1462 | KASSERT(ff != NULL || i >= NDFDFILE); | |
1452 | continue; | 1463 | continue; | |
1453 | } | 1464 | } | |
1454 | mutex_enter(&ff->ff_lock); | 1465 | mutex_enter(&ff->ff_lock); | |
1455 | fp = ff->ff_file; | 1466 | fp = ff->ff_file; | |
1456 | if (fp == NULL) { | 1467 | if (fp == NULL) { | |
1457 | /* Descriptor is half-open: free slot. */ | 1468 | /* Descriptor is half-open: free slot. */ | |
1458 | fd_zap(newfdp, i); | 1469 | fd_zap(newfdp, i); | |
1459 | mutex_exit(&ff->ff_lock); | 1470 | mutex_exit(&ff->ff_lock); | |
1460 | continue; | 1471 | continue; | |
1461 | } | 1472 | } | |
1462 | if (fp->f_type == DTYPE_KQUEUE) { | 1473 | if (fp->f_type == DTYPE_KQUEUE) { | |
1463 | /* kqueue descriptors cannot be copied. */ | 1474 | /* kqueue descriptors cannot be copied. */ | |
1464 | fd_zap(newfdp, i); | 1475 | fd_zap(newfdp, i); | |
1465 | mutex_exit(&ff->ff_lock); | 1476 | mutex_exit(&ff->ff_lock); | |
1466 | continue; | 1477 | continue; | |
1467 | } | 1478 | } | |
1468 | /* It's active: add a reference to the file. */ | 1479 | /* It's active: add a reference to the file. */ | |
1469 | mutex_enter(&fp->f_lock); | 1480 | mutex_enter(&fp->f_lock); | |
1470 | fp->f_count++; | 1481 | fp->f_count++; | |
1471 | mutex_exit(&fp->f_lock); | 1482 | mutex_exit(&fp->f_lock); | |
1472 | /* Consume one fdfile_t to represent it. */ | 1483 | /* Consume one fdfile_t to represent it. */ | |
1473 | if (i >= NDFDFILE) { | 1484 | if (i >= NDFDFILE) { | |
1474 | ff2 = fflist; | 1485 | ff2 = fflist; | |
1475 | fflist = (void *)ff2->ff_file; | 1486 | fflist = (void *)ff2->ff_file; | |
1476 | } | 1487 | } | |
1477 | ff2->ff_file = fp; | 1488 | ff2->ff_file = fp; | |
1478 | ff2->ff_exclose = ff->ff_exclose; | 1489 | ff2->ff_exclose = ff->ff_exclose; | |
1479 | ff2->ff_allocated = true; | 1490 | ff2->ff_allocated = true; | |
1480 | mutex_exit(&ff->ff_lock); | 1491 | mutex_exit(&ff->ff_lock); | |
1481 | if (i > newlast) { | 1492 | if (i > newlast) { | |
1482 | newlast = i; | 1493 | newlast = i; | |
1483 | } | 1494 | } | |
1484 | } | 1495 | } | |
1485 | mutex_exit(&fdp->fd_lock); | 1496 | mutex_exit(&fdp->fd_lock); | |
1486 | 1497 | |||
1487 | /* Discard unused fdfile_t structures. */ | 1498 | /* Discard unused fdfile_t structures. */ | |
1488 | while (__predict_false(fflist != NULL)) { | 1499 | while (__predict_false(fflist != NULL)) { | |
1489 | ff = fflist; | 1500 | ff = fflist; | |
1490 | fflist = (void *)ff->ff_file; | 1501 | fflist = (void *)ff->ff_file; | |
1491 | ff->ff_file = NULL; | 1502 | ff->ff_file = NULL; | |
1492 | pool_cache_put(fdfile_cache, ff); | 1503 | pool_cache_put(fdfile_cache, ff); | |
1493 | nused--; | 1504 | nused--; | |
1494 | } | 1505 | } | |
1495 | KASSERT(nused >= 0); | 1506 | KASSERT(nused >= 0); | |
1496 | KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); | 1507 | KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); | |
1497 | 1508 | |||
1498 | newfdp->fd_nused = nused; | 1509 | newfdp->fd_nused = nused; | |
1499 | newfdp->fd_lastfile = newlast; | 1510 | newfdp->fd_lastfile = newlast; | |
1500 | 1511 | |||
1501 | return (newfdp); | 1512 | return (newfdp); | |
1502 | } | 1513 | } | |
1503 | 1514 | |||
1504 | /* | 1515 | /* | |
1505 | * Release a filedesc structure. | 1516 | * Release a filedesc structure. | |
1506 | */ | 1517 | */ | |
1507 | void | 1518 | void | |
1508 | fd_free(void) | 1519 | fd_free(void) | |
1509 | { | 1520 | { | |
1510 | filedesc_t *fdp; | 1521 | filedesc_t *fdp; | |
1511 | fdfile_t *ff; | 1522 | fdfile_t *ff; | |
1512 | file_t *fp; | 1523 | file_t *fp; | |
1513 | int fd, lastfd; | 1524 | int fd, lastfd; | |
1514 | void **discard; | 1525 | void **discard; | |
1515 | 1526 | |||
1516 | fdp = curlwp->l_fd; | 1527 | fdp = curlwp->l_fd; | |
1517 | 1528 | |||
1518 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | 1529 | KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); | |
1519 | 1530 | |||
1520 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | 1531 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | |
1521 | return; | 1532 | return; | |
1522 | 1533 | |||
1523 | /* | 1534 | /* | |
1524 | * Close any files that the process holds open. | 1535 | * Close any files that the process holds open. | |
1525 | */ | 1536 | */ | |
1526 | for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) { | 1537 | for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) { | |
1527 | ff = fdp->fd_ofiles[fd]; | 1538 | ff = fdp->fd_ofiles[fd]; | |
1528 | KASSERT(fd >= NDFDFILE || | 1539 | KASSERT(fd >= NDFDFILE || | |
1529 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1540 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1530 | if ((ff = fdp->fd_ofiles[fd]) == NULL) | 1541 | if ((ff = fdp->fd_ofiles[fd]) == NULL) | |
1531 | continue; | 1542 | continue; | |
1532 | if ((fp = ff->ff_file) != NULL) { | 1543 | if ((fp = ff->ff_file) != NULL) { | |
1533 | /* | 1544 | /* | |
1534 | * Must use fd_close() here as kqueue holds | 1545 | * Must use fd_close() here as kqueue holds | |
1535 | * long term references to descriptors. | 1546 | * long term references to descriptors. | |
1536 | */ | 1547 | */ | |
1537 | ff->ff_refcnt++; | 1548 | ff->ff_refcnt++; | |
1538 | fd_close(fd); | 1549 | fd_close(fd); | |
1539 | } | 1550 | } | |
1540 | KASSERT(ff->ff_refcnt == 0); | 1551 | KASSERT(ff->ff_refcnt == 0); | |
1541 | KASSERT(ff->ff_file == NULL); | 1552 | KASSERT(ff->ff_file == NULL); | |
1542 | KASSERT(!ff->ff_exclose); | 1553 | KASSERT(!ff->ff_exclose); | |
1543 | KASSERT(!ff->ff_allocated); | 1554 | KASSERT(!ff->ff_allocated); | |
1544 | if (fd >= NDFDFILE) { | 1555 | if (fd >= NDFDFILE) { | |
1545 | pool_cache_put(fdfile_cache, ff); | 1556 | pool_cache_put(fdfile_cache, ff); | |
1546 | } | 1557 | } | |
1547 | } | 1558 | } | |
1548 | 1559 | |||
1549 | /* | 1560 | /* | |
1550 | * Clean out the descriptor table for the next user and return | 1561 | * Clean out the descriptor table for the next user and return | |
1551 | * to the cache. | 1562 | * to the cache. | |
1552 | */ | 1563 | */ | |
1553 | while ((discard = fdp->fd_discard) != NULL) { | 1564 | while ((discard = fdp->fd_discard) != NULL) { | |
1554 | fdp->fd_discard = discard[0]; | 1565 | fdp->fd_discard = discard[0]; | |
1555 | kmem_free(discard, (uintptr_t)discard[1]); | 1566 | kmem_free(discard, (uintptr_t)discard[1]); | |
1556 | } | 1567 | } | |
1557 | if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { | 1568 | if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { | |
1558 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); | 1569 | KASSERT(fdp->fd_himap != fdp->fd_dhimap); | |
1559 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); | 1570 | KASSERT(fdp->fd_lomap != fdp->fd_dlomap); | |
1560 | fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap); | 1571 | fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap); | |
1561 | } | 1572 | } | |
1562 | if (fdp->fd_nfiles > NDFILE) { | 1573 | if (fdp->fd_nfiles > NDFILE) { | |
1563 | KASSERT(fdp->fd_ofiles != fdp->fd_dfiles); | 1574 | KASSERT(fdp->fd_ofiles != fdp->fd_dfiles); | |
1564 | fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles); | 1575 | fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles); | |
1565 | } | 1576 | } | |
1566 | if (fdp->fd_knhash != NULL) { | 1577 | if (fdp->fd_knhash != NULL) { | |
1567 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); | 1578 | hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); | |
1568 | fdp->fd_knhash = NULL; | 1579 | fdp->fd_knhash = NULL; | |
1569 | fdp->fd_knhashmask = 0; | 1580 | fdp->fd_knhashmask = 0; | |
1570 | } else { | 1581 | } else { | |
1571 | KASSERT(fdp->fd_knhashmask == 0); | 1582 | KASSERT(fdp->fd_knhashmask == 0); | |
1572 | } | 1583 | } | |
1573 | fdp->fd_lastkqfile = -1; | 1584 | fdp->fd_lastkqfile = -1; | |
1574 | pool_cache_put(filedesc_cache, fdp); | 1585 | pool_cache_put(filedesc_cache, fdp); | |
1575 | } | 1586 | } | |
1576 | 1587 | |||
1577 | /* | 1588 | /* | |
1578 | * File Descriptor pseudo-device driver (/dev/fd/). | 1589 | * File Descriptor pseudo-device driver (/dev/fd/). | |
1579 | * | 1590 | * | |
1580 | * Opening minor device N dup()s the file (if any) connected to file | 1591 | * Opening minor device N dup()s the file (if any) connected to file | |
1581 | * descriptor N belonging to the calling process. Note that this driver | 1592 | * descriptor N belonging to the calling process. Note that this driver | |
1582 | * consists of only the ``open()'' routine, because all subsequent | 1593 | * consists of only the ``open()'' routine, because all subsequent | |
1583 | * references to this file will be direct to the other driver. | 1594 | * references to this file will be direct to the other driver. | |
1584 | */ | 1595 | */ | |
1585 | static int | 1596 | static int | |
1586 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) | 1597 | filedescopen(dev_t dev, int mode, int type, lwp_t *l) | |
1587 | { | 1598 | { | |
1588 | 1599 | |||
1589 | /* | 1600 | /* | |
1590 | * XXX Kludge: set dupfd to contain the value of the | 1601 | * XXX Kludge: set dupfd to contain the value of the | |
1591 | * the file descriptor being sought for duplication. The error | 1602 | * the file descriptor being sought for duplication. The error | |
1592 | * return ensures that the vnode for this device will be released | 1603 | * return ensures that the vnode for this device will be released | |
1593 | * by vn_open. Open will detect this special error and take the | 1604 | * by vn_open. Open will detect this special error and take the | |
1594 | * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN | 1605 | * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN | |
1595 | * will simply report the error. | 1606 | * will simply report the error. | |
1596 | */ | 1607 | */ | |
1597 | l->l_dupfd = minor(dev); /* XXX */ | 1608 | l->l_dupfd = minor(dev); /* XXX */ | |
1598 | return EDUPFD; | 1609 | return EDUPFD; | |
1599 | } | 1610 | } | |
1600 | 1611 | |||
1601 | /* | 1612 | /* | |
1602 | * Duplicate the specified descriptor to a free descriptor. | 1613 | * Duplicate the specified descriptor to a free descriptor. | |
1603 | */ | 1614 | */ | |
1604 | int | 1615 | int | |
1605 | fd_dupopen(int old, int *new, int mode, int error) | 1616 | fd_dupopen(int old, int *new, int mode, int error) | |
1606 | { | 1617 | { | |
1607 | filedesc_t *fdp; | 1618 | filedesc_t *fdp; | |
1608 | fdfile_t *ff; | 1619 | fdfile_t *ff; | |
1609 | file_t *fp; | 1620 | file_t *fp; | |
1610 | 1621 | |||
1611 | if ((fp = fd_getfile(old)) == NULL) { | 1622 | if ((fp = fd_getfile(old)) == NULL) { | |
1612 | return EBADF; | 1623 | return EBADF; | |
1613 | } | 1624 | } | |
1614 | fdp = curlwp->l_fd; | 1625 | fdp = curlwp->l_fd; | |
1615 | ff = fdp->fd_ofiles[old]; | 1626 | ff = fdp->fd_ofiles[old]; | |
1616 | 1627 | |||
1617 | /* | 1628 | /* | |
1618 | * There are two cases of interest here. | 1629 | * There are two cases of interest here. | |
1619 | * | 1630 | * | |
1620 | * For EDUPFD simply dup (dfd) to file descriptor | 1631 | * For EDUPFD simply dup (dfd) to file descriptor | |
1621 | * (indx) and return. | 1632 | * (indx) and return. | |
1622 | * | 1633 | * | |
1623 | * For EMOVEFD steal away the file structure from (dfd) and | 1634 | * For EMOVEFD steal away the file structure from (dfd) and | |
1624 | * store it in (indx). (dfd) is effectively closed by | 1635 | * store it in (indx). (dfd) is effectively closed by | |
1625 | * this operation. | 1636 | * this operation. | |
1626 | * | 1637 | * | |
1627 | * Any other error code is just returned. | 1638 | * Any other error code is just returned. | |
1628 | */ | 1639 | */ | |
1629 | switch (error) { | 1640 | switch (error) { | |
1630 | case EDUPFD: | 1641 | case EDUPFD: | |
1631 | /* | 1642 | /* | |
1632 | * Check that the mode the file is being opened for is a | 1643 | * Check that the mode the file is being opened for is a | |
1633 | * subset of the mode of the existing descriptor. | 1644 | * subset of the mode of the existing descriptor. | |
1634 | */ | 1645 | */ | |
1635 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { | 1646 | if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { | |
1636 | error = EACCES; | 1647 | error = EACCES; | |
1637 | break; | 1648 | break; | |
1638 | } | 1649 | } | |
1639 | 1650 | |||
1640 | /* Copy it. */ | 1651 | /* Copy it. */ | |
1641 | error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); | 1652 | error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); | |
1642 | break; | 1653 | break; | |
1643 | 1654 | |||
1644 | case EMOVEFD: | 1655 | case EMOVEFD: | |
1645 | /* Copy it. */ | 1656 | /* Copy it. */ | |
1646 | error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); | 1657 | error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); | |
1647 | if (error != 0) { | 1658 | if (error != 0) { | |
1648 | break; | 1659 | break; | |
1649 | } | 1660 | } | |
1650 | 1661 | |||
1651 | /* Steal away the file pointer from 'old'. */ | 1662 | /* Steal away the file pointer from 'old'. */ | |
1652 | (void)fd_close(old); | 1663 | (void)fd_close(old); | |
1653 | return 0; | 1664 | return 0; | |
1654 | } | 1665 | } | |
1655 | 1666 | |||
1656 | fd_putfile(old); | 1667 | fd_putfile(old); | |
1657 | return error; | 1668 | return error; | |
1658 | } | 1669 | } | |
1659 | 1670 | |||
1660 | /* | 1671 | /* | |
1661 | * Close open files on exec. | 1672 | * Close open files on exec. | |
1662 | */ | 1673 | */ | |
1663 | void | 1674 | void | |
1664 | fd_closeexec(void) | 1675 | fd_closeexec(void) | |
1665 | { | 1676 | { | |
1666 | struct cwdinfo *cwdi; | 1677 | struct cwdinfo *cwdi; | |
1667 | proc_t *p; | 1678 | proc_t *p; | |
1668 | filedesc_t *fdp; | 1679 | filedesc_t *fdp; | |
1669 | fdfile_t *ff; | 1680 | fdfile_t *ff; | |
1670 | lwp_t *l; | 1681 | lwp_t *l; | |
1671 | int fd; | 1682 | int fd; | |
1672 | 1683 | |||
1673 | l = curlwp; | 1684 | l = curlwp; | |
1674 | p = l->l_proc; | 1685 | p = l->l_proc; | |
1675 | fdp = p->p_fd; | 1686 | fdp = p->p_fd; | |
1676 | cwdi = p->p_cwdi; | 1687 | cwdi = p->p_cwdi; | |
1677 | 1688 | |||
1678 | if (cwdi->cwdi_refcnt > 1) { | 1689 | if (cwdi->cwdi_refcnt > 1) { | |
1679 | cwdi = cwdinit(); | 1690 | cwdi = cwdinit(); | |
1680 | cwdfree(p->p_cwdi); | 1691 | cwdfree(p->p_cwdi); | |
1681 | p->p_cwdi = cwdi; | 1692 | p->p_cwdi = cwdi; | |
1682 | } | 1693 | } | |
1683 | if (p->p_cwdi->cwdi_edir) { | 1694 | if (p->p_cwdi->cwdi_edir) { | |
1684 | vrele(p->p_cwdi->cwdi_edir); | 1695 | vrele(p->p_cwdi->cwdi_edir); | |
1685 | } | 1696 | } | |
1686 | 1697 | |||
1687 | if (fdp->fd_refcnt > 1) { | 1698 | if (fdp->fd_refcnt > 1) { | |
1688 | fdp = fd_copy(); | 1699 | fdp = fd_copy(); | |
1689 | fd_free(); | 1700 | fd_free(); | |
1690 | p->p_fd = fdp; | 1701 | p->p_fd = fdp; | |
1691 | l->l_fd = fdp; | 1702 | l->l_fd = fdp; | |
1692 | } | 1703 | } | |
1693 | if (!fdp->fd_exclose) { | 1704 | if (!fdp->fd_exclose) { | |
1694 | return; | 1705 | return; | |
1695 | } | 1706 | } | |
1696 | fdp->fd_exclose = false; | 1707 | fdp->fd_exclose = false; | |
1697 | 1708 | |||
1698 | for (fd = 0; fd <= fdp->fd_lastfile; fd++) { | 1709 | for (fd = 0; fd <= fdp->fd_lastfile; fd++) { | |
1699 | if ((ff = fdp->fd_ofiles[fd]) == NULL) { | 1710 | if ((ff = fdp->fd_ofiles[fd]) == NULL) { | |
1700 | KASSERT(fd >= NDFDFILE); | 1711 | KASSERT(fd >= NDFDFILE); | |
1701 | continue; | 1712 | continue; | |
1702 | } | 1713 | } | |
1703 | KASSERT(fd >= NDFDFILE || | 1714 | KASSERT(fd >= NDFDFILE || | |
1704 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | 1715 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | |
1705 | if (ff->ff_file == NULL) | 1716 | if (ff->ff_file == NULL) | |
1706 | continue; | 1717 | continue; | |
1707 | if (ff->ff_exclose) { | 1718 | if (ff->ff_exclose) { | |
1708 | /* | 1719 | /* | |
1709 | * We need a reference to close the file. | 1720 | * We need a reference to close the file. | |
1710 | * No other threads can see the fdfile_t at | 1721 | * No other threads can see the fdfile_t at | |
1711 | * this point, so don't bother locking. | 1722 | * this point, so don't bother locking. | |
1712 | */ | 1723 | */ | |
1713 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); | 1724 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); | |
1714 | ff->ff_refcnt++; | 1725 | ff->ff_refcnt++; | |
1715 | fd_close(fd); | 1726 | fd_close(fd); | |
1716 | } | 1727 | } | |
1717 | } | 1728 | } | |
1718 | } | 1729 | } | |
1719 | 1730 | |||
1720 | /* | 1731 | /* | |
1721 | * It is unsafe for set[ug]id processes to be started with file | 1732 | * It is unsafe for set[ug]id processes to be started with file | |
1722 | * descriptors 0..2 closed, as these descriptors are given implicit | 1733 | * descriptors 0..2 closed, as these descriptors are given implicit | |
1723 | * significance in the Standard C library. fdcheckstd() will create a | 1734 | * significance in the Standard C library. fdcheckstd() will create a | |
1724 | * descriptor referencing /dev/null for each of stdin, stdout, and | 1735 | * descriptor referencing /dev/null for each of stdin, stdout, and | |
1725 | * stderr that is not already open. | 1736 | * stderr that is not already open. | |
1726 | */ | 1737 | */ | |
1727 | #define CHECK_UPTO 3 | 1738 | #define CHECK_UPTO 3 | |
1728 | int | 1739 | int | |
1729 | fd_checkstd(void) | 1740 | fd_checkstd(void) | |
1730 | { | 1741 | { | |
1731 | struct proc *p; | 1742 | struct proc *p; | |
1732 | struct nameidata nd; | 1743 | struct nameidata nd; | |
1733 | filedesc_t *fdp; | 1744 | filedesc_t *fdp; | |
1734 | file_t *fp; | 1745 | file_t *fp; | |
1735 | struct proc *pp; | 1746 | struct proc *pp; | |
1736 | int fd, i, error, flags = FREAD|FWRITE; | 1747 | int fd, i, error, flags = FREAD|FWRITE; | |
1737 | char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; | 1748 | char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; | |
1738 | 1749 | |||
1739 | p = curproc; | 1750 | p = curproc; | |
1740 | closed[0] = '\0'; | 1751 | closed[0] = '\0'; | |
1741 | if ((fdp = p->p_fd) == NULL) | 1752 | if ((fdp = p->p_fd) == NULL) | |
1742 | return (0); | 1753 | return (0); | |
1743 | for (i = 0; i < CHECK_UPTO; i++) { | 1754 | for (i = 0; i < CHECK_UPTO; i++) { | |
1744 | KASSERT(i >= NDFDFILE || | 1755 | KASSERT(i >= NDFDFILE || | |
1745 | fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); | 1756 | fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); | |
1746 | if (fdp->fd_ofiles[i]->ff_file != NULL) | 1757 | if (fdp->fd_ofiles[i]->ff_file != NULL) | |
1747 | continue; | 1758 | continue; | |
1748 | snprintf(which, sizeof(which), ",%d", i); | 1759 | snprintf(which, sizeof(which), ",%d", i); | |
1749 | strlcat(closed, which, sizeof(closed)); | 1760 | strlcat(closed, which, sizeof(closed)); | |
1750 | if ((error = fd_allocfile(&fp, &fd)) != 0) | 1761 | if ((error = fd_allocfile(&fp, &fd)) != 0) | |
1751 | return (error); | 1762 | return (error); | |
1752 | KASSERT(fd < CHECK_UPTO); | 1763 | KASSERT(fd < CHECK_UPTO); | |
1753 | NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null"); | 1764 | NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null"); | |
1754 | if ((error = vn_open(&nd, flags, 0)) != 0) { | 1765 | if ((error = vn_open(&nd, flags, 0)) != 0) { | |
1755 | fd_abort(p, fp, fd); | 1766 | fd_abort(p, fp, fd); | |
1756 | return (error); | 1767 | return (error); | |
1757 | } | 1768 | } | |
1758 | fp->f_data = nd.ni_vp; | 1769 | fp->f_data = nd.ni_vp; | |
1759 | fp->f_flag = flags; | 1770 | fp->f_flag = flags; | |
1760 | fp->f_ops = &vnops; | 1771 | fp->f_ops = &vnops; | |
1761 | fp->f_type = DTYPE_VNODE; | 1772 | fp->f_type = DTYPE_VNODE; | |
1762 | VOP_UNLOCK(nd.ni_vp, 0); | 1773 | VOP_UNLOCK(nd.ni_vp, 0); | |
1763 | fd_affix(p, fp, fd); | 1774 | fd_affix(p, fp, fd); | |
1764 | } | 1775 | } | |
1765 | if (closed[0] != '\0') { | 1776 | if (closed[0] != '\0') { | |
1766 | mutex_enter(proc_lock); | 1777 | mutex_enter(proc_lock); | |
1767 | pp = p->p_pptr; | 1778 | pp = p->p_pptr; | |
1768 | mutex_enter(pp->p_lock); | 1779 | mutex_enter(pp->p_lock); | |
1769 | log(LOG_WARNING, "set{u,g}id pid %d (%s) " | 1780 | log(LOG_WARNING, "set{u,g}id pid %d (%s) " | |
1770 | "was invoked by uid %d ppid %d (%s) " | 1781 | "was invoked by uid %d ppid %d (%s) " | |
1771 | "with fd %s closed\n", | 1782 | "with fd %s closed\n", | |
1772 | p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred), | 1783 | p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred), | |
1773 | pp->p_pid, pp->p_comm, &closed[1]); | 1784 | pp->p_pid, pp->p_comm, &closed[1]); | |
1774 | mutex_exit(pp->p_lock); | 1785 | mutex_exit(pp->p_lock); | |
1775 | mutex_exit(proc_lock); | 1786 | mutex_exit(proc_lock); | |
1776 | } | 1787 | } | |
1777 | return (0); | 1788 | return (0); | |
1778 | } | 1789 | } | |
1779 | #undef CHECK_UPTO | 1790 | #undef CHECK_UPTO | |
1780 | 1791 | |||
1781 | /* | 1792 | /* | |
1782 | * Sets descriptor owner. If the owner is a process, 'pgid' | 1793 | * Sets descriptor owner. If the owner is a process, 'pgid' | |
1783 | * is set to positive value, process ID. If the owner is process group, | 1794 | * is set to positive value, process ID. If the owner is process group, | |
1784 | * 'pgid' is set to -pg_id. | 1795 | * 'pgid' is set to -pg_id. | |
1785 | */ | 1796 | */ | |
1786 | int | 1797 | int | |
1787 | fsetown(pid_t *pgid, u_long cmd, const void *data) | 1798 | fsetown(pid_t *pgid, u_long cmd, const void *data) | |
1788 | { | 1799 | { | |
1789 | int id = *(const int *)data; | 1800 | int id = *(const int *)data; | |
1790 | int error; | 1801 | int error; | |
1791 | 1802 | |||
1792 | switch (cmd) { | 1803 | switch (cmd) { | |
1793 | case TIOCSPGRP: | 1804 | case TIOCSPGRP: | |
1794 | if (id < 0) | 1805 | if (id < 0) | |
1795 | return (EINVAL); | 1806 | return (EINVAL); | |
1796 | id = -id; | 1807 | id = -id; | |
1797 | break; | 1808 | break; | |
1798 | default: | 1809 | default: | |
1799 | break; | 1810 | break; | |
1800 | } | 1811 | } | |
1801 | 1812 | |||
1802 | if (id > 0 && !pfind(id)) | 1813 | if (id > 0 && !pfind(id)) | |
1803 | return (ESRCH); | 1814 | return (ESRCH); | |
1804 | else if (id < 0 && (error = pgid_in_session(curproc, -id))) | 1815 | else if (id < 0 && (error = pgid_in_session(curproc, -id))) | |
1805 | return (error); | 1816 | return (error); | |
1806 | 1817 | |||
1807 | *pgid = id; | 1818 | *pgid = id; | |
1808 | return (0); | 1819 | return (0); | |
1809 | } | 1820 | } | |
1810 | 1821 | |||
1811 | /* | 1822 | /* | |
1812 | * Return descriptor owner information. If the value is positive, | 1823 | * Return descriptor owner information. If the value is positive, | |
1813 | * it's process ID. If it's negative, it's process group ID and | 1824 | * it's process ID. If it's negative, it's process group ID and | |
1814 | * needs the sign removed before use. | 1825 | * needs the sign removed before use. | |
1815 | */ | 1826 | */ | |
1816 | int | 1827 | int | |
1817 | fgetown(pid_t pgid, u_long cmd, void *data) | 1828 | fgetown(pid_t pgid, u_long cmd, void *data) | |
1818 | { | 1829 | { | |
1819 | 1830 | |||
1820 | switch (cmd) { | 1831 | switch (cmd) { | |
1821 | case TIOCGPGRP: | 1832 | case TIOCGPGRP: | |
1822 | *(int *)data = -pgid; | 1833 | *(int *)data = -pgid; | |
1823 | break; | 1834 | break; | |
1824 | default: | 1835 | default: | |
1825 | *(int *)data = pgid; | 1836 | *(int *)data = pgid; | |
1826 | break; | 1837 | break; | |
1827 | } | 1838 | } | |
1828 | return (0); | 1839 | return (0); | |
1829 | } | 1840 | } | |
1830 | 1841 | |||
1831 | /* | 1842 | /* | |
1832 | * Send signal to descriptor owner, either process or process group. | 1843 | * Send signal to descriptor owner, either process or process group. | |
1833 | */ | 1844 | */ | |
1834 | void | 1845 | void | |
1835 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) | 1846 | fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) | |
1836 | { | 1847 | { | |
1837 | struct proc *p1; | 1848 | struct proc *p1; | |
1838 | struct pgrp *pgrp; | 1849 | struct pgrp *pgrp; | |
1839 | ksiginfo_t ksi; | 1850 | ksiginfo_t ksi; | |
1840 | 1851 | |||
1841 | KASSERT(!cpu_intr_p()); | 1852 | KASSERT(!cpu_intr_p()); | |
1842 | 1853 | |||
1843 | KSI_INIT(&ksi); | 1854 | KSI_INIT(&ksi); | |
1844 | ksi.ksi_signo = signo; | 1855 | ksi.ksi_signo = signo; | |
1845 | ksi.ksi_code = code; | 1856 | ksi.ksi_code = code; | |
1846 | ksi.ksi_band = band; | 1857 | ksi.ksi_band = band; | |
1847 | 1858 | |||
1848 | mutex_enter(proc_lock); | 1859 | mutex_enter(proc_lock); | |
1849 | if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED))) | 1860 | if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED))) | |
1850 | kpsignal(p1, &ksi, fdescdata); | 1861 | kpsignal(p1, &ksi, fdescdata); | |
1851 | else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED))) | 1862 | else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED))) | |
1852 | kpgsignal(pgrp, &ksi, fdescdata, 0); | 1863 | kpgsignal(pgrp, &ksi, fdescdata, 0); | |
1853 | mutex_exit(proc_lock); | 1864 | mutex_exit(proc_lock); | |
1854 | } | 1865 | } | |
1855 | 1866 | |||
1856 | int | 1867 | int | |
1857 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, | 1868 | fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, | |
1858 | void *data) | 1869 | void *data) | |
1859 | { | 1870 | { | |
1860 | 1871 | |||
1861 | fp->f_flag = flag; | 1872 | fp->f_flag = flag; | |
1862 | fp->f_type = DTYPE_MISC; | 1873 | fp->f_type = DTYPE_MISC; | |
1863 | fp->f_ops = fops; | 1874 | fp->f_ops = fops; | |
1864 | fp->f_data = data; | 1875 | fp->f_data = data; | |
1865 | curlwp->l_dupfd = fd; | 1876 | curlwp->l_dupfd = fd; | |
1866 | fd_affix(curproc, fp, fd); | 1877 | fd_affix(curproc, fp, fd); | |
1867 | 1878 | |||
1868 | return EMOVEFD; | 1879 | return EMOVEFD; | |
1869 | } | 1880 | } | |
1870 | 1881 | |||
1871 | int | 1882 | int | |
1872 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) | 1883 | fnullop_fcntl(file_t *fp, u_int cmd, void *data) | |
1873 | { | 1884 | { | |
1874 | 1885 | |||
1875 | if (cmd == F_SETFL) | 1886 | if (cmd == F_SETFL) | |
1876 | return 0; | 1887 | return 0; | |
1877 | 1888 | |||
1878 | return EOPNOTSUPP; | 1889 | return EOPNOTSUPP; | |
1879 | } | 1890 | } | |
1880 | 1891 | |||
1881 | int | 1892 | int | |
1882 | fnullop_poll(file_t *fp, int which) | 1893 | fnullop_poll(file_t *fp, int which) | |
1883 | { | 1894 | { | |
1884 | 1895 | |||
1885 | return 0; | 1896 | return 0; | |
1886 | } | 1897 | } | |
1887 | 1898 | |||
1888 | int | 1899 | int | |
1889 | fnullop_kqfilter(file_t *fp, struct knote *kn) | 1900 | fnullop_kqfilter(file_t *fp, struct knote *kn) | |
1890 | { | 1901 | { | |
1891 | 1902 | |||
1892 | return 0; | 1903 | return 0; | |
1893 | } | 1904 | } | |
1894 | 1905 | |||
1895 | int | 1906 | int | |
1896 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, | 1907 | fbadop_read(file_t *fp, off_t *offset, struct uio *uio, | |
1897 | kauth_cred_t cred, int flags) | 1908 | kauth_cred_t cred, int flags) | |
1898 | { | 1909 | { | |
1899 | 1910 | |||
1900 | return EOPNOTSUPP; | 1911 | return EOPNOTSUPP; | |
1901 | } | 1912 | } | |
1902 | 1913 | |||
1903 | int | 1914 | int | |
1904 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, | 1915 | fbadop_write(file_t *fp, off_t *offset, struct uio *uio, | |
1905 | kauth_cred_t cred, int flags) | 1916 | kauth_cred_t cred, int flags) | |
1906 | { | 1917 | { | |
1907 | 1918 | |||
1908 | return EOPNOTSUPP; | 1919 | return EOPNOTSUPP; | |
1909 | } | 1920 | } | |
1910 | 1921 | |||
1911 | int | 1922 | int | |
1912 | fbadop_ioctl(file_t *fp, u_long com, void *data) | 1923 | fbadop_ioctl(file_t *fp, u_long com, void *data) | |
1913 | { | 1924 | { | |
1914 | 1925 | |||
1915 | return EOPNOTSUPP; | 1926 | return EOPNOTSUPP; | |
1916 | } | 1927 | } | |
1917 | 1928 | |||
1918 | int | 1929 | int | |
1919 | fbadop_stat(file_t *fp, struct stat *sb) | 1930 | fbadop_stat(file_t *fp, struct stat *sb) | |
1920 | { | 1931 | { | |
1921 | 1932 | |||
1922 | return EOPNOTSUPP; | 1933 | return EOPNOTSUPP; | |
1923 | } | 1934 | } | |
1924 | 1935 | |||
1925 | int | 1936 | int | |
1926 | fbadop_close(file_t *fp) | 1937 | fbadop_close(file_t *fp) | |
1927 | { | 1938 | { | |
1928 | 1939 | |||
1929 | return EOPNOTSUPP; | 1940 | return EOPNOTSUPP; | |
1930 | } | 1941 | } |
--- src/sys/kern/uipc_usrreq.c 2009/02/16 03:31:13 1.119.4.1
+++ src/sys/kern/uipc_usrreq.c 2009/03/18 05:33:23 1.119.4.2
@@ -1,1690 +1,1752 @@ | @@ -1,1690 +1,1752 @@ | |||
1 | /* $NetBSD: uipc_usrreq.c,v 1.119.4.1 2009/02/16 03:31:13 snj Exp $ */ | 1 | /* $NetBSD: uipc_usrreq.c,v 1.119.4.2 2009/03/18 05:33:23 snj Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1998, 2000, 2004, 2008 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | |
9 | * NASA Ames Research Center. | 9 | * NASA Ames Research Center, and by Andrew Doran. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | 15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | 16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | 17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | 18 | * documentation and/or other materials provided with the distribution. | |
19 | * | 19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
30 | * POSSIBILITY OF SUCH DAMAGE. | 30 | * POSSIBILITY OF SUCH DAMAGE. | |
31 | */ | 31 | */ | |
32 | 32 | |||
33 | /* | 33 | /* | |
34 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | 34 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 | |
35 | * The Regents of the University of California. All rights reserved. | 35 | * The Regents of the University of California. All rights reserved. | |
36 | * | 36 | * | |
37 | * Redistribution and use in source and binary forms, with or without | 37 | * Redistribution and use in source and binary forms, with or without | |
38 | * modification, are permitted provided that the following conditions | 38 | * modification, are permitted provided that the following conditions | |
39 | * are met: | 39 | * are met: | |
40 | * 1. Redistributions of source code must retain the above copyright | 40 | * 1. Redistributions of source code must retain the above copyright | |
41 | * notice, this list of conditions and the following disclaimer. | 41 | * notice, this list of conditions and the following disclaimer. | |
42 | * 2. Redistributions in binary form must reproduce the above copyright | 42 | * 2. Redistributions in binary form must reproduce the above copyright | |
43 | * notice, this list of conditions and the following disclaimer in the | 43 | * notice, this list of conditions and the following disclaimer in the | |
44 | * documentation and/or other materials provided with the distribution. | 44 | * documentation and/or other materials provided with the distribution. | |
45 | * 3. Neither the name of the University nor the names of its contributors | 45 | * 3. Neither the name of the University nor the names of its contributors | |
46 | * may be used to endorse or promote products derived from this software | 46 | * may be used to endorse or promote products derived from this software | |
47 | * without specific prior written permission. | 47 | * without specific prior written permission. | |
48 | * | 48 | * | |
49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
59 | * SUCH DAMAGE. | 59 | * SUCH DAMAGE. | |
60 | * | 60 | * | |
61 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 | 61 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 | |
62 | */ | 62 | */ | |
63 | 63 | |||
64 | /* | 64 | /* | |
65 | * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. | 65 | * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. | |
66 | * | 66 | * | |
67 | * Redistribution and use in source and binary forms, with or without | 67 | * Redistribution and use in source and binary forms, with or without | |
68 | * modification, are permitted provided that the following conditions | 68 | * modification, are permitted provided that the following conditions | |
69 | * are met: | 69 | * are met: | |
70 | * 1. Redistributions of source code must retain the above copyright | 70 | * 1. Redistributions of source code must retain the above copyright | |
71 | * notice, this list of conditions and the following disclaimer. | 71 | * notice, this list of conditions and the following disclaimer. | |
72 | * 2. Redistributions in binary form must reproduce the above copyright | 72 | * 2. Redistributions in binary form must reproduce the above copyright | |
73 | * notice, this list of conditions and the following disclaimer in the | 73 | * notice, this list of conditions and the following disclaimer in the | |
74 | * documentation and/or other materials provided with the distribution. | 74 | * documentation and/or other materials provided with the distribution. | |
75 | * 3. All advertising materials mentioning features or use of this software | 75 | * 3. All advertising materials mentioning features or use of this software | |
76 | * must display the following acknowledgement: | 76 | * must display the following acknowledgement: | |
77 | * This product includes software developed by the University of | 77 | * This product includes software developed by the University of | |
78 | * California, Berkeley and its contributors. | 78 | * California, Berkeley and its contributors. | |
79 | * 4. Neither the name of the University nor the names of its contributors | 79 | * 4. Neither the name of the University nor the names of its contributors | |
80 | * may be used to endorse or promote products derived from this software | 80 | * may be used to endorse or promote products derived from this software | |
81 | * without specific prior written permission. | 81 | * without specific prior written permission. | |
82 | * | 82 | * | |
83 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 83 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
84 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 84 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
85 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 85 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
93 | * SUCH DAMAGE. | 93 | * SUCH DAMAGE. | |
94 | * | 94 | * | |
95 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 | 95 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 | |
96 | */ | 96 | */ | |
97 | 97 | |||
98 | #include <sys/cdefs.h> | 98 | #include <sys/cdefs.h> | |
99 | __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.119.4.1 2009/02/16 03:31:13 snj Exp $"); | 99 | __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.119.4.2 2009/03/18 05:33:23 snj Exp $"); | |
100 | 100 | |||
101 | #include <sys/param.h> | 101 | #include <sys/param.h> | |
102 | #include <sys/systm.h> | 102 | #include <sys/systm.h> | |
103 | #include <sys/proc.h> | 103 | #include <sys/proc.h> | |
104 | #include <sys/filedesc.h> | 104 | #include <sys/filedesc.h> | |
105 | #include <sys/domain.h> | 105 | #include <sys/domain.h> | |
106 | #include <sys/protosw.h> | 106 | #include <sys/protosw.h> | |
107 | #include <sys/socket.h> | 107 | #include <sys/socket.h> | |
108 | #include <sys/socketvar.h> | 108 | #include <sys/socketvar.h> | |
109 | #include <sys/unpcb.h> | 109 | #include <sys/unpcb.h> | |
110 | #include <sys/un.h> | 110 | #include <sys/un.h> | |
111 | #include <sys/namei.h> | 111 | #include <sys/namei.h> | |
112 | #include <sys/vnode.h> | 112 | #include <sys/vnode.h> | |
113 | #include <sys/file.h> | 113 | #include <sys/file.h> | |
114 | #include <sys/stat.h> | 114 | #include <sys/stat.h> | |
115 | #include <sys/mbuf.h> | 115 | #include <sys/mbuf.h> | |
116 | #include <sys/kauth.h> | 116 | #include <sys/kauth.h> | |
117 | #include <sys/kmem.h> | 117 | #include <sys/kmem.h> | |
118 | #include <sys/atomic.h> | 118 | #include <sys/atomic.h> | |
119 | #include <sys/uidinfo.h> | 119 | #include <sys/uidinfo.h> | |
120 | #include <sys/kernel.h> | |||
121 | #include <sys/kthread.h> | |||
120 | 122 | |||
121 | /* | 123 | /* | |
122 | * Unix communications domain. | 124 | * Unix communications domain. | |
123 | * | 125 | * | |
124 | * TODO: | 126 | * TODO: | |
125 | * SEQPACKET, RDM | 127 | * SEQPACKET, RDM | |
126 | * rethink name space problems | 128 | * rethink name space problems | |
127 | * need a proper out-of-band | 129 | * need a proper out-of-band | |
128 | * | 130 | * | |
129 | * Notes on locking: | 131 | * Notes on locking: | |
130 | * | 132 | * | |
131 | * The generic rules noted in uipc_socket2.c apply. In addition: | 133 | * The generic rules noted in uipc_socket2.c apply. In addition: | |
132 | * | 134 | * | |
133 | * o We have a global lock, uipc_lock. | 135 | * o We have a global lock, uipc_lock. | |
134 | * | 136 | * | |
135 | * o All datagram sockets are locked by uipc_lock. | 137 | * o All datagram sockets are locked by uipc_lock. | |
136 | * | 138 | * | |
137 | * o For stream socketpairs, the two endpoints are created sharing the same | 139 | * o For stream socketpairs, the two endpoints are created sharing the same | |
138 | * independent lock. Sockets presented to PRU_CONNECT2 must already have | 140 | * independent lock. Sockets presented to PRU_CONNECT2 must already have | |
139 | * matching locks. | 141 | * matching locks. | |
140 | * | 142 | * | |
141 | * o Stream sockets created via socket() start life with their own | 143 | * o Stream sockets created via socket() start life with their own | |
142 | * independent lock. | 144 | * independent lock. | |
143 | * | 145 | * | |
144 | * o Stream connections to a named endpoint are slightly more complicated. | 146 | * o Stream connections to a named endpoint are slightly more complicated. | |
145 | * Sockets that have called listen() have their lock pointer mutated to | 147 | * Sockets that have called listen() have their lock pointer mutated to | |
146 | * the global uipc_lock. When establishing a connection, the connecting | 148 | * the global uipc_lock. When establishing a connection, the connecting | |
147 | * socket also has its lock mutated to uipc_lock, which matches the head | 149 | * socket also has its lock mutated to uipc_lock, which matches the head | |
148 | * (listening socket). We create a new socket for accept() to return, and | 150 | * (listening socket). We create a new socket for accept() to return, and | |
149 | * that also shares the head's lock. Until the connection is completely | 151 | * that also shares the head's lock. Until the connection is completely | |
150 | * done on both ends, all three sockets are locked by uipc_lock. Once the | 152 | * done on both ends, all three sockets are locked by uipc_lock. Once the | |
151 | * connection is complete, the association with the head's lock is broken. | 153 | * connection is complete, the association with the head's lock is broken. | |
152 | * The connecting socket and the socket returned from accept() have their | 154 | * The connecting socket and the socket returned from accept() have their | |
153 | * lock pointers mutated away from uipc_lock, and back to the connecting | 155 | * lock pointers mutated away from uipc_lock, and back to the connecting | |
154 | * socket's original, independent lock. The head continues to be locked | 156 | * socket's original, independent lock. The head continues to be locked | |
155 | * by uipc_lock. | 157 | * by uipc_lock. | |
156 | * | 158 | * | |
157 | * o If uipc_lock is determined to be a significant source of contention, | 159 | * o If uipc_lock is determined to be a significant source of contention, | |
158 | * it could easily be hashed out. It is difficult to simply make it an | 160 | * it could easily be hashed out. It is difficult to simply make it an | |
159 | * independent lock because of visibility / garbage collection issues: | 161 | * independent lock because of visibility / garbage collection issues: | |
160 | * if a socket has been associated with a lock at any point, that lock | 162 | * if a socket has been associated with a lock at any point, that lock | |
161 | * must remain valid until the socket is no longer visible in the system. | 163 | * must remain valid until the socket is no longer visible in the system. | |
162 | * The lock must not be freed or otherwise destroyed until any sockets | 164 | * The lock must not be freed or otherwise destroyed until any sockets | |
163 | * that had referenced it have also been destroyed. | 165 | * that had referenced it have also been destroyed. | |
164 | */ | 166 | */ | |
165 | const struct sockaddr_un sun_noname = { | 167 | const struct sockaddr_un sun_noname = { | |
166 | .sun_len = sizeof(sun_noname), | 168 | .sun_len = sizeof(sun_noname), | |
167 | .sun_family = AF_LOCAL, | 169 | .sun_family = AF_LOCAL, | |
168 | }; | 170 | }; | |
169 | ino_t unp_ino; /* prototype for fake inode numbers */ | 171 | ino_t unp_ino; /* prototype for fake inode numbers */ | |
170 | 172 | |||
171 | struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *); | 173 | struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *); | |
174 | static void unp_mark(file_t *); | |||
175 | static void unp_scan(struct mbuf *, void (*)(file_t *), int); | |||
176 | static void unp_discard_now(file_t *); | |||
177 | static void unp_discard_later(file_t *); | |||
178 | static void unp_thread(void *); | |||
179 | static void unp_thread_kick(void); | |||
172 | static kmutex_t *uipc_lock; | 180 | static kmutex_t *uipc_lock; | |
173 | 181 | |||
182 | static kcondvar_t unp_thread_cv; | |||
183 | static lwp_t *unp_thread_lwp; | |||
184 | static SLIST_HEAD(,file) unp_thread_discard; | |||
185 | static int unp_defer; | |||
186 | ||||
174 | /* | 187 | /* | |
175 | * Initialize Unix protocols. | 188 | * Initialize Unix protocols. | |
176 | */ | 189 | */ | |
177 | void | 190 | void | |
178 | uipc_init(void) | 191 | uipc_init(void) | |
179 | { | 192 | { | |
193 | int error; | |||
180 | 194 | |||
181 | uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); | 195 | uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); | |
196 | cv_init(&unp_thread_cv, "unpgc"); | |||
197 | ||||
198 | error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread, | |||
199 | NULL, &unp_thread_lwp, "unpgc"); | |||
200 | if (error != 0) | |||
201 | panic("uipc_init %d", error); | |||
182 | } | 202 | } | |
183 | 203 | |||
184 | /* | 204 | /* | |
185 | * A connection succeeded: disassociate both endpoints from the head's | 205 | * A connection succeeded: disassociate both endpoints from the head's | |
186 | * lock, and make them share their own lock. There is a race here: for | 206 | * lock, and make them share their own lock. There is a race here: for | |
187 | * a very brief time one endpoint will be locked by a different lock | 207 | * a very brief time one endpoint will be locked by a different lock | |
188 | * than the other end. However, since the current thread holds the old | 208 | * than the other end. However, since the current thread holds the old | |
189 | * lock (the listening socket's lock, the head) access can still only be | 209 | * lock (the listening socket's lock, the head) access can still only be | |
190 | * made to one side of the connection. | 210 | * made to one side of the connection. | |
191 | */ | 211 | */ | |
192 | static void | 212 | static void | |
193 | unp_setpeerlocks(struct socket *so, struct socket *so2) | 213 | unp_setpeerlocks(struct socket *so, struct socket *so2) | |
194 | { | 214 | { | |
195 | struct unpcb *unp; | 215 | struct unpcb *unp; | |
196 | kmutex_t *lock; | 216 | kmutex_t *lock; | |
197 | 217 | |||
198 | KASSERT(solocked2(so, so2)); | 218 | KASSERT(solocked2(so, so2)); | |
199 | 219 | |||
200 | /* | 220 | /* | |
201 | * Bail out if either end of the socket is not yet fully | 221 | * Bail out if either end of the socket is not yet fully | |
202 | * connected or accepted. We only break the lock association | 222 | * connected or accepted. We only break the lock association | |
203 | * with the head when the pair of sockets stand completely | 223 | * with the head when the pair of sockets stand completely | |
204 | * on their own. | 224 | * on their own. | |
205 | */ | 225 | */ | |
206 | if (so->so_head != NULL || so2->so_head != NULL) | 226 | if (so->so_head != NULL || so2->so_head != NULL) | |
207 | return; | 227 | return; | |
208 | 228 | |||
209 | /* | 229 | /* | |
210 | * Drop references to old lock. A third reference (from the | 230 | * Drop references to old lock. A third reference (from the | |
211 | * queue head) must be held as we still hold its lock. Bonus: | 231 | * queue head) must be held as we still hold its lock. Bonus: | |
212 | * we don't need to worry about garbage collecting the lock. | 232 | * we don't need to worry about garbage collecting the lock. | |
213 | */ | 233 | */ | |
214 | lock = so->so_lock; | 234 | lock = so->so_lock; | |
215 | KASSERT(lock == uipc_lock); | 235 | KASSERT(lock == uipc_lock); | |
216 | mutex_obj_free(lock); | 236 | mutex_obj_free(lock); | |
217 | mutex_obj_free(lock); | 237 | mutex_obj_free(lock); | |
218 | 238 | |||
219 | /* | 239 | /* | |
220 | * Grab stream lock from the initiator and share between the two | 240 | * Grab stream lock from the initiator and share between the two | |
221 | * endpoints. Issue memory barrier to ensure all modifications | 241 | * endpoints. Issue memory barrier to ensure all modifications | |
222 | * become globally visible before the lock change. so2 is | 242 | * become globally visible before the lock change. so2 is | |
223 | * assumed not to have a stream lock, because it was created | 243 | * assumed not to have a stream lock, because it was created | |
224 | * purely for the server side to accept this connection and | 244 | * purely for the server side to accept this connection and | |
225 | * started out life using the domain-wide lock. | 245 | * started out life using the domain-wide lock. | |
226 | */ | 246 | */ | |
227 | unp = sotounpcb(so); | 247 | unp = sotounpcb(so); | |
228 | KASSERT(unp->unp_streamlock != NULL); | 248 | KASSERT(unp->unp_streamlock != NULL); | |
229 | KASSERT(sotounpcb(so2)->unp_streamlock == NULL); | 249 | KASSERT(sotounpcb(so2)->unp_streamlock == NULL); | |
230 | lock = unp->unp_streamlock; | 250 | lock = unp->unp_streamlock; | |
231 | unp->unp_streamlock = NULL; | 251 | unp->unp_streamlock = NULL; | |
232 | mutex_obj_hold(lock); | 252 | mutex_obj_hold(lock); | |
233 | membar_exit(); | 253 | membar_exit(); | |
234 | solockreset(so, lock); | 254 | solockreset(so, lock); | |
235 | solockreset(so2, lock); | 255 | solockreset(so2, lock); | |
236 | } | 256 | } | |
237 | 257 | |||
238 | /* | 258 | /* | |
239 | * Reset a socket's lock back to the domain-wide lock. | 259 | * Reset a socket's lock back to the domain-wide lock. | |
240 | */ | 260 | */ | |
241 | static void | 261 | static void | |
242 | unp_resetlock(struct socket *so) | 262 | unp_resetlock(struct socket *so) | |
243 | { | 263 | { | |
244 | kmutex_t *olock, *nlock; | 264 | kmutex_t *olock, *nlock; | |
245 | struct unpcb *unp; | 265 | struct unpcb *unp; | |
246 | 266 | |||
247 | KASSERT(solocked(so)); | 267 | KASSERT(solocked(so)); | |
248 | 268 | |||
249 | olock = so->so_lock; | 269 | olock = so->so_lock; | |
250 | nlock = uipc_lock; | 270 | nlock = uipc_lock; | |
251 | if (olock == nlock) | 271 | if (olock == nlock) | |
252 | return; | 272 | return; | |
253 | unp = sotounpcb(so); | 273 | unp = sotounpcb(so); | |
254 | KASSERT(unp->unp_streamlock == NULL); | 274 | KASSERT(unp->unp_streamlock == NULL); | |
255 | unp->unp_streamlock = olock; | 275 | unp->unp_streamlock = olock; | |
256 | mutex_obj_hold(nlock); | 276 | mutex_obj_hold(nlock); | |
257 | mutex_enter(nlock); | 277 | mutex_enter(nlock); | |
258 | solockreset(so, nlock); | 278 | solockreset(so, nlock); | |
259 | mutex_exit(olock); | 279 | mutex_exit(olock); | |
260 | } | 280 | } | |
261 | 281 | |||
262 | static void | 282 | static void | |
263 | unp_free(struct unpcb *unp) | 283 | unp_free(struct unpcb *unp) | |
264 | { | 284 | { | |
265 | 285 | |||
266 | if (unp->unp_addr) | 286 | if (unp->unp_addr) | |
267 | free(unp->unp_addr, M_SONAME); | 287 | free(unp->unp_addr, M_SONAME); | |
268 | if (unp->unp_streamlock != NULL) | 288 | if (unp->unp_streamlock != NULL) | |
269 | mutex_obj_free(unp->unp_streamlock); | 289 | mutex_obj_free(unp->unp_streamlock); | |
270 | free(unp, M_PCB); | 290 | free(unp, M_PCB); | |
271 | } | 291 | } | |
272 | 292 | |||
273 | int | 293 | int | |
274 | unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp, | 294 | unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp, | |
275 | struct lwp *l) | 295 | struct lwp *l) | |
276 | { | 296 | { | |
277 | struct socket *so2; | 297 | struct socket *so2; | |
278 | const struct sockaddr_un *sun; | 298 | const struct sockaddr_un *sun; | |
279 | 299 | |||
280 | so2 = unp->unp_conn->unp_socket; | 300 | so2 = unp->unp_conn->unp_socket; | |
281 | 301 | |||
282 | KASSERT(solocked(so2)); | 302 | KASSERT(solocked(so2)); | |
283 | 303 | |||
284 | if (unp->unp_addr) | 304 | if (unp->unp_addr) | |
285 | sun = unp->unp_addr; | 305 | sun = unp->unp_addr; | |
286 | else | 306 | else | |
287 | sun = &sun_noname; | 307 | sun = &sun_noname; | |
288 | if (unp->unp_conn->unp_flags & UNP_WANTCRED) | 308 | if (unp->unp_conn->unp_flags & UNP_WANTCRED) | |
289 | control = unp_addsockcred(l, control); | 309 | control = unp_addsockcred(l, control); | |
290 | if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m, | 310 | if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m, | |
291 | control) == 0) { | 311 | control) == 0) { | |
292 | so2->so_rcv.sb_overflowed++; | 312 | so2->so_rcv.sb_overflowed++; | |
293 | sounlock(so2); | |||
294 | unp_dispose(control); | 313 | unp_dispose(control); | |
295 | m_freem(control); | 314 | m_freem(control); | |
296 | m_freem(m); | 315 | m_freem(m); | |
297 | solock(so2); | |||
298 | return (ENOBUFS); | 316 | return (ENOBUFS); | |
299 | } else { | 317 | } else { | |
300 | sorwakeup(so2); | 318 | sorwakeup(so2); | |
301 | return (0); | 319 | return (0); | |
302 | } | 320 | } | |
303 | } | 321 | } | |
304 | 322 | |||
305 | void | 323 | void | |
306 | unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr) | 324 | unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr) | |
307 | { | 325 | { | |
308 | const struct sockaddr_un *sun; | 326 | const struct sockaddr_un *sun; | |
309 | struct unpcb *unp; | 327 | struct unpcb *unp; | |
310 | bool ext; | 328 | bool ext; | |
311 | 329 | |||
312 | unp = sotounpcb(so); | 330 | unp = sotounpcb(so); | |
313 | ext = false; | 331 | ext = false; | |
314 | 332 | |||
315 | for (;;) { | 333 | for (;;) { | |
316 | sun = NULL; | 334 | sun = NULL; | |
317 | if (peeraddr) { | 335 | if (peeraddr) { | |
318 | if (unp->unp_conn && unp->unp_conn->unp_addr) | 336 | if (unp->unp_conn && unp->unp_conn->unp_addr) | |
319 | sun = unp->unp_conn->unp_addr; | 337 | sun = unp->unp_conn->unp_addr; | |
320 | } else { | 338 | } else { | |
321 | if (unp->unp_addr) | 339 | if (unp->unp_addr) | |
322 | sun = unp->unp_addr; | 340 | sun = unp->unp_addr; | |
323 | } | 341 | } | |
324 | if (sun == NULL) | 342 | if (sun == NULL) | |
325 | sun = &sun_noname; | 343 | sun = &sun_noname; | |
326 | nam->m_len = sun->sun_len; | 344 | nam->m_len = sun->sun_len; | |
327 | if (nam->m_len > MLEN && !ext) { | 345 | if (nam->m_len > MLEN && !ext) { | |
328 | sounlock(so); | 346 | sounlock(so); | |
329 | MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK); | 347 | MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK); | |
330 | solock(so); | 348 | solock(so); | |
331 | ext = true; | 349 | ext = true; | |
332 | } else { | 350 | } else { | |
333 | KASSERT(nam->m_len <= MAXPATHLEN * 2); | 351 | KASSERT(nam->m_len <= MAXPATHLEN * 2); | |
334 | memcpy(mtod(nam, void *), sun, (size_t)nam->m_len); | 352 | memcpy(mtod(nam, void *), sun, (size_t)nam->m_len); | |
335 | break; | 353 | break; | |
336 | } | 354 | } | |
337 | } | 355 | } | |
338 | } | 356 | } | |
339 | 357 | |||
340 | /*ARGSUSED*/ | 358 | /*ARGSUSED*/ | |
341 | int | 359 | int | |
342 | uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, | 360 | uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, | |
343 | struct mbuf *control, struct lwp *l) | 361 | struct mbuf *control, struct lwp *l) | |
344 | { | 362 | { | |
345 | struct unpcb *unp = sotounpcb(so); | 363 | struct unpcb *unp = sotounpcb(so); | |
346 | struct socket *so2; | 364 | struct socket *so2; | |
347 | struct proc *p; | 365 | struct proc *p; | |
348 | u_int newhiwat; | 366 | u_int newhiwat; | |
349 | int error = 0; | 367 | int error = 0; | |
350 | 368 | |||
351 | if (req == PRU_CONTROL) | 369 | if (req == PRU_CONTROL) | |
352 | return (EOPNOTSUPP); | 370 | return (EOPNOTSUPP); | |
353 | 371 | |||
354 | #ifdef DIAGNOSTIC | 372 | #ifdef DIAGNOSTIC | |
355 | if (req != PRU_SEND && req != PRU_SENDOOB && control) | 373 | if (req != PRU_SEND && req != PRU_SENDOOB && control) | |
356 | panic("uipc_usrreq: unexpected control mbuf"); | 374 | panic("uipc_usrreq: unexpected control mbuf"); | |
357 | #endif | 375 | #endif | |
358 | p = l ? l->l_proc : NULL; | 376 | p = l ? l->l_proc : NULL; | |
359 | if (req != PRU_ATTACH) { | 377 | if (req != PRU_ATTACH) { | |
360 | if (unp == 0) { | 378 | if (unp == 0) { | |
361 | error = EINVAL; | 379 | error = EINVAL; | |
362 | goto release; | 380 | goto release; | |
363 | } | 381 | } | |
364 | KASSERT(solocked(so)); | 382 | KASSERT(solocked(so)); | |
365 | } | 383 | } | |
366 | 384 | |||
367 | switch (req) { | 385 | switch (req) { | |
368 | 386 | |||
369 | case PRU_ATTACH: | 387 | case PRU_ATTACH: | |
370 | if (unp != 0) { | 388 | if (unp != 0) { | |
371 | error = EISCONN; | 389 | error = EISCONN; | |
372 | break; | 390 | break; | |
373 | } | 391 | } | |
374 | error = unp_attach(so); | 392 | error = unp_attach(so); | |
375 | break; | 393 | break; | |
376 | 394 | |||
377 | case PRU_DETACH: | 395 | case PRU_DETACH: | |
378 | unp_detach(unp); | 396 | unp_detach(unp); | |
379 | break; | 397 | break; | |
380 | 398 | |||
381 | case PRU_BIND: | 399 | case PRU_BIND: | |
382 | KASSERT(l != NULL); | 400 | KASSERT(l != NULL); | |
383 | error = unp_bind(so, nam, l); | 401 | error = unp_bind(so, nam, l); | |
384 | break; | 402 | break; | |
385 | 403 | |||
386 | case PRU_LISTEN: | 404 | case PRU_LISTEN: | |
387 | /* | 405 | /* | |
388 | * If the socket can accept a connection, it must be | 406 | * If the socket can accept a connection, it must be | |
389 | * locked by uipc_lock. | 407 | * locked by uipc_lock. | |
390 | */ | 408 | */ | |
391 | unp_resetlock(so); | 409 | unp_resetlock(so); | |
392 | if (unp->unp_vnode == 0) | 410 | if (unp->unp_vnode == 0) | |
393 | error = EINVAL; | 411 | error = EINVAL; | |
394 | break; | 412 | break; | |
395 | 413 | |||
396 | case PRU_CONNECT: | 414 | case PRU_CONNECT: | |
397 | KASSERT(l != NULL); | 415 | KASSERT(l != NULL); | |
398 | error = unp_connect(so, nam, l); | 416 | error = unp_connect(so, nam, l); | |
399 | break; | 417 | break; | |
400 | 418 | |||
401 | case PRU_CONNECT2: | 419 | case PRU_CONNECT2: | |
402 | error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2); | 420 | error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2); | |
403 | break; | 421 | break; | |
404 | 422 | |||
405 | case PRU_DISCONNECT: | 423 | case PRU_DISCONNECT: | |
406 | unp_disconnect(unp); | 424 | unp_disconnect(unp); | |
407 | break; | 425 | break; | |
408 | 426 | |||
409 | case PRU_ACCEPT: | 427 | case PRU_ACCEPT: | |
410 | KASSERT(so->so_lock == uipc_lock); | 428 | KASSERT(so->so_lock == uipc_lock); | |
411 | /* | 429 | /* | |
412 | * Mark the initiating STREAM socket as connected *ONLY* | 430 | * Mark the initiating STREAM socket as connected *ONLY* | |
413 | * after it's been accepted. This prevents a client from | 431 | * after it's been accepted. This prevents a client from | |
414 | * overrunning a server and receiving ECONNREFUSED. | 432 | * overrunning a server and receiving ECONNREFUSED. | |
415 | */ | 433 | */ | |
416 | if (unp->unp_conn == NULL) | 434 | if (unp->unp_conn == NULL) | |
417 | break; | 435 | break; | |
418 | so2 = unp->unp_conn->unp_socket; | 436 | so2 = unp->unp_conn->unp_socket; | |
419 | if (so2->so_state & SS_ISCONNECTING) { | 437 | if (so2->so_state & SS_ISCONNECTING) { | |
420 | KASSERT(solocked2(so, so->so_head)); | 438 | KASSERT(solocked2(so, so->so_head)); | |
421 | KASSERT(solocked2(so2, so->so_head)); | 439 | KASSERT(solocked2(so2, so->so_head)); | |
422 | soisconnected(so2); | 440 | soisconnected(so2); | |
423 | } | 441 | } | |
424 | /* | 442 | /* | |
425 | * If the connection is fully established, break the | 443 | * If the connection is fully established, break the | |
426 | * association with uipc_lock and give the connected | 444 | * association with uipc_lock and give the connected | |
427 | * pair a separate lock to share. | 445 | * pair a separate lock to share. | |
428 | */ | 446 | */ | |
429 | unp_setpeerlocks(so2, so); | 447 | unp_setpeerlocks(so2, so); | |
430 | /* | 448 | /* | |
431 | * Only now return peer's address, as we may need to | 449 | * Only now return peer's address, as we may need to | |
432 | * block in order to allocate memory. | 450 | * block in order to allocate memory. | |
433 | * | 451 | * | |
434 | * XXX Minor race: connection can be broken while | 452 | * XXX Minor race: connection can be broken while | |
435 | * lock is dropped in unp_setaddr(). We will return | 453 | * lock is dropped in unp_setaddr(). We will return | |
436 | * error == 0 and sun_noname as the peer address. | 454 | * error == 0 and sun_noname as the peer address. | |
437 | */ | 455 | */ | |
438 | unp_setaddr(so, nam, true); | 456 | unp_setaddr(so, nam, true); | |
439 | break; | 457 | break; | |
440 | 458 | |||
441 | case PRU_SHUTDOWN: | 459 | case PRU_SHUTDOWN: | |
442 | socantsendmore(so); | 460 | socantsendmore(so); | |
443 | unp_shutdown(unp); | 461 | unp_shutdown(unp); | |
444 | break; | 462 | break; | |
445 | 463 | |||
446 | case PRU_RCVD: | 464 | case PRU_RCVD: | |
447 | switch (so->so_type) { | 465 | switch (so->so_type) { | |
448 | 466 | |||
449 | case SOCK_DGRAM: | 467 | case SOCK_DGRAM: | |
450 | panic("uipc 1"); | 468 | panic("uipc 1"); | |
451 | /*NOTREACHED*/ | 469 | /*NOTREACHED*/ | |
452 | 470 | |||
453 | case SOCK_STREAM: | 471 | case SOCK_STREAM: | |
454 | #define rcv (&so->so_rcv) | 472 | #define rcv (&so->so_rcv) | |
455 | #define snd (&so2->so_snd) | 473 | #define snd (&so2->so_snd) | |
456 | if (unp->unp_conn == 0) | 474 | if (unp->unp_conn == 0) | |
457 | break; | 475 | break; | |
458 | so2 = unp->unp_conn->unp_socket; | 476 | so2 = unp->unp_conn->unp_socket; | |
459 | KASSERT(solocked2(so, so2)); | 477 | KASSERT(solocked2(so, so2)); | |
460 | /* | 478 | /* | |
461 | * Adjust backpressure on sender | 479 | * Adjust backpressure on sender | |
462 | * and wakeup any waiting to write. | 480 | * and wakeup any waiting to write. | |
463 | */ | 481 | */ | |
464 | snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; | 482 | snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; | |
465 | unp->unp_mbcnt = rcv->sb_mbcnt; | 483 | unp->unp_mbcnt = rcv->sb_mbcnt; | |
466 | newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc; | 484 | newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc; | |
467 | (void)chgsbsize(so2->so_uidinfo, | 485 | (void)chgsbsize(so2->so_uidinfo, | |
468 | &snd->sb_hiwat, newhiwat, RLIM_INFINITY); | 486 | &snd->sb_hiwat, newhiwat, RLIM_INFINITY); | |
469 | unp->unp_cc = rcv->sb_cc; | 487 | unp->unp_cc = rcv->sb_cc; | |
470 | sowwakeup(so2); | 488 | sowwakeup(so2); | |
471 | #undef snd | 489 | #undef snd | |
472 | #undef rcv | 490 | #undef rcv | |
473 | break; | 491 | break; | |
474 | 492 | |||
475 | default: | 493 | default: | |
476 | panic("uipc 2"); | 494 | panic("uipc 2"); | |
477 | } | 495 | } | |
478 | break; | 496 | break; | |
479 | 497 | |||
480 | case PRU_SEND: | 498 | case PRU_SEND: | |
481 | /* | 499 | /* | |
482 | * Note: unp_internalize() rejects any control message | 500 | * Note: unp_internalize() rejects any control message | |
483 | * other than SCM_RIGHTS, and only allows one. This | 501 | * other than SCM_RIGHTS, and only allows one. This | |
484 | * has the side-effect of preventing a caller from | 502 | * has the side-effect of preventing a caller from | |
485 | * forging SCM_CREDS. | 503 | * forging SCM_CREDS. | |
486 | */ | 504 | */ | |
487 | if (control) { | 505 | if (control) { | |
488 | sounlock(so); | 506 | sounlock(so); | |
489 | error = unp_internalize(&control); | 507 | error = unp_internalize(&control); | |
490 | solock(so); | 508 | solock(so); | |
491 | if (error != 0) { | 509 | if (error != 0) { | |
492 | m_freem(control); | 510 | m_freem(control); | |
493 | m_freem(m); | 511 | m_freem(m); | |
494 | break; | 512 | break; | |
495 | } | 513 | } | |
496 | } | 514 | } | |
497 | switch (so->so_type) { | 515 | switch (so->so_type) { | |
498 | 516 | |||
499 | case SOCK_DGRAM: { | 517 | case SOCK_DGRAM: { | |
500 | KASSERT(so->so_lock == uipc_lock); | 518 | KASSERT(so->so_lock == uipc_lock); | |
501 | if (nam) { | 519 | if (nam) { | |
502 | if ((so->so_state & SS_ISCONNECTED) != 0) | 520 | if ((so->so_state & SS_ISCONNECTED) != 0) | |
503 | error = EISCONN; | 521 | error = EISCONN; | |
504 | else { | 522 | else { | |
505 | /* | 523 | /* | |
506 | * Note: once connected, the | 524 | * Note: once connected, the | |
507 | * socket's lock must not be | 525 | * socket's lock must not be | |
508 | * dropped until we have sent | 526 | * dropped until we have sent | |
509 | * the message and disconnected. | 527 | * the message and disconnected. | |
510 | * This is necessary to prevent | 528 | * This is necessary to prevent | |
511 | * intervening control ops, like | 529 | * intervening control ops, like | |
512 | * another connection. | 530 | * another connection. | |
513 | */ | 531 | */ | |
514 | error = unp_connect(so, nam, l); | 532 | error = unp_connect(so, nam, l); | |
515 | } | 533 | } | |
516 | } else { | 534 | } else { | |
517 | if ((so->so_state & SS_ISCONNECTED) == 0) | 535 | if ((so->so_state & SS_ISCONNECTED) == 0) | |
518 | error = ENOTCONN; | 536 | error = ENOTCONN; | |
519 | } | 537 | } | |
520 | if (error) { | 538 | if (error) { | |
521 | sounlock(so); | |||
522 | unp_dispose(control); | 539 | unp_dispose(control); | |
523 | m_freem(control); | 540 | m_freem(control); | |
524 | m_freem(m); | 541 | m_freem(m); | |
525 | solock(so); | |||
526 | break; | 542 | break; | |
527 | } | 543 | } | |
528 | KASSERT(p != NULL); | 544 | KASSERT(p != NULL); | |
529 | error = unp_output(m, control, unp, l); | 545 | error = unp_output(m, control, unp, l); | |
530 | if (nam) | 546 | if (nam) | |
531 | unp_disconnect(unp); | 547 | unp_disconnect(unp); | |
532 | break; | 548 | break; | |
533 | } | 549 | } | |
534 | 550 | |||
535 | case SOCK_STREAM: | 551 | case SOCK_STREAM: | |
536 | #define rcv (&so2->so_rcv) | 552 | #define rcv (&so2->so_rcv) | |
537 | #define snd (&so->so_snd) | 553 | #define snd (&so->so_snd) | |
538 | if (unp->unp_conn == NULL) { | 554 | if (unp->unp_conn == NULL) { | |
539 | error = ENOTCONN; | 555 | error = ENOTCONN; | |
540 | break; | 556 | break; | |
541 | } | 557 | } | |
542 | so2 = unp->unp_conn->unp_socket; | 558 | so2 = unp->unp_conn->unp_socket; | |
543 | KASSERT(solocked2(so, so2)); | 559 | KASSERT(solocked2(so, so2)); | |
544 | if (unp->unp_conn->unp_flags & UNP_WANTCRED) { | 560 | if (unp->unp_conn->unp_flags & UNP_WANTCRED) { | |
545 | /* | 561 | /* | |
546 | * Credentials are passed only once on | 562 | * Credentials are passed only once on | |
547 | * SOCK_STREAM. | 563 | * SOCK_STREAM. | |
548 | */ | 564 | */ | |
549 | unp->unp_conn->unp_flags &= ~UNP_WANTCRED; | 565 | unp->unp_conn->unp_flags &= ~UNP_WANTCRED; | |
550 | control = unp_addsockcred(l, control); | 566 | control = unp_addsockcred(l, control); | |
551 | } | 567 | } | |
552 | /* | 568 | /* | |
553 | * Send to paired receive port, and then reduce | 569 | * Send to paired receive port, and then reduce | |
554 | * send buffer hiwater marks to maintain backpressure. | 570 | * send buffer hiwater marks to maintain backpressure. | |
555 | * Wake up readers. | 571 | * Wake up readers. | |
556 | */ | 572 | */ | |
557 | if (control) { | 573 | if (control) { | |
558 | if (sbappendcontrol(rcv, m, control) != 0) | 574 | if (sbappendcontrol(rcv, m, control) != 0) | |
559 | control = NULL; | 575 | control = NULL; | |
560 | } else | 576 | } else | |
561 | sbappend(rcv, m); | 577 | sbappend(rcv, m); | |
562 | snd->sb_mbmax -= | 578 | snd->sb_mbmax -= | |
563 | rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; | 579 | rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; | |
564 | unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; | 580 | unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; | |
565 | newhiwat = snd->sb_hiwat - | 581 | newhiwat = snd->sb_hiwat - | |
566 | (rcv->sb_cc - unp->unp_conn->unp_cc); | 582 | (rcv->sb_cc - unp->unp_conn->unp_cc); | |
567 | (void)chgsbsize(so->so_uidinfo, | 583 | (void)chgsbsize(so->so_uidinfo, | |
568 | &snd->sb_hiwat, newhiwat, RLIM_INFINITY); | 584 | &snd->sb_hiwat, newhiwat, RLIM_INFINITY); | |
569 | unp->unp_conn->unp_cc = rcv->sb_cc; | 585 | unp->unp_conn->unp_cc = rcv->sb_cc; | |
570 | sorwakeup(so2); | 586 | sorwakeup(so2); | |
571 | #undef snd | 587 | #undef snd | |
572 | #undef rcv | 588 | #undef rcv | |
573 | if (control != NULL) { | 589 | if (control != NULL) { | |
574 | sounlock(so); | |||
575 | unp_dispose(control); | 590 | unp_dispose(control); | |
576 | m_freem(control); | 591 | m_freem(control); | |
577 | solock(so); | |||
578 | } | 592 | } | |
579 | break; | 593 | break; | |
580 | 594 | |||
581 | default: | 595 | default: | |
582 | panic("uipc 4"); | 596 | panic("uipc 4"); | |
583 | } | 597 | } | |
584 | break; | 598 | break; | |
585 | 599 | |||
586 | case PRU_ABORT: | 600 | case PRU_ABORT: | |
587 | (void)unp_drop(unp, ECONNABORTED); | 601 | (void)unp_drop(unp, ECONNABORTED); | |
588 | 602 | |||
589 | KASSERT(so->so_head == NULL); | 603 | KASSERT(so->so_head == NULL); | |
590 | #ifdef DIAGNOSTIC | 604 | #ifdef DIAGNOSTIC | |
591 | if (so->so_pcb == 0) | 605 | if (so->so_pcb == 0) | |
592 | panic("uipc 5: drop killed pcb"); | 606 | panic("uipc 5: drop killed pcb"); | |
593 | #endif | 607 | #endif | |
594 | unp_detach(unp); | 608 | unp_detach(unp); | |
595 | break; | 609 | break; | |
596 | 610 | |||
597 | case PRU_SENSE: | 611 | case PRU_SENSE: | |
598 | ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; | 612 | ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; | |
599 | if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { | 613 | if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { | |
600 | so2 = unp->unp_conn->unp_socket; | 614 | so2 = unp->unp_conn->unp_socket; | |
601 | KASSERT(solocked2(so, so2)); | 615 | KASSERT(solocked2(so, so2)); | |
602 | ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; | 616 | ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; | |
603 | } | 617 | } | |
604 | ((struct stat *) m)->st_dev = NODEV; | 618 | ((struct stat *) m)->st_dev = NODEV; | |
605 | if (unp->unp_ino == 0) | 619 | if (unp->unp_ino == 0) | |
606 | unp->unp_ino = unp_ino++; | 620 | unp->unp_ino = unp_ino++; | |
607 | ((struct stat *) m)->st_atimespec = | 621 | ((struct stat *) m)->st_atimespec = | |
608 | ((struct stat *) m)->st_mtimespec = | 622 | ((struct stat *) m)->st_mtimespec = | |
609 | ((struct stat *) m)->st_ctimespec = unp->unp_ctime; | 623 | ((struct stat *) m)->st_ctimespec = unp->unp_ctime; | |
610 | ((struct stat *) m)->st_ino = unp->unp_ino; | 624 | ((struct stat *) m)->st_ino = unp->unp_ino; | |
611 | return (0); | 625 | return (0); | |
612 | 626 | |||
613 | case PRU_RCVOOB: | 627 | case PRU_RCVOOB: | |
614 | error = EOPNOTSUPP; | 628 | error = EOPNOTSUPP; | |
615 | break; | 629 | break; | |
616 | 630 | |||
617 | case PRU_SENDOOB: | 631 | case PRU_SENDOOB: | |
618 | m_freem(control); | 632 | m_freem(control); | |
619 | m_freem(m); | 633 | m_freem(m); | |
620 | error = EOPNOTSUPP; | 634 | error = EOPNOTSUPP; | |
621 | break; | 635 | break; | |
622 | 636 | |||
623 | case PRU_SOCKADDR: | 637 | case PRU_SOCKADDR: | |
624 | unp_setaddr(so, nam, false); | 638 | unp_setaddr(so, nam, false); | |
625 | break; | 639 | break; | |
626 | 640 | |||
627 | case PRU_PEERADDR: | 641 | case PRU_PEERADDR: | |
628 | unp_setaddr(so, nam, true); | 642 | unp_setaddr(so, nam, true); | |
629 | break; | 643 | break; | |
630 | 644 | |||
631 | default: | 645 | default: | |
632 | panic("piusrreq"); | 646 | panic("piusrreq"); | |
633 | } | 647 | } | |
634 | 648 | |||
635 | release: | 649 | release: | |
636 | return (error); | 650 | return (error); | |
637 | } | 651 | } | |
638 | 652 | |||
639 | /* | 653 | /* | |
640 | * Unix domain socket option processing. | 654 | * Unix domain socket option processing. | |
641 | */ | 655 | */ | |
642 | int | 656 | int | |
643 | uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt) | 657 | uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt) | |
644 | { | 658 | { | |
645 | struct unpcb *unp = sotounpcb(so); | 659 | struct unpcb *unp = sotounpcb(so); | |
646 | int optval = 0, error = 0; | 660 | int optval = 0, error = 0; | |
647 | 661 | |||
648 | KASSERT(solocked(so)); | 662 | KASSERT(solocked(so)); | |
649 | 663 | |||
650 | if (sopt->sopt_level != 0) { | 664 | if (sopt->sopt_level != 0) { | |
651 | error = ENOPROTOOPT; | 665 | error = ENOPROTOOPT; | |
652 | } else switch (op) { | 666 | } else switch (op) { | |
653 | 667 | |||
654 | case PRCO_SETOPT: | 668 | case PRCO_SETOPT: | |
655 | switch (sopt->sopt_name) { | 669 | switch (sopt->sopt_name) { | |
656 | case LOCAL_CREDS: | 670 | case LOCAL_CREDS: | |
657 | case LOCAL_CONNWAIT: | 671 | case LOCAL_CONNWAIT: | |
658 | error = sockopt_getint(sopt, &optval); | 672 | error = sockopt_getint(sopt, &optval); | |
659 | if (error) | 673 | if (error) | |
660 | break; | 674 | break; | |
661 | switch (sopt->sopt_name) { | 675 | switch (sopt->sopt_name) { | |
662 | #define OPTSET(bit) \ | 676 | #define OPTSET(bit) \ | |
663 | if (optval) \ | 677 | if (optval) \ | |
664 | unp->unp_flags |= (bit); \ | 678 | unp->unp_flags |= (bit); \ | |
665 | else \ | 679 | else \ | |
666 | unp->unp_flags &= ~(bit); | 680 | unp->unp_flags &= ~(bit); | |
667 | 681 | |||
668 | case LOCAL_CREDS: | 682 | case LOCAL_CREDS: | |
669 | OPTSET(UNP_WANTCRED); | 683 | OPTSET(UNP_WANTCRED); | |
670 | break; | 684 | break; | |
671 | case LOCAL_CONNWAIT: | 685 | case LOCAL_CONNWAIT: | |
672 | OPTSET(UNP_CONNWAIT); | 686 | OPTSET(UNP_CONNWAIT); | |
673 | break; | 687 | break; | |
674 | } | 688 | } | |
675 | break; | 689 | break; | |
676 | #undef OPTSET | 690 | #undef OPTSET | |
677 | 691 | |||
678 | default: | 692 | default: | |
679 | error = ENOPROTOOPT; | 693 | error = ENOPROTOOPT; | |
680 | break; | 694 | break; | |
681 | } | 695 | } | |
682 | break; | 696 | break; | |
683 | 697 | |||
684 | case PRCO_GETOPT: | 698 | case PRCO_GETOPT: | |
685 | sounlock(so); | 699 | sounlock(so); | |
686 | switch (sopt->sopt_name) { | 700 | switch (sopt->sopt_name) { | |
687 | case LOCAL_PEEREID: | 701 | case LOCAL_PEEREID: | |
688 | if (unp->unp_flags & UNP_EIDSVALID) { | 702 | if (unp->unp_flags & UNP_EIDSVALID) { | |
689 | error = sockopt_set(sopt, | 703 | error = sockopt_set(sopt, | |
690 | &unp->unp_connid, sizeof(unp->unp_connid)); | 704 | &unp->unp_connid, sizeof(unp->unp_connid)); | |
691 | } else { | 705 | } else { | |
692 | error = EINVAL; | 706 | error = EINVAL; | |
693 | } | 707 | } | |
694 | break; | 708 | break; | |
695 | case LOCAL_CREDS: | 709 | case LOCAL_CREDS: | |
696 | #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) | 710 | #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) | |
697 | 711 | |||
698 | optval = OPTBIT(UNP_WANTCRED); | 712 | optval = OPTBIT(UNP_WANTCRED); | |
699 | error = sockopt_setint(sopt, optval); | 713 | error = sockopt_setint(sopt, optval); | |
700 | break; | 714 | break; | |
701 | #undef OPTBIT | 715 | #undef OPTBIT | |
702 | 716 | |||
703 | default: | 717 | default: | |
704 | error = ENOPROTOOPT; | 718 | error = ENOPROTOOPT; | |
705 | break; | 719 | break; | |
706 | } | 720 | } | |
707 | solock(so); | 721 | solock(so); | |
708 | break; | 722 | break; | |
709 | } | 723 | } | |
710 | return (error); | 724 | return (error); | |
711 | } | 725 | } | |
712 | 726 | |||
713 | /* | 727 | /* | |
714 | * Both send and receive buffers are allocated PIPSIZ bytes of buffering | 728 | * Both send and receive buffers are allocated PIPSIZ bytes of buffering | |
715 | * for stream sockets, although the total for sender and receiver is | 729 | * for stream sockets, although the total for sender and receiver is | |
716 | * actually only PIPSIZ. | 730 | * actually only PIPSIZ. | |
717 | * Datagram sockets really use the sendspace as the maximum datagram size, | 731 | * Datagram sockets really use the sendspace as the maximum datagram size, | |
718 | * and don't really want to reserve the sendspace. Their recvspace should | 732 | * and don't really want to reserve the sendspace. Their recvspace should | |
719 | * be large enough for at least one max-size datagram plus address. | 733 | * be large enough for at least one max-size datagram plus address. | |
720 | */ | 734 | */ | |
721 | #define PIPSIZ 4096 | 735 | #define PIPSIZ 4096 | |
722 | u_long unpst_sendspace = PIPSIZ; | 736 | u_long unpst_sendspace = PIPSIZ; | |
723 | u_long unpst_recvspace = PIPSIZ; | 737 | u_long unpst_recvspace = PIPSIZ; | |
724 | u_long unpdg_sendspace = 2*1024; /* really max datagram size */ | 738 | u_long unpdg_sendspace = 2*1024; /* really max datagram size */ | |
725 | u_long unpdg_recvspace = 4*1024; | 739 | u_long unpdg_recvspace = 4*1024; | |
726 | 740 | |||
727 | u_int unp_rights; /* file descriptors in flight */ | 741 | u_int unp_rights; /* files in flight */ | |
742 | u_int unp_rights_ratio = 2; /* limit, fraction of maxfiles */ | |||
728 | 743 | |||
729 | int | 744 | int | |
730 | unp_attach(struct socket *so) | 745 | unp_attach(struct socket *so) | |
731 | { | 746 | { | |
732 | struct unpcb *unp; | 747 | struct unpcb *unp; | |
733 | int error; | 748 | int error; | |
734 | 749 | |||
735 | switch (so->so_type) { | 750 | switch (so->so_type) { | |
736 | case SOCK_STREAM: | 751 | case SOCK_STREAM: | |
737 | if (so->so_lock == NULL) { | 752 | if (so->so_lock == NULL) { | |
738 | /* | 753 | /* | |
739 | * XXX Assuming that no socket locks are held, | 754 | * XXX Assuming that no socket locks are held, | |
740 | * as this call may sleep. | 755 | * as this call may sleep. | |
741 | */ | 756 | */ | |
742 | so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); | 757 | so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); | |
743 | solock(so); | 758 | solock(so); | |
744 | } | 759 | } | |
745 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { | 760 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { | |
746 | error = soreserve(so, unpst_sendspace, unpst_recvspace); | 761 | error = soreserve(so, unpst_sendspace, unpst_recvspace); | |
747 | if (error != 0) | 762 | if (error != 0) | |
748 | return (error); | 763 | return (error); | |
749 | } | 764 | } | |
750 | break; | 765 | break; | |
751 | 766 | |||
752 | case SOCK_DGRAM: | 767 | case SOCK_DGRAM: | |
753 | if (so->so_lock == NULL) { | 768 | if (so->so_lock == NULL) { | |
754 | mutex_obj_hold(uipc_lock); | 769 | mutex_obj_hold(uipc_lock); | |
755 | so->so_lock = uipc_lock; | 770 | so->so_lock = uipc_lock; | |
756 | solock(so); | 771 | solock(so); | |
757 | } | 772 | } | |
758 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { | 773 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { | |
759 | error = soreserve(so, unpdg_sendspace, unpdg_recvspace); | 774 | error = soreserve(so, unpdg_sendspace, unpdg_recvspace); | |
760 | if (error != 0) | 775 | if (error != 0) | |
761 | return (error); | 776 | return (error); | |
762 | } | 777 | } | |
763 | break; | 778 | break; | |
764 | 779 | |||
765 | default: | 780 | default: | |
766 | panic("unp_attach"); | 781 | panic("unp_attach"); | |
767 | } | 782 | } | |
768 | KASSERT(solocked(so)); | 783 | KASSERT(solocked(so)); | |
769 | unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); | 784 | unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); | |
770 | if (unp == NULL) | 785 | if (unp == NULL) | |
771 | return (ENOBUFS); | 786 | return (ENOBUFS); | |
772 | memset((void *)unp, 0, sizeof(*unp)); | 787 | memset((void *)unp, 0, sizeof(*unp)); | |
773 | unp->unp_socket = so; | 788 | unp->unp_socket = so; | |
774 | so->so_pcb = unp; | 789 | so->so_pcb = unp; | |
775 | nanotime(&unp->unp_ctime); | 790 | nanotime(&unp->unp_ctime); | |
776 | return (0); | 791 | return (0); | |
777 | } | 792 | } | |
778 | 793 | |||
779 | void | 794 | void | |
780 | unp_detach(struct unpcb *unp) | 795 | unp_detach(struct unpcb *unp) | |
781 | { | 796 | { | |
782 | struct socket *so; | 797 | struct socket *so; | |
783 | vnode_t *vp; | 798 | vnode_t *vp; | |
784 | 799 | |||
785 | so = unp->unp_socket; | 800 | so = unp->unp_socket; | |
786 | 801 | |||
787 | retry: | 802 | retry: | |
788 | if ((vp = unp->unp_vnode) != NULL) { | 803 | if ((vp = unp->unp_vnode) != NULL) { | |
789 | sounlock(so); | 804 | sounlock(so); | |
790 | /* Acquire v_interlock to protect against unp_connect(). */ | 805 | /* Acquire v_interlock to protect against unp_connect(). */ | |
791 | /* XXXAD racy */ | 806 | /* XXXAD racy */ | |
792 | mutex_enter(&vp->v_interlock); | 807 | mutex_enter(&vp->v_interlock); | |
793 | vp->v_socket = NULL; | 808 | vp->v_socket = NULL; | |
794 | vrelel(vp, 0); | 809 | vrelel(vp, 0); | |
795 | solock(so); | 810 | solock(so); | |
796 | unp->unp_vnode = NULL; | 811 | unp->unp_vnode = NULL; | |
797 | } | 812 | } | |
798 | if (unp->unp_conn) | 813 | if (unp->unp_conn) | |
799 | unp_disconnect(unp); | 814 | unp_disconnect(unp); | |
800 | while (unp->unp_refs) { | 815 | while (unp->unp_refs) { | |
801 | KASSERT(solocked2(so, unp->unp_refs->unp_socket)); | 816 | KASSERT(solocked2(so, unp->unp_refs->unp_socket)); | |
802 | if (unp_drop(unp->unp_refs, ECONNRESET)) { | 817 | if (unp_drop(unp->unp_refs, ECONNRESET)) { | |
803 | solock(so); | 818 | solock(so); | |
804 | goto retry; | 819 | goto retry; | |
805 | } | 820 | } | |
806 | } | 821 | } | |
807 | soisdisconnected(so); | 822 | soisdisconnected(so); | |
808 | so->so_pcb = NULL; | 823 | so->so_pcb = NULL; | |
809 | if (unp_rights) { | 824 | if (unp_rights) { | |
810 | /* | 825 | /* | |
811 | * Normally the receive buffer is flushed later, | 826 | * Normally the receive buffer is flushed later, in sofree, | |
812 | * in sofree, but if our receive buffer holds references | 827 | * but if our receive buffer holds references to files that | |
813 | * to descriptors that are now garbage, we will dispose | 828 | * are now garbage, we will enqueue those file references to | |
814 | * of those descriptor references after the garbage collector | 829 | * the garbage collector and kick it into action. | |
815 | * gets them (resulting in a "panic: closef: count < 0"). | |||
816 | */ | 830 | */ | |
817 | sorflush(so); | 831 | sorflush(so); | |
818 | unp_free(unp); | 832 | unp_free(unp); | |
819 | sounlock(so); | 833 | unp_thread_kick(); | |
820 | unp_gc(); | |||
821 | solock(so); | |||
822 | } else | 834 | } else | |
823 | unp_free(unp); | 835 | unp_free(unp); | |
824 | } | 836 | } | |
825 | 837 | |||
826 | int | 838 | int | |
827 | unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l) | 839 | unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l) | |
828 | { | 840 | { | |
829 | struct sockaddr_un *sun; | 841 | struct sockaddr_un *sun; | |
830 | struct unpcb *unp; | 842 | struct unpcb *unp; | |
831 | vnode_t *vp; | 843 | vnode_t *vp; | |
832 | struct vattr vattr; | 844 | struct vattr vattr; | |
833 | size_t addrlen; | 845 | size_t addrlen; | |
834 | int error; | 846 | int error; | |
835 | struct nameidata nd; | 847 | struct nameidata nd; | |
836 | proc_t *p; | 848 | proc_t *p; | |
837 | 849 | |||
838 | unp = sotounpcb(so); | 850 | unp = sotounpcb(so); | |
839 | if (unp->unp_vnode != NULL) | 851 | if (unp->unp_vnode != NULL) | |
840 | return (EINVAL); | 852 | return (EINVAL); | |
841 | if ((unp->unp_flags & UNP_BUSY) != 0) { | 853 | if ((unp->unp_flags & UNP_BUSY) != 0) { | |
842 | /* | 854 | /* | |
843 | * EALREADY may not be strictly accurate, but since this | 855 | * EALREADY may not be strictly accurate, but since this | |
844 | * is a major application error it's hardly a big deal. | 856 | * is a major application error it's hardly a big deal. | |
845 | */ | 857 | */ | |
846 | return (EALREADY); | 858 | return (EALREADY); | |
847 | } | 859 | } | |
848 | unp->unp_flags |= UNP_BUSY; | 860 | unp->unp_flags |= UNP_BUSY; | |
849 | sounlock(so); | 861 | sounlock(so); | |
850 | 862 | |||
851 | /* | 863 | /* | |
852 | * Allocate the new sockaddr. We have to allocate one | 864 | * Allocate the new sockaddr. We have to allocate one | |
853 | * extra byte so that we can ensure that the pathname | 865 | * extra byte so that we can ensure that the pathname | |
854 | * is nul-terminated. | 866 | * is nul-terminated. | |
855 | */ | 867 | */ | |
856 | p = l->l_proc; | 868 | p = l->l_proc; | |
857 | addrlen = nam->m_len + 1; | 869 | addrlen = nam->m_len + 1; | |
858 | sun = malloc(addrlen, M_SONAME, M_WAITOK); | 870 | sun = malloc(addrlen, M_SONAME, M_WAITOK); | |
859 | m_copydata(nam, 0, nam->m_len, (void *)sun); | 871 | m_copydata(nam, 0, nam->m_len, (void *)sun); | |
860 | *(((char *)sun) + nam->m_len) = '\0'; | 872 | *(((char *)sun) + nam->m_len) = '\0'; | |
861 | 873 | |||
862 | NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE, | 874 | NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE, | |
863 | sun->sun_path); | 875 | sun->sun_path); | |
864 | 876 | |||
865 | /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ | 877 | /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ | |
866 | if ((error = namei(&nd)) != 0) | 878 | if ((error = namei(&nd)) != 0) | |
867 | goto bad; | 879 | goto bad; | |
868 | vp = nd.ni_vp; | 880 | vp = nd.ni_vp; | |
869 | if (vp != NULL) { | 881 | if (vp != NULL) { | |
870 | VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); | 882 | VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); | |
871 | if (nd.ni_dvp == vp) | 883 | if (nd.ni_dvp == vp) | |
872 | vrele(nd.ni_dvp); | 884 | vrele(nd.ni_dvp); | |
873 | else | 885 | else | |
874 | vput(nd.ni_dvp); | 886 | vput(nd.ni_dvp); | |
875 | vrele(vp); | 887 | vrele(vp); | |
876 | error = EADDRINUSE; | 888 | error = EADDRINUSE; | |
877 | goto bad; | 889 | goto bad; | |
878 | } | 890 | } | |
879 | VATTR_NULL(&vattr); | 891 | VATTR_NULL(&vattr); | |
880 | vattr.va_type = VSOCK; | 892 | vattr.va_type = VSOCK; | |
881 | vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask); | 893 | vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask); | |
882 | error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); | 894 | error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); | |
883 | if (error) | 895 | if (error) | |
884 | goto bad; | 896 | goto bad; | |
885 | vp = nd.ni_vp; | 897 | vp = nd.ni_vp; | |
886 | solock(so); | 898 | solock(so); | |
887 | vp->v_socket = unp->unp_socket; | 899 | vp->v_socket = unp->unp_socket; | |
888 | unp->unp_vnode = vp; | 900 | unp->unp_vnode = vp; | |
889 | unp->unp_addrlen = addrlen; | 901 | unp->unp_addrlen = addrlen; | |
890 | unp->unp_addr = sun; | 902 | unp->unp_addr = sun; | |
891 | unp->unp_connid.unp_pid = p->p_pid; | 903 | unp->unp_connid.unp_pid = p->p_pid; | |
892 | unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); | 904 | unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); | |
893 | unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); | 905 | unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); | |
894 | unp->unp_flags |= UNP_EIDSBIND; | 906 | unp->unp_flags |= UNP_EIDSBIND; | |
895 | VOP_UNLOCK(vp, 0); | 907 | VOP_UNLOCK(vp, 0); | |
896 | unp->unp_flags &= ~UNP_BUSY; | 908 | unp->unp_flags &= ~UNP_BUSY; | |
897 | return (0); | 909 | return (0); | |
898 | 910 | |||
899 | bad: | 911 | bad: | |
900 | free(sun, M_SONAME); | 912 | free(sun, M_SONAME); | |
901 | solock(so); | 913 | solock(so); | |
902 | unp->unp_flags &= ~UNP_BUSY; | 914 | unp->unp_flags &= ~UNP_BUSY; | |
903 | return (error); | 915 | return (error); | |
904 | } | 916 | } | |
905 | 917 | |||
906 | int | 918 | int | |
907 | unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l) | 919 | unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l) | |
908 | { | 920 | { | |
909 | struct sockaddr_un *sun; | 921 | struct sockaddr_un *sun; | |
910 | vnode_t *vp; | 922 | vnode_t *vp; | |
911 | struct socket *so2, *so3; | 923 | struct socket *so2, *so3; | |
912 | struct unpcb *unp, *unp2, *unp3; | 924 | struct unpcb *unp, *unp2, *unp3; | |
913 | size_t addrlen; | 925 | size_t addrlen; | |
914 | int error; | 926 | int error; | |
915 | struct nameidata nd; | 927 | struct nameidata nd; | |
916 | 928 | |||
917 | unp = sotounpcb(so); | 929 | unp = sotounpcb(so); | |
918 | if ((unp->unp_flags & UNP_BUSY) != 0) { | 930 | if ((unp->unp_flags & UNP_BUSY) != 0) { | |
919 | /* | 931 | /* | |
920 | * EALREADY may not be strictly accurate, but since this | 932 | * EALREADY may not be strictly accurate, but since this | |
921 | * is a major application error it's hardly a big deal. | 933 | * is a major application error it's hardly a big deal. | |
922 | */ | 934 | */ | |
923 | return (EALREADY); | 935 | return (EALREADY); | |
924 | } | 936 | } | |
925 | unp->unp_flags |= UNP_BUSY; | 937 | unp->unp_flags |= UNP_BUSY; | |
926 | sounlock(so); | 938 | sounlock(so); | |
927 | 939 | |||
928 | /* | 940 | /* | |
929 | * Allocate a temporary sockaddr. We have to allocate one extra | 941 | * Allocate a temporary sockaddr. We have to allocate one extra | |
930 | * byte so that we can ensure that the pathname is nul-terminated. | 942 | * byte so that we can ensure that the pathname is nul-terminated. | |
931 | * When we establish the connection, we copy the other PCB's | 943 | * When we establish the connection, we copy the other PCB's | |
932 | * sockaddr to our own. | 944 | * sockaddr to our own. | |
933 | */ | 945 | */ | |
934 | addrlen = nam->m_len + 1; | 946 | addrlen = nam->m_len + 1; | |
935 | sun = malloc(addrlen, M_SONAME, M_WAITOK); | 947 | sun = malloc(addrlen, M_SONAME, M_WAITOK); | |
936 | m_copydata(nam, 0, nam->m_len, (void *)sun); | 948 | m_copydata(nam, 0, nam->m_len, (void *)sun); | |
937 | *(((char *)sun) + nam->m_len) = '\0'; | 949 | *(((char *)sun) + nam->m_len) = '\0'; | |
938 | 950 | |||
939 | NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE, | 951 | NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE, | |
940 | sun->sun_path); | 952 | sun->sun_path); | |
941 | 953 | |||
942 | if ((error = namei(&nd)) != 0) | 954 | if ((error = namei(&nd)) != 0) | |
943 | goto bad2; | 955 | goto bad2; | |
944 | vp = nd.ni_vp; | 956 | vp = nd.ni_vp; | |
945 | if (vp->v_type != VSOCK) { | 957 | if (vp->v_type != VSOCK) { | |
946 | error = ENOTSOCK; | 958 | error = ENOTSOCK; | |
947 | goto bad; | 959 | goto bad; | |
948 | } | 960 | } | |
949 | if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0) | 961 | if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0) | |
950 | goto bad; | 962 | goto bad; | |
951 | /* Acquire v_interlock to protect against unp_detach(). */ | 963 | /* Acquire v_interlock to protect against unp_detach(). */ | |
952 | mutex_enter(&vp->v_interlock); | 964 | mutex_enter(&vp->v_interlock); | |
953 | so2 = vp->v_socket; | 965 | so2 = vp->v_socket; | |
954 | if (so2 == NULL) { | 966 | if (so2 == NULL) { | |
955 | mutex_exit(&vp->v_interlock); | 967 | mutex_exit(&vp->v_interlock); | |
956 | error = ECONNREFUSED; | 968 | error = ECONNREFUSED; | |
957 | goto bad; | 969 | goto bad; | |
958 | } | 970 | } | |
959 | if (so->so_type != so2->so_type) { | 971 | if (so->so_type != so2->so_type) { | |
960 | mutex_exit(&vp->v_interlock); | 972 | mutex_exit(&vp->v_interlock); | |
961 | error = EPROTOTYPE; | 973 | error = EPROTOTYPE; | |
962 | goto bad; | 974 | goto bad; | |
963 | } | 975 | } | |
964 | solock(so); | 976 | solock(so); | |
965 | unp_resetlock(so); | 977 | unp_resetlock(so); | |
966 | mutex_exit(&vp->v_interlock); | 978 | mutex_exit(&vp->v_interlock); | |
967 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { | 979 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { | |
968 | /* | 980 | /* | |
969 | * This may seem somewhat fragile but is OK: if we can | 981 | * This may seem somewhat fragile but is OK: if we can | |
970 | * see SO_ACCEPTCONN set on the endpoint, then it must | 982 | * see SO_ACCEPTCONN set on the endpoint, then it must | |
971 | * be locked by the domain-wide uipc_lock. | 983 | * be locked by the domain-wide uipc_lock. | |
972 | */ | 984 | */ | |
973 | KASSERT((so->so_options & SO_ACCEPTCONN) == 0 || | 985 | KASSERT((so->so_options & SO_ACCEPTCONN) == 0 || | |
974 | so2->so_lock == uipc_lock); | 986 | so2->so_lock == uipc_lock); | |
975 | if ((so2->so_options & SO_ACCEPTCONN) == 0 || | 987 | if ((so2->so_options & SO_ACCEPTCONN) == 0 || | |
976 | (so3 = sonewconn(so2, 0)) == 0) { | 988 | (so3 = sonewconn(so2, 0)) == 0) { | |
977 | error = ECONNREFUSED; | 989 | error = ECONNREFUSED; | |
978 | sounlock(so); | 990 | sounlock(so); | |
979 | goto bad; | 991 | goto bad; | |
980 | } | 992 | } | |
981 | unp2 = sotounpcb(so2); | 993 | unp2 = sotounpcb(so2); | |
982 | unp3 = sotounpcb(so3); | 994 | unp3 = sotounpcb(so3); | |
983 | if (unp2->unp_addr) { | 995 | if (unp2->unp_addr) { | |
984 | unp3->unp_addr = malloc(unp2->unp_addrlen, | 996 | unp3->unp_addr = malloc(unp2->unp_addrlen, | |
985 | M_SONAME, M_WAITOK); | 997 | M_SONAME, M_WAITOK); | |
986 | memcpy(unp3->unp_addr, unp2->unp_addr, | 998 | memcpy(unp3->unp_addr, unp2->unp_addr, | |
987 | unp2->unp_addrlen); | 999 | unp2->unp_addrlen); | |
988 | unp3->unp_addrlen = unp2->unp_addrlen; | 1000 | unp3->unp_addrlen = unp2->unp_addrlen; | |
989 | } | 1001 | } | |
990 | unp3->unp_flags = unp2->unp_flags; | 1002 | unp3->unp_flags = unp2->unp_flags; | |
991 | unp3->unp_connid.unp_pid = l->l_proc->p_pid; | 1003 | unp3->unp_connid.unp_pid = l->l_proc->p_pid; | |
992 | unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); | 1004 | unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); | |
993 | unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); | 1005 | unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); | |
994 | unp3->unp_flags |= UNP_EIDSVALID; | 1006 | unp3->unp_flags |= UNP_EIDSVALID; | |
995 | if (unp2->unp_flags & UNP_EIDSBIND) { | 1007 | if (unp2->unp_flags & UNP_EIDSBIND) { | |
996 | unp->unp_connid = unp2->unp_connid; | 1008 | unp->unp_connid = unp2->unp_connid; | |
997 | unp->unp_flags |= UNP_EIDSVALID; | 1009 | unp->unp_flags |= UNP_EIDSVALID; | |
998 | } | 1010 | } | |
999 | so2 = so3; | 1011 | so2 = so3; | |
1000 | } | 1012 | } | |
1001 | error = unp_connect2(so, so2, PRU_CONNECT); | 1013 | error = unp_connect2(so, so2, PRU_CONNECT); | |
1002 | sounlock(so); | 1014 | sounlock(so); | |
1003 | bad: | 1015 | bad: | |
1004 | vput(vp); | 1016 | vput(vp); | |
1005 | bad2: | 1017 | bad2: | |
1006 | free(sun, M_SONAME); | 1018 | free(sun, M_SONAME); | |
1007 | solock(so); | 1019 | solock(so); | |
1008 | unp->unp_flags &= ~UNP_BUSY; | 1020 | unp->unp_flags &= ~UNP_BUSY; | |
1009 | return (error); | 1021 | return (error); | |
1010 | } | 1022 | } | |
1011 | 1023 | |||
1012 | int | 1024 | int | |
1013 | unp_connect2(struct socket *so, struct socket *so2, int req) | 1025 | unp_connect2(struct socket *so, struct socket *so2, int req) | |
1014 | { | 1026 | { | |
1015 | struct unpcb *unp = sotounpcb(so); | 1027 | struct unpcb *unp = sotounpcb(so); | |
1016 | struct unpcb *unp2; | 1028 | struct unpcb *unp2; | |
1017 | 1029 | |||
1018 | if (so2->so_type != so->so_type) | 1030 | if (so2->so_type != so->so_type) | |
1019 | return (EPROTOTYPE); | 1031 | return (EPROTOTYPE); | |
1020 | 1032 | |||
1021 | /* | 1033 | /* | |
1022 | * All three sockets involved must be locked by same lock: | 1034 | * All three sockets involved must be locked by same lock: | |
1023 | * | 1035 | * | |
1024 | * local endpoint (so) | 1036 | * local endpoint (so) | |
1025 | * remote endpoint (so2) | 1037 | * remote endpoint (so2) | |
1026 | * queue head (so->so_head, only if PR_CONNREQUIRED) | 1038 | * queue head (so->so_head, only if PR_CONNREQUIRED) | |
1027 | */ | 1039 | */ | |
1028 | KASSERT(solocked2(so, so2)); | 1040 | KASSERT(solocked2(so, so2)); | |
1029 | if (so->so_head != NULL) { | 1041 | if (so->so_head != NULL) { | |
1030 | KASSERT(so->so_lock == uipc_lock); | 1042 | KASSERT(so->so_lock == uipc_lock); | |
1031 | KASSERT(solocked2(so, so->so_head)); | 1043 | KASSERT(solocked2(so, so->so_head)); | |
1032 | } | 1044 | } | |
1033 | 1045 | |||
1034 | unp2 = sotounpcb(so2); | 1046 | unp2 = sotounpcb(so2); | |
1035 | unp->unp_conn = unp2; | 1047 | unp->unp_conn = unp2; | |
1036 | switch (so->so_type) { | 1048 | switch (so->so_type) { | |
1037 | 1049 | |||
1038 | case SOCK_DGRAM: | 1050 | case SOCK_DGRAM: | |
1039 | unp->unp_nextref = unp2->unp_refs; | 1051 | unp->unp_nextref = unp2->unp_refs; | |
1040 | unp2->unp_refs = unp; | 1052 | unp2->unp_refs = unp; | |
1041 | soisconnected(so); | 1053 | soisconnected(so); | |
1042 | break; | 1054 | break; | |
1043 | 1055 | |||
1044 | case SOCK_STREAM: | 1056 | case SOCK_STREAM: | |
1045 | unp2->unp_conn = unp; | 1057 | unp2->unp_conn = unp; | |
1046 | if (req == PRU_CONNECT && | 1058 | if (req == PRU_CONNECT && | |
1047 | ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) | 1059 | ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) | |
1048 | soisconnecting(so); | 1060 | soisconnecting(so); | |
1049 | else | 1061 | else | |
1050 | soisconnected(so); | 1062 | soisconnected(so); | |
1051 | soisconnected(so2); | 1063 | soisconnected(so2); | |
1052 | /* | 1064 | /* | |
1053 | * If the connection is fully established, break the | 1065 | * If the connection is fully established, break the | |
1054 | * association with uipc_lock and give the connected | 1066 | * association with uipc_lock and give the connected | |
1055 | * pair a separate lock to share. For CONNECT2, we | 1067 | * pair a separate lock to share. For CONNECT2, we | |
1056 | * require that the locks already match (the sockets | 1068 | * require that the locks already match (the sockets | |
1057 | * are created that way). | 1069 | * are created that way). | |
1058 | */ | 1070 | */ | |
1059 | if (req == PRU_CONNECT) | 1071 | if (req == PRU_CONNECT) | |
1060 | unp_setpeerlocks(so, so2); | 1072 | unp_setpeerlocks(so, so2); | |
1061 | break; | 1073 | break; | |
1062 | 1074 | |||
1063 | default: | 1075 | default: | |
1064 | panic("unp_connect2"); | 1076 | panic("unp_connect2"); | |
1065 | } | 1077 | } | |
1066 | return (0); | 1078 | return (0); | |
1067 | } | 1079 | } | |
1068 | 1080 | |||
1069 | void | 1081 | void | |
1070 | unp_disconnect(struct unpcb *unp) | 1082 | unp_disconnect(struct unpcb *unp) | |
1071 | { | 1083 | { | |
1072 | struct unpcb *unp2 = unp->unp_conn; | 1084 | struct unpcb *unp2 = unp->unp_conn; | |
1073 | struct socket *so; | 1085 | struct socket *so; | |
1074 | 1086 | |||
1075 | if (unp2 == 0) | 1087 | if (unp2 == 0) | |
1076 | return; | 1088 | return; | |
1077 | unp->unp_conn = 0; | 1089 | unp->unp_conn = 0; | |
1078 | so = unp->unp_socket; | 1090 | so = unp->unp_socket; | |
1079 | switch (so->so_type) { | 1091 | switch (so->so_type) { | |
1080 | case SOCK_DGRAM: | 1092 | case SOCK_DGRAM: | |
1081 | if (unp2->unp_refs == unp) | 1093 | if (unp2->unp_refs == unp) | |
1082 | unp2->unp_refs = unp->unp_nextref; | 1094 | unp2->unp_refs = unp->unp_nextref; | |
1083 | else { | 1095 | else { | |
1084 | unp2 = unp2->unp_refs; | 1096 | unp2 = unp2->unp_refs; | |
1085 | for (;;) { | 1097 | for (;;) { | |
1086 | KASSERT(solocked2(so, unp2->unp_socket)); | 1098 | KASSERT(solocked2(so, unp2->unp_socket)); | |
1087 | if (unp2 == 0) | 1099 | if (unp2 == 0) | |
1088 | panic("unp_disconnect"); | 1100 | panic("unp_disconnect"); | |
1089 | if (unp2->unp_nextref == unp) | 1101 | if (unp2->unp_nextref == unp) | |
1090 | break; | 1102 | break; | |
1091 | unp2 = unp2->unp_nextref; | 1103 | unp2 = unp2->unp_nextref; | |
1092 | } | 1104 | } | |
1093 | unp2->unp_nextref = unp->unp_nextref; | 1105 | unp2->unp_nextref = unp->unp_nextref; | |
1094 | } | 1106 | } | |
1095 | unp->unp_nextref = 0; | 1107 | unp->unp_nextref = 0; | |
1096 | so->so_state &= ~SS_ISCONNECTED; | 1108 | so->so_state &= ~SS_ISCONNECTED; | |
1097 | break; | 1109 | break; | |
1098 | 1110 | |||
1099 | case SOCK_STREAM: | 1111 | case SOCK_STREAM: | |
1100 | KASSERT(solocked2(so, unp2->unp_socket)); | 1112 | KASSERT(solocked2(so, unp2->unp_socket)); | |
1101 | soisdisconnected(so); | 1113 | soisdisconnected(so); | |
1102 | unp2->unp_conn = 0; | 1114 | unp2->unp_conn = 0; | |
1103 | soisdisconnected(unp2->unp_socket); | 1115 | soisdisconnected(unp2->unp_socket); | |
1104 | break; | 1116 | break; | |
1105 | } | 1117 | } | |
1106 | } | 1118 | } | |
1107 | 1119 | |||
1108 | #ifdef notdef | 1120 | #ifdef notdef | |
1109 | unp_abort(struct unpcb *unp) | 1121 | unp_abort(struct unpcb *unp) | |
1110 | { | 1122 | { | |
1111 | unp_detach(unp); | 1123 | unp_detach(unp); | |
1112 | } | 1124 | } | |
1113 | #endif | 1125 | #endif | |
1114 | 1126 | |||
1115 | void | 1127 | void | |
1116 | unp_shutdown(struct unpcb *unp) | 1128 | unp_shutdown(struct unpcb *unp) | |
1117 | { | 1129 | { | |
1118 | struct socket *so; | 1130 | struct socket *so; | |
1119 | 1131 | |||
1120 | if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && | 1132 | if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && | |
1121 | (so = unp->unp_conn->unp_socket)) | 1133 | (so = unp->unp_conn->unp_socket)) | |
1122 | socantrcvmore(so); | 1134 | socantrcvmore(so); | |
1123 | } | 1135 | } | |
1124 | 1136 | |||
1125 | bool | 1137 | bool | |
1126 | unp_drop(struct unpcb *unp, int errno) | 1138 | unp_drop(struct unpcb *unp, int errno) | |
1127 | { | 1139 | { | |
1128 | struct socket *so = unp->unp_socket; | 1140 | struct socket *so = unp->unp_socket; | |
1129 | 1141 | |||
1130 | KASSERT(solocked(so)); | 1142 | KASSERT(solocked(so)); | |
1131 | 1143 | |||
1132 | so->so_error = errno; | 1144 | so->so_error = errno; | |
1133 | unp_disconnect(unp); | 1145 | unp_disconnect(unp); | |
1134 | if (so->so_head) { | 1146 | if (so->so_head) { | |
1135 | so->so_pcb = NULL; | 1147 | so->so_pcb = NULL; | |
1136 | /* sofree() drops the socket lock */ | 1148 | /* sofree() drops the socket lock */ | |
1137 | sofree(so); | 1149 | sofree(so); | |
1138 | unp_free(unp); | 1150 | unp_free(unp); | |
1139 | return true; | 1151 | return true; | |
1140 | } | 1152 | } | |
1141 | return false; | 1153 | return false; | |
1142 | } | 1154 | } | |
1143 | 1155 | |||
1144 | #ifdef notdef | 1156 | #ifdef notdef | |
1145 | unp_drain(void) | 1157 | unp_drain(void) | |
1146 | { | 1158 | { | |
1147 | 1159 | |||
1148 | } | 1160 | } | |
1149 | #endif | 1161 | #endif | |
1150 | 1162 | |||
1151 | int | 1163 | int | |
1152 | unp_externalize(struct mbuf *rights, struct lwp *l) | 1164 | unp_externalize(struct mbuf *rights, struct lwp *l) | |
1153 | { | 1165 | { | |
1154 | struct cmsghdr *cm = mtod(rights, struct cmsghdr *); | 1166 | struct cmsghdr *cm = mtod(rights, struct cmsghdr *); | |
1155 | struct proc *p = l->l_proc; | 1167 | struct proc *p = l->l_proc; | |
1156 | int i, *fdp; | 1168 | int i, *fdp; | |
1157 | file_t **rp; | 1169 | file_t **rp; | |
1158 | file_t *fp; | 1170 | file_t *fp; | |
1159 | int nfds, error = 0; | 1171 | int nfds, error = 0; | |
1160 | 1172 | |||
1161 | nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / | 1173 | nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / | |
1162 | sizeof(file_t *); | 1174 | sizeof(file_t *); | |
1163 | rp = (file_t **)CMSG_DATA(cm); | 1175 | rp = (file_t **)CMSG_DATA(cm); | |
1164 | 1176 | |||
1165 | fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); | 1177 | fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); | |
1166 | rw_enter(&p->p_cwdi->cwdi_lock, RW_READER); | 1178 | rw_enter(&p->p_cwdi->cwdi_lock, RW_READER); | |
1167 | 1179 | |||
1168 | /* Make sure the recipient should be able to see the descriptors.. */ | 1180 | /* Make sure the recipient should be able to see the files.. */ | |
1169 | if (p->p_cwdi->cwdi_rdir != NULL) { | 1181 | if (p->p_cwdi->cwdi_rdir != NULL) { | |
1170 | rp = (file_t **)CMSG_DATA(cm); | 1182 | rp = (file_t **)CMSG_DATA(cm); | |
1171 | for (i = 0; i < nfds; i++) { | 1183 | for (i = 0; i < nfds; i++) { | |
1172 | fp = *rp++; | 1184 | fp = *rp++; | |
1173 | /* | 1185 | /* | |
1174 | * If we are in a chroot'ed directory, and | 1186 | * If we are in a chroot'ed directory, and | |
1175 | * someone wants to pass us a directory, make | 1187 | * someone wants to pass us a directory, make | |
1176 | * sure it's inside the subtree we're allowed | 1188 | * sure it's inside the subtree we're allowed | |
1177 | * to access. | 1189 | * to access. | |
1178 | */ | 1190 | */ | |
1179 | if (fp->f_type == DTYPE_VNODE) { | 1191 | if (fp->f_type == DTYPE_VNODE) { | |
1180 | vnode_t *vp = (vnode_t *)fp->f_data; | 1192 | vnode_t *vp = (vnode_t *)fp->f_data; | |
1181 | if ((vp->v_type == VDIR) && | 1193 | if ((vp->v_type == VDIR) && | |
1182 | !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) { | 1194 | !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) { | |
1183 | error = EPERM; | 1195 | error = EPERM; | |
1184 | break; | 1196 | break; | |
1185 | } | 1197 | } | |
1186 | } | 1198 | } | |
1187 | } | 1199 | } | |
1188 | } | 1200 | } | |
1189 | 1201 | |||
1190 | restart: | 1202 | restart: | |
1191 | rp = (file_t **)CMSG_DATA(cm); | 1203 | rp = (file_t **)CMSG_DATA(cm); | |
1192 | if (error != 0) { | 1204 | if (error != 0) { | |
1193 | for (i = 0; i < nfds; i++) { | 1205 | for (i = 0; i < nfds; i++) { | |
1194 | fp = *rp; | 1206 | fp = *rp; | |
1195 | /* | |||
1196 | * zero the pointer before calling unp_discard, | |||
1197 | * since it may end up in unp_gc().. | |||
1198 | */ | |||
1199 | *rp++ = 0; | 1207 | *rp++ = 0; | |
1200 | unp_discard(fp); | 1208 | unp_discard_now(fp); | |
1201 | } | 1209 | } | |
1202 | goto out; | 1210 | goto out; | |
1203 | } | 1211 | } | |
1204 | 1212 | |||
1205 | /* | 1213 | /* | |
1206 | * First loop -- allocate file descriptor table slots for the | 1214 | * First loop -- allocate file descriptor table slots for the | |
1207 | * new descriptors. | 1215 | * new files. | |
1208 | */ | 1216 | */ | |
1209 | for (i = 0; i < nfds; i++) { | 1217 | for (i = 0; i < nfds; i++) { | |
1210 | fp = *rp++; | 1218 | fp = *rp++; | |
1211 | if ((error = fd_alloc(p, 0, &fdp[i])) != 0) { | 1219 | if ((error = fd_alloc(p, 0, &fdp[i])) != 0) { | |
1212 | /* | 1220 | /* | |
1213 | * Back out what we've done so far. | 1221 | * Back out what we've done so far. | |
1214 | */ | 1222 | */ | |
1215 | for (--i; i >= 0; i--) { | 1223 | for (--i; i >= 0; i--) { | |
1216 | fd_abort(p, NULL, fdp[i]); | 1224 | fd_abort(p, NULL, fdp[i]); | |
1217 | } | 1225 | } | |
1218 | if (error == ENOSPC) { | 1226 | if (error == ENOSPC) { | |
1219 | fd_tryexpand(p); | 1227 | fd_tryexpand(p); | |
1220 | error = 0; | 1228 | error = 0; | |
1221 | } else { | 1229 | } else { | |
1222 | /* | 1230 | /* | |
1223 | * This is the error that has historically | 1231 | * This is the error that has historically | |
1224 | * been returned, and some callers may | 1232 | * been returned, and some callers may | |
1225 | * expect it. | 1233 | * expect it. | |
1226 | */ | 1234 | */ | |
1227 | error = EMSGSIZE; | 1235 | error = EMSGSIZE; | |
1228 | } | 1236 | } | |
1229 | goto restart; | 1237 | goto restart; | |
1230 | } | 1238 | } | |
1231 | } | 1239 | } | |
1232 | 1240 | |||
1233 | /* | 1241 | /* | |
1234 | * Now that adding them has succeeded, update all of the | 1242 | * Now that adding them has succeeded, update all of the | |
1235 | * descriptor passing state. | 1243 | * file passing state and affix the descriptors. | |
1236 | */ | 1244 | */ | |
1237 | rp = (file_t **)CMSG_DATA(cm); | 1245 | rp = (file_t **)CMSG_DATA(cm); | |
1238 | for (i = 0; i < nfds; i++) { | 1246 | for (i = 0; i < nfds; i++) { | |
1239 | fp = *rp++; | 1247 | fp = *rp++; | |
1240 | atomic_dec_uint(&unp_rights); | 1248 | atomic_dec_uint(&unp_rights); | |
1241 | fd_affix(p, fp, fdp[i]); | 1249 | fd_affix(p, fp, fdp[i]); | |
1242 | mutex_enter(&fp->f_lock); | 1250 | mutex_enter(&fp->f_lock); | |
1243 | fp->f_msgcount--; | 1251 | fp->f_msgcount--; | |
1244 | mutex_exit(&fp->f_lock); | 1252 | mutex_exit(&fp->f_lock); | |
1245 | /* | 1253 | /* | |
1246 | * Note that fd_affix() adds a reference to the file. | 1254 | * Note that fd_affix() adds a reference to the file. | |
1247 | * The file may already have been closed by another | 1255 | * The file may already have been closed by another | |
1248 | * LWP in the process, so we must drop the reference | 1256 | * LWP in the process, so we must drop the reference | |
1249 | * added by unp_internalize() with closef(). | 1257 | * added by unp_internalize() with closef(). | |
1250 | */ | 1258 | */ | |
1251 | closef(fp); | 1259 | closef(fp); | |
1252 | } | 1260 | } | |
1253 | 1261 | |||
1254 | /* | 1262 | /* | |
1255 | * Copy temporary array to message and adjust length, in case of | 1263 | * Copy temporary array to message and adjust length, in case of | |
1256 | * transition from large file_t pointers to ints. | 1264 | * transition from large file_t pointers to ints. | |
1257 | */ | 1265 | */ | |
1258 | memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); | 1266 | memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); | |
1259 | cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); | 1267 | cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); | |
1260 | rights->m_len = CMSG_SPACE(nfds * sizeof(int)); | 1268 | rights->m_len = CMSG_SPACE(nfds * sizeof(int)); | |
1261 | out: | 1269 | out: | |
1262 | rw_exit(&p->p_cwdi->cwdi_lock); | 1270 | rw_exit(&p->p_cwdi->cwdi_lock); | |
1263 | free(fdp, M_TEMP); | 1271 | free(fdp, M_TEMP); | |
1264 | return (error); | 1272 | return (error); | |
1265 | } | 1273 | } | |
1266 | 1274 | |||
1267 | int | 1275 | int | |
1268 | unp_internalize(struct mbuf **controlp) | 1276 | unp_internalize(struct mbuf **controlp) | |
1269 | { | 1277 | { | |
1270 | struct filedesc *fdescp = curlwp->l_fd; | 1278 | filedesc_t *fdescp = curlwp->l_fd; | |
1271 | struct mbuf *control = *controlp; | 1279 | struct mbuf *control = *controlp; | |
1272 | struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *); | 1280 | struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *); | |
1273 | file_t **rp, **files; | 1281 | file_t **rp, **files; | |
1274 | file_t *fp; | 1282 | file_t *fp; | |
1275 | int i, fd, *fdp; | 1283 | int i, fd, *fdp; | |
1276 | int nfds, error; | 1284 | int nfds, error; | |
1285 | u_int maxmsg; | |||
1277 | 1286 | |||
1278 | error = 0; | 1287 | error = 0; | |
1279 | newcm = NULL; | 1288 | newcm = NULL; | |
1280 | 1289 | |||
1281 | /* Sanity check the control message header. */ | 1290 | /* Sanity check the control message header. */ | |
1282 | if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || | 1291 | if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || | |
1283 | cm->cmsg_len > control->m_len || | 1292 | cm->cmsg_len > control->m_len || | |
1284 | cm->cmsg_len < CMSG_ALIGN(sizeof(*cm))) | 1293 | cm->cmsg_len < CMSG_ALIGN(sizeof(*cm))) | |
1285 | return (EINVAL); | 1294 | return (EINVAL); | |
1286 | 1295 | |||
1287 | /* | 1296 | /* | |
1288 | * Verify that the file descriptors are valid, and acquire | 1297 | * Verify that the file descriptors are valid, and acquire | |
1289 | * a reference to each. | 1298 | * a reference to each. | |
1290 | */ | 1299 | */ | |
1291 | nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); | 1300 | nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); | |
1292 | fdp = (int *)CMSG_DATA(cm); | 1301 | fdp = (int *)CMSG_DATA(cm); | |
1302 | maxmsg = maxfiles / unp_rights_ratio; | |||
1293 | for (i = 0; i < nfds; i++) { | 1303 | for (i = 0; i < nfds; i++) { | |
1294 | fd = *fdp++; | 1304 | fd = *fdp++; | |
1305 | if (atomic_inc_uint_nv(&unp_rights) > maxmsg) { | |||
1306 | atomic_dec_uint(&unp_rights); | |||
1307 | nfds = i; | |||
1308 | error = EAGAIN; | |||
1309 | goto out; | |||
1310 | } | |||
1295 | if ((fp = fd_getfile(fd)) == NULL) { | 1311 | if ((fp = fd_getfile(fd)) == NULL) { | |
1312 | atomic_dec_uint(&unp_rights); | |||
1296 | nfds = i; | 1313 | nfds = i; | |
1297 | error = EBADF; | 1314 | error = EBADF; | |
1298 | goto out; | 1315 | goto out; | |
1299 | } | 1316 | } | |
1300 | } | 1317 | } | |
1301 | 1318 | |||
1302 | /* Allocate new space and copy header into it. */ | 1319 | /* Allocate new space and copy header into it. */ | |
1303 | newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK); | 1320 | newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK); | |
1304 | if (newcm == NULL) { | 1321 | if (newcm == NULL) { | |
1305 | error = E2BIG; | 1322 | error = E2BIG; | |
1306 | goto out; | 1323 | goto out; | |
1307 | } | 1324 | } | |
1308 | memcpy(newcm, cm, sizeof(struct cmsghdr)); | 1325 | memcpy(newcm, cm, sizeof(struct cmsghdr)); | |
1309 | files = (file_t **)CMSG_DATA(newcm); | 1326 | files = (file_t **)CMSG_DATA(newcm); | |
1310 | 1327 | |||
1311 | /* | 1328 | /* | |
1312 | * Transform the file descriptors into file_t pointers, in | 1329 | * Transform the file descriptors into file_t pointers, in | |
1313 | * reverse order so that if pointers are bigger than ints, the | 1330 | * reverse order so that if pointers are bigger than ints, the | |
1314 | * int won't get clobbered until we're done. No need to lock, as we have | 1331 | * int won't get clobbered until we're done. No need to lock, as we have | |
1315 | * already validated the descriptors with fd_getfile(). | 1332 | * already validated the descriptors with fd_getfile(). | |
1316 | */ | 1333 | */ | |
1317 | fdp = (int *)CMSG_DATA(cm) + nfds; | 1334 | fdp = (int *)CMSG_DATA(cm) + nfds; | |
1318 | rp = files + nfds; | 1335 | rp = files + nfds; | |
1319 | for (i = 0; i < nfds; i++) { | 1336 | for (i = 0; i < nfds; i++) { | |
1320 | fp = fdescp->fd_ofiles[*--fdp]->ff_file; | 1337 | fp = fdescp->fd_ofiles[*--fdp]->ff_file; | |
1321 | KASSERT(fp != NULL); | 1338 | KASSERT(fp != NULL); | |
1322 | mutex_enter(&fp->f_lock); | 1339 | mutex_enter(&fp->f_lock); | |
1323 | *--rp = fp; | 1340 | *--rp = fp; | |
1324 | fp->f_count++; | 1341 | fp->f_count++; | |
1325 | fp->f_msgcount++; | 1342 | fp->f_msgcount++; | |
1326 | mutex_exit(&fp->f_lock); | 1343 | mutex_exit(&fp->f_lock); | |
1327 | atomic_inc_uint(&unp_rights); | |||
1328 | } | 1344 | } | |
1329 | 1345 | |||
1330 | out: | 1346 | out: | |
1331 | /* Release descriptor references. */ | 1347 | /* Release descriptor references. */ | |
1332 | fdp = (int *)CMSG_DATA(cm); | 1348 | fdp = (int *)CMSG_DATA(cm); | |
1333 | for (i = 0; i < nfds; i++) { | 1349 | for (i = 0; i < nfds; i++) { | |
1334 | fd_putfile(*fdp++); | 1350 | fd_putfile(*fdp++); | |
1351 | if (error != 0) { | |||
1352 | atomic_dec_uint(&unp_rights); | |||
1353 | } | |||
1335 | } | 1354 | } | |
1336 | 1355 | |||
1337 | if (error == 0) { | 1356 | if (error == 0) { | |
1338 | if (control->m_flags & M_EXT) { | 1357 | if (control->m_flags & M_EXT) { | |
1339 | m_freem(control); | 1358 | m_freem(control); | |
1340 | *controlp = control = m_get(M_WAIT, MT_CONTROL); | 1359 | *controlp = control = m_get(M_WAIT, MT_CONTROL); | |
1341 | } | 1360 | } | |
1342 | MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)), | 1361 | MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)), | |
1343 | M_MBUF, NULL, NULL); | 1362 | M_MBUF, NULL, NULL); | |
1344 | cm = newcm; | 1363 | cm = newcm; | |
1345 | /* | 1364 | /* | |
1346 | * Adjust message & mbuf to note amount of space | 1365 | * Adjust message & mbuf to note amount of space | |
1347 | * actually used. | 1366 | * actually used. | |
1348 | */ | 1367 | */ | |
1349 | cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *)); | 1368 | cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *)); | |
1350 | control->m_len = CMSG_SPACE(nfds * sizeof(file_t *)); | 1369 | control->m_len = CMSG_SPACE(nfds * sizeof(file_t *)); | |
1351 | } | 1370 | } | |
1352 | 1371 | |||
1353 | return error; | 1372 | return error; | |
1354 | } | 1373 | } | |
1355 | 1374 | |||
1356 | struct mbuf * | 1375 | struct mbuf * | |
1357 | unp_addsockcred(struct lwp *l, struct mbuf *control) | 1376 | unp_addsockcred(struct lwp *l, struct mbuf *control) | |
1358 | { | 1377 | { | |
1359 | struct cmsghdr *cmp; | 1378 | struct cmsghdr *cmp; | |
1360 | struct sockcred *sc; | 1379 | struct sockcred *sc; | |
1361 | struct mbuf *m, *n; | 1380 | struct mbuf *m, *n; | |
1362 | int len, space, i; | 1381 | int len, space, i; | |
1363 | 1382 | |||
1364 | len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred))); | 1383 | len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred))); | |
1365 | space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred))); | 1384 | space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred))); | |
1366 | 1385 | |||
1367 | m = m_get(M_WAIT, MT_CONTROL); | 1386 | m = m_get(M_WAIT, MT_CONTROL); | |
1368 | if (space > MLEN) { | 1387 | if (space > MLEN) { | |
1369 | if (space > MCLBYTES) | 1388 | if (space > MCLBYTES) | |
1370 | MEXTMALLOC(m, space, M_WAITOK); | 1389 | MEXTMALLOC(m, space, M_WAITOK); | |
1371 | else | 1390 | else | |
1372 | m_clget(m, M_WAIT); | 1391 | m_clget(m, M_WAIT); | |
1373 | if ((m->m_flags & M_EXT) == 0) { | 1392 | if ((m->m_flags & M_EXT) == 0) { | |
1374 | m_free(m); | 1393 | m_free(m); | |
1375 | return (control); | 1394 | return (control); | |
1376 | } | 1395 | } | |
1377 | } | 1396 | } | |
1378 | 1397 | |||
1379 | m->m_len = space; | 1398 | m->m_len = space; | |
1380 | m->m_next = NULL; | 1399 | m->m_next = NULL; | |
1381 | cmp = mtod(m, struct cmsghdr *); | 1400 | cmp = mtod(m, struct cmsghdr *); | |
1382 | sc = (struct sockcred *)CMSG_DATA(cmp); | 1401 | sc = (struct sockcred *)CMSG_DATA(cmp); | |
1383 | cmp->cmsg_len = len; | 1402 | cmp->cmsg_len = len; | |
1384 | cmp->cmsg_level = SOL_SOCKET; | 1403 | cmp->cmsg_level = SOL_SOCKET; | |
1385 | cmp->cmsg_type = SCM_CREDS; | 1404 | cmp->cmsg_type = SCM_CREDS; | |
1386 | sc->sc_uid = kauth_cred_getuid(l->l_cred); | 1405 | sc->sc_uid = kauth_cred_getuid(l->l_cred); | |
1387 | sc->sc_euid = kauth_cred_geteuid(l->l_cred); | 1406 | sc->sc_euid = kauth_cred_geteuid(l->l_cred); | |
1388 | sc->sc_gid = kauth_cred_getgid(l->l_cred); | 1407 | sc->sc_gid = kauth_cred_getgid(l->l_cred); | |
1389 | sc->sc_egid = kauth_cred_getegid(l->l_cred); | 1408 | sc->sc_egid = kauth_cred_getegid(l->l_cred); | |
1390 | sc->sc_ngroups = kauth_cred_ngroups(l->l_cred); | 1409 | sc->sc_ngroups = kauth_cred_ngroups(l->l_cred); | |
1391 | for (i = 0; i < sc->sc_ngroups; i++) | 1410 | for (i = 0; i < sc->sc_ngroups; i++) | |
1392 | sc->sc_groups[i] = kauth_cred_group(l->l_cred, i); | 1411 | sc->sc_groups[i] = kauth_cred_group(l->l_cred, i); | |
1393 | 1412 | |||
1394 | /* | 1413 | /* | |
1395 | * If a control message already exists, append us to the end. | 1414 | * If a control message already exists, append us to the end. | |
1396 | */ | 1415 | */ | |
1397 | if (control != NULL) { | 1416 | if (control != NULL) { | |
1398 | for (n = control; n->m_next != NULL; n = n->m_next) | 1417 | for (n = control; n->m_next != NULL; n = n->m_next) | |
1399 | ; | 1418 | ; | |
1400 | n->m_next = m; | 1419 | n->m_next = m; | |
1401 | } else | 1420 | } else | |
1402 | control = m; | 1421 | control = m; | |
1403 | 1422 | |||
1404 | return (control); | 1423 | return (control); | |
1405 | } | 1424 | } | |
1406 | 1425 | |||
1407 | int unp_defer, unp_gcing; | |||
1408 | extern struct domain unixdomain; | |||
1409 | ||||
1410 | /* | 1426 | /* | |
1411 | * Comment added long after the fact explaining what's going on here. | 1427 | * Do a mark-sweep GC of files in the system, to free up any which are | |
1412 | * Do a mark-sweep GC of file descriptors on the system, to free up | 1428 | * caught in flight to an about-to-be-closed socket. Additionally, | |
1413 | * any which are caught in flight to an about-to-be-closed socket. | 1429 | * process deferred file closures. | |
1414 | * | |||
1415 | * Traditional mark-sweep gc's start at the "root", and mark | |||
1416 | * everything reachable from the root (which, in our case would be the | |||
1417 | * process table). The mark bits are cleared during the sweep. | |||
1418 | * | |||
1419 | * XXX For some inexplicable reason (perhaps because the file | |||
1420 | * descriptor tables used to live in the u area which could be swapped | |||
1421 | * out and thus hard to reach), we do multiple scans over the set of | |||
1422 | * descriptors, using *two* mark bits per object (DEFER and MARK). | |||
1423 | * Whenever we find a descriptor which references other descriptors, | |||
1424 | * the ones it references are marked with both bits, and we iterate | |||
1425 | * over the whole file table until there are no more DEFER bits set. | |||
1426 | * We also make an extra pass *before* the GC to clear the mark bits, | |||
1427 | * which could have been cleared at almost no cost during the previous | |||
1428 | * sweep. | |||
1429 | */ | 1430 | */ | |
1430 | void | 1431 | static void | |
1431 | unp_gc(void) | 1432 | unp_gc(file_t *dp) | |
1432 | { | 1433 | { | |
1433 | file_t *fp, *nextfp; | 1434 | extern struct domain unixdomain; | |
1435 | file_t *fp, *np; | |||
1434 | struct socket *so, *so1; | 1436 | struct socket *so, *so1; | |
1435 | file_t **extra_ref, **fpp; | 1437 | u_int i, old, new; | |
1436 | int nunref, nslots, i; | 1438 | bool didwork; | |
1437 | 1439 | |||
1438 | if (atomic_swap_uint(&unp_gcing, 1) == 1) | 1440 | KASSERT(curlwp == unp_thread_lwp); | |
1439 | return; | 1441 | KASSERT(mutex_owned(&filelist_lock)); | |
1440 | 1442 | |||
1441 | restart: | 1443 | /* | |
1442 | nslots = nfiles * 2; | 1444 | * First, process deferred file closures. | |
1443 | extra_ref = kmem_alloc(nslots * sizeof(file_t *), KM_SLEEP); | 1445 | */ | |
1446 | while (!SLIST_EMPTY(&unp_thread_discard)) { | |||
1447 | fp = SLIST_FIRST(&unp_thread_discard); | |||
1448 | KASSERT(fp->f_unpcount > 0); | |||
1449 | KASSERT(fp->f_count > 0); | |||
1450 | KASSERT(fp->f_msgcount > 0); | |||
1451 | KASSERT(fp->f_count >= fp->f_unpcount); | |||
1452 | KASSERT(fp->f_count >= fp->f_msgcount); | |||
1453 | KASSERT(fp->f_msgcount >= fp->f_unpcount); | |||
1454 | SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist); | |||
1455 | i = fp->f_unpcount; | |||
1456 | fp->f_unpcount = 0; | |||
1457 | mutex_exit(&filelist_lock); | |||
1458 | for (; i != 0; i--) { | |||
1459 | unp_discard_now(fp); | |||
1460 | } | |||
1461 | mutex_enter(&filelist_lock); | |||
1462 | } | |||
1444 | 1463 | |||
1445 | mutex_enter(&filelist_lock); | 1464 | /* | |
1465 | * Clear mark bits. Ensure that we don't consider new files | |||
1466 | * entering the file table during this loop (they will not have | |||
1467 | * FSCAN set). | |||
1468 | */ | |||
1446 | unp_defer = 0; | 1469 | unp_defer = 0; | |
1447 | ||||
1448 | /* Clear mark bits */ | |||
1449 | LIST_FOREACH(fp, &filehead, f_list) { | 1470 | LIST_FOREACH(fp, &filehead, f_list) { | |
1450 | atomic_and_uint(&fp->f_flag, ~(FMARK|FDEFER)); | 1471 | for (old = fp->f_flag;; old = new) { | |
1472 | new = atomic_cas_uint(&fp->f_flag, old, | |||
1473 | (old | FSCAN) & ~(FMARK|FDEFER)); | |||
1474 | if (__predict_true(old == new)) { | |||
1475 | break; | |||
1476 | } | |||
1477 | } | |||
1451 | } | 1478 | } | |
1452 | 1479 | |||
1453 | /* | 1480 | /* | |
1454 | * Iterate over the set of descriptors, marking ones believed | 1481 | * Iterate over the set of sockets, marking ones believed (based on | |
1455 | * (based on refcount) to be referenced from a process, and | 1482 | * refcount) to be referenced from a process, and marking for rescan | |
1456 | * marking for rescan descriptors which are queued on a socket. | 1483 | * sockets which are queued on a socket. Rescan continues descending | 
1484 | * and searching for sockets referenced by sockets (FDEFER), until | |||
1485 | * there are no more socket->socket references to be discovered. | |||
1457 | */ | 1486 | */ | |
1458 | do { | 1487 | do { | |
1459 | LIST_FOREACH(fp, &filehead, f_list) { | 1488 | didwork = false; | |
1489 | for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) { | |||
1490 | KASSERT(mutex_owned(&filelist_lock)); | |||
1491 | np = LIST_NEXT(fp, f_list); | |||
1460 | mutex_enter(&fp->f_lock); | 1492 | mutex_enter(&fp->f_lock); | |
1461 | if (fp->f_flag & FDEFER) { | 1493 | if ((fp->f_flag & FDEFER) != 0) { | |
1462 | atomic_and_uint(&fp->f_flag, ~FDEFER); | 1494 | atomic_and_uint(&fp->f_flag, ~FDEFER); | |
1463 | unp_defer--; | 1495 | unp_defer--; | |
1464 | KASSERT(fp->f_count != 0); | 1496 | KASSERT(fp->f_count != 0); | |
1465 | } else { | 1497 | } else { | |
1466 | if (fp->f_count == 0 || | 1498 | if (fp->f_count == 0 || | |
1467 | (fp->f_flag & FMARK) || | 1499 | (fp->f_flag & FMARK) != 0 || | |
1468 | fp->f_count == fp->f_msgcount) { | 1500 | fp->f_count == fp->f_msgcount || | |
1501 | fp->f_unpcount != 0) { | |||
1469 | mutex_exit(&fp->f_lock); | 1502 | mutex_exit(&fp->f_lock); | |
1470 | continue; | 1503 | continue; | |
1471 | } | 1504 | } | |
1472 | } | 1505 | } | |
1473 | atomic_or_uint(&fp->f_flag, FMARK); | 1506 | atomic_or_uint(&fp->f_flag, FMARK); | |
1474 | 1507 | |||
1475 | if (fp->f_type != DTYPE_SOCKET || | 1508 | if (fp->f_type != DTYPE_SOCKET || | |
1476 | (so = fp->f_data) == NULL || | 1509 | (so = fp->f_data) == NULL || | |
1477 | so->so_proto->pr_domain != &unixdomain || | 1510 | so->so_proto->pr_domain != &unixdomain || | |
1478 | (so->so_proto->pr_flags&PR_RIGHTS) == 0) { | 1511 | (so->so_proto->pr_flags & PR_RIGHTS) == 0) { | |
1479 | mutex_exit(&fp->f_lock); | 1512 | mutex_exit(&fp->f_lock); | |
1480 | continue; | 1513 | continue; | |
1481 | } | 1514 | } | |
1482 | #ifdef notdef | 1515 | ||
1483 | if (so->so_rcv.sb_flags & SB_LOCK) { | 1516 | /* Gain file ref, mark our position, and unlock. */ | |
1484 | mutex_exit(&fp->f_lock); | 1517 | didwork = true; | |
1485 | mutex_exit(&filelist_lock); | 1518 | LIST_INSERT_AFTER(fp, dp, f_list); | |
1486 | kmem_free(extra_ref, nslots * sizeof(file_t *)); | 1519 | fp->f_count++; | |
1487 | /* | |||
1488 | * This is problematical; it's not clear | |||
1489 | * we need to wait for the sockbuf to be | |||
1490 | * unlocked (on a uniprocessor, at least), | |||
1491 | * and it's also not clear what to do | |||
1492 | * if sbwait returns an error due to receipt | |||
1493 | * of a signal. If sbwait does return | |||
1494 | * an error, we'll go into an infinite | |||
1495 | * loop. Delete all of this for now. | |||
1496 | */ | |||
1497 | (void) sbwait(&so->so_rcv); | |||
1498 | goto restart; | |||
1499 | } | |||
1500 | #endif | |||
1501 | mutex_exit(&fp->f_lock); | 1520 | mutex_exit(&fp->f_lock); | |
1521 | mutex_exit(&filelist_lock); | |||
1502 | 1522 | |||
1503 | /* | 1523 | /* | |
1504 | * XXX Locking a socket with filelist_lock held | 1524 | * Mark files referenced from sockets queued on the | |
1505 | * is ugly. filelist_lock can be taken by the | 1525 | * accept queue as well. | |
1506 | * pagedaemon when reclaiming items from file_cache. | |||
1507 | * Socket activity could delay the pagedaemon. | |||
1508 | */ | 1526 | */ | |
1509 | solock(so); | 1527 | solock(so); | |
1510 | unp_scan(so->so_rcv.sb_mb, unp_mark, 0); | 1528 | unp_scan(so->so_rcv.sb_mb, unp_mark, 0); | |
1511 | /* | 1529 | if ((so->so_options & SO_ACCEPTCONN) != 0) { | |
1512 | * Mark descriptors referenced from sockets queued | |||
1513 | * on the accept queue as well. | |||
1514 | */ | |||
1515 | if (so->so_options & SO_ACCEPTCONN) { | |||
1516 | TAILQ_FOREACH(so1, &so->so_q0, so_qe) { | 1530 | TAILQ_FOREACH(so1, &so->so_q0, so_qe) { | |
1517 | unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); | 1531 | unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); | |
1518 | } | 1532 | } | |
1519 | TAILQ_FOREACH(so1, &so->so_q, so_qe) { | 1533 | TAILQ_FOREACH(so1, &so->so_q, so_qe) { | |
1520 | unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); | 1534 | unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); | |
1521 | } | 1535 | } | |
1522 | } | 1536 | } | |
1523 | sounlock(so); | 1537 | sounlock(so); | |
1538 | ||||
1539 | /* Re-lock and restart from where we left off. */ | |||
1540 | closef(fp); | |||
1541 | mutex_enter(&filelist_lock); | |||
1542 | np = LIST_NEXT(dp, f_list); | |||
1543 | LIST_REMOVE(dp, f_list); | |||
1524 | } | 1544 | } | |
1525 | } while (unp_defer); | 1545 | /* | |
1546 | * Bail early if we did nothing in the loop above. Could | |||
1547 | * happen because of concurrent activity causing unp_defer | |||
1548 | * to get out of sync. | |||
1549 | */ | |||
1550 | } while (unp_defer != 0 && didwork); | |||
1526 | 1551 | |||
1527 | /* | 1552 | /* | |
1528 | * Sweep pass. Find unmarked descriptors, and free them. | 1553 | * Sweep pass. | |
1529 | * | |||
1530 | * We grab an extra reference to each of the file table entries | |||
1531 | * that are not otherwise accessible and then free the rights | |||
1532 | * that are stored in messages on them. | |||
1533 | * | |||
1534 | * The bug in the original code is a little tricky, so I'll describe | |||
1535 | * what's wrong with it here. | |||
1536 | * | |||
1537 | * It is incorrect to simply unp_discard each entry for f_msgcount | |||
1538 | * times -- consider the case of sockets A and B that contain | |||
1539 | * references to each other. On a last close of some other socket, | |||
1540 | * we trigger a gc since the number of outstanding rights (unp_rights) | |||
1541 | * is non-zero. If during the sweep phase the gc code un_discards, | |||
1542 | * we end up doing a (full) closef on the descriptor. A closef on A | |||
1543 | * results in the following chain. Closef calls soo_close, which | |||
1544 | * calls soclose. Soclose calls first (through the switch | |||
1545 | * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply | |||
1546 | * returns because the previous instance had set unp_gcing, and | |||
1547 | * we return all the way back to soclose, which marks the socket | |||
1548 | * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush | |||
1549 | * to free up the rights that are queued in messages on the socket A, | |||
1550 | * i.e., the reference on B. The sorflush calls via the dom_dispose | |||
1551 | * switch unp_dispose, which unp_scans with unp_discard. This second | |||
1552 | * instance of unp_discard just calls closef on B. | |||
1553 | * | 1554 | * | |
1554 | * Well, a similar chain occurs on B, resulting in a sorflush on B, | 1555 | * We grab an extra reference to each of the files that are | |
1555 | * which results in another closef on A. Unfortunately, A is already | 1556 | * not otherwise accessible and then free the rights that are | |
1556 | * being closed, and the descriptor has already been marked with | 1557 | * stored in messages on them. | |
1557 | * SS_NOFDREF, and soclose panics at this point. | |||
1558 | * | |||
1559 | * Here, we first take an extra reference to each inaccessible | |||
1560 | * descriptor. Then, if the inaccessible descriptor is a | |||
1561 | * socket, we call sorflush in case it is a Unix domain | |||
1562 | * socket. After we destroy all the rights carried in | |||
1563 | * messages, we do a last closef to get rid of our extra | |||
1564 | * reference. This is the last close, and the unp_detach etc | |||
1565 | * will shut down the socket. | |||
1566 | * | |||
1567 | * 91/09/19, bsy@cs.cmu.edu | |||
1568 | */ | 1558 | */ | |
1569 | if (nslots < nfiles) { | 1559 | for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) { | |
1570 | mutex_exit(&filelist_lock); | 1560 | KASSERT(mutex_owned(&filelist_lock)); | |
1571 | kmem_free(extra_ref, nslots * sizeof(file_t *)); | 1561 | np = LIST_NEXT(fp, f_list); | |
1572 | goto restart; | |||
1573 | } | |||
1574 | for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; | |||
1575 | fp = nextfp) { | |||
1576 | nextfp = LIST_NEXT(fp, f_list); | |||
1577 | mutex_enter(&fp->f_lock); | 1562 | mutex_enter(&fp->f_lock); | |
1578 | if (fp->f_count != 0 && | 1563 | ||
1579 | fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { | 1564 | /* | |
1580 | *fpp++ = fp; | 1565 | * Ignore non-sockets. | |
1581 | nunref++; | 1566 | * Ignore dead sockets, or sockets with pending close. | |
1582 | fp->f_count++; | 1567 | * Ignore sockets obviously referenced elsewhere. | |
1568 | * Ignore sockets marked as referenced by our scan. | |||
1569 | * Ignore new sockets that did not exist during the scan. | |||
1570 | */ | |||
1571 | if (fp->f_type != DTYPE_SOCKET || | |||
1572 | fp->f_count == 0 || fp->f_unpcount != 0 || | |||
1573 | fp->f_count != fp->f_msgcount || | |||
1574 | (fp->f_flag & (FMARK | FSCAN)) != FSCAN) { | |||
1575 | mutex_exit(&fp->f_lock); | |||
1576 | continue; | |||
1583 | } | 1577 | } | |
1578 | ||||
1579 | /* Gain file ref, mark our position, and unlock. */ | |||
1580 | LIST_INSERT_AFTER(fp, dp, f_list); | |||
1581 | fp->f_count++; | |||
1584 | mutex_exit(&fp->f_lock); | 1582 | mutex_exit(&fp->f_lock); | |
1583 | mutex_exit(&filelist_lock); | |||
1584 | ||||
1585 | /* | |||
1586 | * Flush all data from the socket's receive buffer. | |||
1587 | * This will cause files referenced only by the | |||
1588 | * socket to be queued for close. | |||
1589 | */ | |||
1590 | so = fp->f_data; | |||
1591 | solock(so); | |||
1592 | sorflush(so); | |||
1593 | sounlock(so); | |||
1594 | ||||
1595 | /* Re-lock and restart from where we left off. */ | |||
1596 | closef(fp); | |||
1597 | mutex_enter(&filelist_lock); | |||
1598 | np = LIST_NEXT(dp, f_list); | |||
1599 | LIST_REMOVE(dp, f_list); | |||
1585 | } | 1600 | } | |
1586 | mutex_exit(&filelist_lock); | 1601 | } | |
1587 | 1602 | |||
1588 | for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { | 1603 | /* | |
1589 | fp = *fpp; | 1604 | * Garbage collector thread. While SCM_RIGHTS messages are in transit, | |
1590 | if (fp->f_type == DTYPE_SOCKET) { | 1605 | * wake once per second to garbage collect. Run continually while we | |
1591 | so = fp->f_data; | 1606 | * have deferred closes to process. | |
1592 | solock(so); | 1607 | */ | |
1593 | sorflush(fp->f_data); | 1608 | static void | |
1594 | sounlock(so); | 1609 | unp_thread(void *cookie) | |
1610 | { | |||
1611 | file_t *dp; | |||
1612 | ||||
1613 | /* Allocate a dummy file for our scans. */ | |||
1614 | if ((dp = fgetdummy()) == NULL) { | |||
1615 | panic("unp_thread"); | |||
1616 | } | |||
1617 | ||||
1618 | mutex_enter(&filelist_lock); | |||
1619 | for (;;) { | |||
1620 | KASSERT(mutex_owned(&filelist_lock)); | |||
1621 | if (SLIST_EMPTY(&unp_thread_discard)) { | |||
1622 | if (unp_rights != 0) { | |||
1623 | (void)cv_timedwait(&unp_thread_cv, | |||
1624 | &filelist_lock, hz); | |||
1625 | } else { | |||
1626 | cv_wait(&unp_thread_cv, &filelist_lock); | |||
1627 | } | |||
1595 | } | 1628 | } | |
1629 | unp_gc(dp); | |||
1596 | } | 1630 | } | |
1597 | for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { | 1631 | /* NOTREACHED */ | |
1598 | closef(*fpp); | 1632 | } | |
1633 | ||||
1634 | /* | |||
1635 | * Kick the garbage collector into action if there is something for | |||
1636 | * it to process. | |||
1637 | */ | |||
1638 | static void | |||
1639 | unp_thread_kick(void) | |||
1640 | { | |||
1641 | ||||
1642 | if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) { | |||
1643 | mutex_enter(&filelist_lock); | |||
1644 | cv_signal(&unp_thread_cv); | |||
1645 | mutex_exit(&filelist_lock); | |||
1599 | } | 1646 | } | |
1600 | kmem_free(extra_ref, nslots * sizeof(file_t *)); | |||
1601 | atomic_swap_uint(&unp_gcing, 0); | |||
1602 | } | 1647 | } | |
1603 | 1648 | |||
1604 | void | 1649 | void | |
1605 | unp_dispose(struct mbuf *m) | 1650 | unp_dispose(struct mbuf *m) | |
1606 | { | 1651 | { | |
1607 | 1652 | |||
1608 | if (m) | 1653 | if (m) | |
1609 | unp_scan(m, unp_discard, 1); | 1654 | unp_scan(m, unp_discard_later, 1); | |
1610 | } | 1655 | } | |
1611 | 1656 | |||
1612 | void | 1657 | void | |
1613 | unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard) | 1658 | unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard) | |
1614 | { | 1659 | { | |
1615 | struct mbuf *m; | 1660 | struct mbuf *m; | |
1616 | file_t **rp; | 1661 | file_t **rp, *fp; | |
1617 | struct cmsghdr *cm; | 1662 | struct cmsghdr *cm; | |
1618 | int i; | 1663 | int i, qfds; | |
1619 | int qfds; | |||
1620 | 1664 | |||
1621 | while (m0) { | 1665 | while (m0) { | |
1622 | for (m = m0; m; m = m->m_next) { | 1666 | for (m = m0; m; m = m->m_next) { | |
1623 | if (m->m_type == MT_CONTROL && | 1667 | if (m->m_type != MT_CONTROL || | |
1624 | m->m_len >= sizeof(*cm)) { | 1668 | m->m_len < sizeof(*cm)) { | |
1625 | cm = mtod(m, struct cmsghdr *); | 1669 | continue; | |
1626 | if (cm->cmsg_level != SOL_SOCKET || | 1670 | } | |
1627 | cm->cmsg_type != SCM_RIGHTS) | 1671 | cm = mtod(m, struct cmsghdr *); | |
1628 | continue; | 1672 | if (cm->cmsg_level != SOL_SOCKET || | |
1629 | qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) | 1673 | cm->cmsg_type != SCM_RIGHTS) | |
1630 | / sizeof(file_t *); | 1674 | continue; | |
1631 | rp = (file_t **)CMSG_DATA(cm); | 1675 | qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) | |
1632 | for (i = 0; i < qfds; i++) { | 1676 | / sizeof(file_t *); | |
1633 | file_t *fp = *rp; | 1677 | rp = (file_t **)CMSG_DATA(cm); | |
1634 | if (discard) | 1678 | for (i = 0; i < qfds; i++) { | |
1635 | *rp = 0; | 1679 | fp = *rp; | |
1636 | (*op)(fp); | 1680 | if (discard) { | |
1637 | rp++; | 1681 | *rp = 0; | |
1638 | } | 1682 | } | |
1639 | break; /* XXX, but saves time */ | 1683 | (*op)(fp); | |
1684 | rp++; | |||
1640 | } | 1685 | } | |
1641 | } | 1686 | } | |
1642 | m0 = m0->m_nextpkt; | 1687 | m0 = m0->m_nextpkt; | |
1643 | } | 1688 | } | |
1644 | } | 1689 | } | |
1645 | 1690 | |||
1646 | void | 1691 | void | |
1647 | unp_mark(file_t *fp) | 1692 | unp_mark(file_t *fp) | |
1648 | { | 1693 | { | |
1649 | 1694 | |||
1650 | if (fp == NULL) | 1695 | if (fp == NULL) | |
1651 | return; | 1696 | return; | |
1652 | 1697 | |||
1653 | /* If we're already deferred, don't screw up the defer count */ | 1698 | /* If we're already deferred, don't screw up the defer count */ | |
1654 | mutex_enter(&fp->f_lock); | 1699 | mutex_enter(&fp->f_lock); | |
1655 | if (fp->f_flag & (FMARK | FDEFER)) { | 1700 | if (fp->f_flag & (FMARK | FDEFER)) { | |
1656 | mutex_exit(&fp->f_lock); | 1701 | mutex_exit(&fp->f_lock); | |
1657 | return; | 1702 | return; | |
1658 | } | 1703 | } | |
1659 | 1704 | |||
1660 | /* | 1705 | /* | |
1661 | * Minimize the number of deferrals... Sockets are the only | 1706 | * Minimize the number of deferrals... Sockets are the only type of | |
1662 | * type of descriptor which can hold references to another | 1707 | * file which can hold references to another file, so just mark | |
1663 | * descriptor, so just mark other descriptors, and defer | 1708 | * other files, and defer unmarked sockets for the next pass. | |
1664 | * unmarked sockets for the next pass. | |||
1665 | */ | 1709 | */ | |
1666 | if (fp->f_type == DTYPE_SOCKET) { | 1710 | if (fp->f_type == DTYPE_SOCKET) { | |
1667 | unp_defer++; | 1711 | unp_defer++; | |
1668 | KASSERT(fp->f_count != 0); | 1712 | KASSERT(fp->f_count != 0); | |
1669 | atomic_or_uint(&fp->f_flag, FDEFER); | 1713 | atomic_or_uint(&fp->f_flag, FDEFER); | |
1670 | } else { | 1714 | } else { | |
1671 | atomic_or_uint(&fp->f_flag, FMARK); | 1715 | atomic_or_uint(&fp->f_flag, FMARK); | |
1672 | } | 1716 | } | |
1673 | mutex_exit(&fp->f_lock); | 1717 | mutex_exit(&fp->f_lock); | |
1674 | return; | |||
1675 | } | 1718 | } | |
1676 | 1719 | |||
1677 | void | 1720 | static void | |
1678 | unp_discard(file_t *fp) | 1721 | unp_discard_now(file_t *fp) | |
1679 | { | 1722 | { | |
1680 | 1723 | |||
1681 | if (fp == NULL) | 1724 | if (fp == NULL) | |
1682 | return; | 1725 | return; | |
1683 | 1726 | |||
1684 | mutex_enter(&fp->f_lock); | |||
1685 | KASSERT(fp->f_count > 0); | 1727 | KASSERT(fp->f_count > 0); | |
1728 | KASSERT(fp->f_msgcount > 0); | |||
1729 | ||||
1730 | mutex_enter(&fp->f_lock); | |||
1686 | fp->f_msgcount--; | 1731 | fp->f_msgcount--; | |
1687 | mutex_exit(&fp->f_lock); | 1732 | mutex_exit(&fp->f_lock); | |
1688 | atomic_dec_uint(&unp_rights); | 1733 | atomic_dec_uint(&unp_rights); | |
1689 | (void)closef(fp); | 1734 | (void)closef(fp); | |
1690 | } | 1735 | } | |
1736 | ||||
1737 | static void | |||
1738 | unp_discard_later(file_t *fp) | |||
1739 | { | |||
1740 | ||||
1741 | if (fp == NULL) | |||
1742 | return; | |||
1743 | ||||
1744 | KASSERT(fp->f_count > 0); | |||
1745 | KASSERT(fp->f_msgcount > 0); | |||
1746 | ||||
1747 | mutex_enter(&filelist_lock); | |||
1748 | if (fp->f_unpcount++ == 0) { | |||
1749 | SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist); | |||
1750 | } | |||
1751 | mutex_exit(&filelist_lock); | |||
1752 | } |
--- src/sys/sys/fcntl.h 2006/10/05 14:48:33 1.34
+++ src/sys/sys/fcntl.h 2009/03/18 05:33:23 1.34.64.1
@@ -1,287 +1,288 @@ | @@ -1,287 +1,288 @@ | |||
1 | /* $NetBSD: fcntl.h,v 1.34 2006/10/05 14:48:33 chs Exp $ */ | 1 | /* $NetBSD: fcntl.h,v 1.34.64.1 2009/03/18 05:33:23 snj Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1983, 1990, 1993 | 4 | * Copyright (c) 1983, 1990, 1993 | |
5 | * The Regents of the University of California. All rights reserved. | 5 | * The Regents of the University of California. All rights reserved. | |
6 | * (c) UNIX System Laboratories, Inc. | 6 | * (c) UNIX System Laboratories, Inc. | |
7 | * All or some portions of this file are derived from material licensed | 7 | * All or some portions of this file are derived from material licensed | |
8 | * to the University of California by American Telephone and Telegraph | 8 | * to the University of California by American Telephone and Telegraph | |
9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
10 | * the permission of UNIX System Laboratories, Inc. | 10 | * the permission of UNIX System Laboratories, Inc. | |
11 | * | 11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | 12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | 13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | 14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | 15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | 16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | 17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | 18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | 19 | * documentation and/or other materials provided with the distribution. | |
20 | * 3. Neither the name of the University nor the names of its contributors | 20 | * 3. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | 21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | 22 | * without specific prior written permission. | |
23 | * | 23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | 34 | * SUCH DAMAGE. | |
35 | * | 35 | * | |
36 | * @(#)fcntl.h 8.3 (Berkeley) 1/21/94 | 36 | * @(#)fcntl.h 8.3 (Berkeley) 1/21/94 | |
37 | */ | 37 | */ | |
38 | 38 | |||
39 | #ifndef _SYS_FCNTL_H_ | 39 | #ifndef _SYS_FCNTL_H_ | |
40 | #define _SYS_FCNTL_H_ | 40 | #define _SYS_FCNTL_H_ | |
41 | 41 | |||
42 | /* | 42 | /* | |
43 | * This file includes the definitions for open and fcntl | 43 | * This file includes the definitions for open and fcntl | |
44 | * described by POSIX for <fcntl.h>; it also includes | 44 | * described by POSIX for <fcntl.h>; it also includes | |
45 | * related kernel definitions. | 45 | * related kernel definitions. | |
46 | */ | 46 | */ | |
47 | 47 | |||
48 | #ifndef _KERNEL | 48 | #ifndef _KERNEL | |
49 | #include <sys/featuretest.h> | 49 | #include <sys/featuretest.h> | |
50 | #include <sys/types.h> | 50 | #include <sys/types.h> | |
51 | #if defined(_XOPEN_SOURCE) || defined(_NETBSD_SOURCE) | 51 | #if defined(_XOPEN_SOURCE) || defined(_NETBSD_SOURCE) | |
52 | #include <sys/stat.h> | 52 | #include <sys/stat.h> | |
53 | #endif /* _XOPEN_SOURCE || _NETBSD_SOURCE */ | 53 | #endif /* _XOPEN_SOURCE || _NETBSD_SOURCE */ | |
54 | #endif /* !_KERNEL */ | 54 | #endif /* !_KERNEL */ | |
55 | 55 | |||
56 | /* | 56 | /* | |
57 | * File status flags: these are used by open(2), fcntl(2). | 57 | * File status flags: these are used by open(2), fcntl(2). | |
58 | * They are also used (indirectly) in the kernel file structure f_flags, | 58 | * They are also used (indirectly) in the kernel file structure f_flags, | |
59 | * which is a superset of the open/fcntl flags. Open flags and f_flags | 59 | * which is a superset of the open/fcntl flags. Open flags and f_flags | |
60 | * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags). | 60 | * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags). | |
61 | * Open/fcntl flags begin with O_; kernel-internal flags begin with F. | 61 | * Open/fcntl flags begin with O_; kernel-internal flags begin with F. | |
62 | */ | 62 | */ | |
63 | /* open-only flags */ | 63 | /* open-only flags */ | |
64 | #define O_RDONLY 0x00000000 /* open for reading only */ | 64 | #define O_RDONLY 0x00000000 /* open for reading only */ | |
65 | #define O_WRONLY 0x00000001 /* open for writing only */ | 65 | #define O_WRONLY 0x00000001 /* open for writing only */ | |
66 | #define O_RDWR 0x00000002 /* open for reading and writing */ | 66 | #define O_RDWR 0x00000002 /* open for reading and writing */ | |
67 | #define O_ACCMODE 0x00000003 /* mask for above modes */ | 67 | #define O_ACCMODE 0x00000003 /* mask for above modes */ | |
68 | 68 | |||
69 | /* | 69 | /* | |
70 | * Kernel encoding of open mode; separate read and write bits that are | 70 | * Kernel encoding of open mode; separate read and write bits that are | |
71 | * independently testable: 1 greater than the above. | 71 | * independently testable: 1 greater than the above. | |
72 | * | 72 | * | |
73 | * XXX | 73 | * XXX | |
74 | * FREAD and FWRITE are excluded from the #ifdef _KERNEL so that TIOCFLUSH, | 74 | * FREAD and FWRITE are excluded from the #ifdef _KERNEL so that TIOCFLUSH, | |
75 | * which was documented to use FREAD/FWRITE, continues to work. | 75 | * which was documented to use FREAD/FWRITE, continues to work. | |
76 | */ | 76 | */ | |
77 | #if defined(_NETBSD_SOURCE) | 77 | #if defined(_NETBSD_SOURCE) | |
78 | #define FREAD 0x00000001 | 78 | #define FREAD 0x00000001 | |
79 | #define FWRITE 0x00000002 | 79 | #define FWRITE 0x00000002 | |
80 | #endif | 80 | #endif | |
81 | #define O_NONBLOCK 0x00000004 /* no delay */ | 81 | #define O_NONBLOCK 0x00000004 /* no delay */ | |
82 | #define O_APPEND 0x00000008 /* set append mode */ | 82 | #define O_APPEND 0x00000008 /* set append mode */ | |
83 | #if defined(_NETBSD_SOURCE) | 83 | #if defined(_NETBSD_SOURCE) | |
84 | #define O_SHLOCK 0x00000010 /* open with shared file lock */ | 84 | #define O_SHLOCK 0x00000010 /* open with shared file lock */ | |
85 | #define O_EXLOCK 0x00000020 /* open with exclusive file lock */ | 85 | #define O_EXLOCK 0x00000020 /* open with exclusive file lock */ | |
86 | #define O_ASYNC 0x00000040 /* signal pgrp when data ready */ | 86 | #define O_ASYNC 0x00000040 /* signal pgrp when data ready */ | |
87 | #endif | 87 | #endif | |
88 | #if (_POSIX_C_SOURCE - 0) >= 199309L || \ | 88 | #if (_POSIX_C_SOURCE - 0) >= 199309L || \ | |
89 | (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) || \ | 89 | (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) || \ | |
90 | (_XOPEN_SOURCE - 0) >= 500 || defined(_NETBSD_SOURCE) | 90 | (_XOPEN_SOURCE - 0) >= 500 || defined(_NETBSD_SOURCE) | |
91 | #define O_SYNC 0x00000080 /* synchronous writes */ | 91 | #define O_SYNC 0x00000080 /* synchronous writes */ | |
92 | #endif | 92 | #endif | |
93 | #if defined(_NETBSD_SOURCE) | 93 | #if defined(_NETBSD_SOURCE) | |
94 | #define O_NOFOLLOW 0x00000100 /* don't follow symlinks on the last */ | 94 | #define O_NOFOLLOW 0x00000100 /* don't follow symlinks on the last */ | |
95 | /* path component */ | 95 | /* path component */ | |
96 | #endif | 96 | #endif | |
97 | #define O_CREAT 0x00000200 /* create if nonexistent */ | 97 | #define O_CREAT 0x00000200 /* create if nonexistent */ | |
98 | #define O_TRUNC 0x00000400 /* truncate to zero length */ | 98 | #define O_TRUNC 0x00000400 /* truncate to zero length */ | |
99 | #define O_EXCL 0x00000800 /* error if already exists */ | 99 | #define O_EXCL 0x00000800 /* error if already exists */ | |
100 | 100 | |||
101 | #if (_POSIX_C_SOURCE - 0) >= 199309L || (_XOPEN_SOURCE - 0) >= 500 || \ | 101 | #if (_POSIX_C_SOURCE - 0) >= 199309L || (_XOPEN_SOURCE - 0) >= 500 || \ | |
102 | defined(_NETBSD_SOURCE) | 102 | defined(_NETBSD_SOURCE) | |
103 | #define O_DSYNC 0x00010000 /* write: I/O data completion */ | 103 | #define O_DSYNC 0x00010000 /* write: I/O data completion */ | |
104 | #define O_RSYNC 0x00020000 /* read: I/O completion as for write */ | 104 | #define O_RSYNC 0x00020000 /* read: I/O completion as for write */ | |
105 | #endif | 105 | #endif | |
106 | 106 | |||
107 | #if defined(_NETBSD_SOURCE) | 107 | #if defined(_NETBSD_SOURCE) | |
108 | #define O_ALT_IO 0x00040000 /* use alternate i/o semantics */ | 108 | #define O_ALT_IO 0x00040000 /* use alternate i/o semantics */ | |
109 | #define O_DIRECT 0x00080000 /* direct I/O hint */ | 109 | #define O_DIRECT 0x00080000 /* direct I/O hint */ | |
110 | #endif | 110 | #endif | |
111 | 111 | |||
112 | /* defined by POSIX 1003.1; BSD default, but required to be bitwise distinct */ | 112 | /* defined by POSIX 1003.1; BSD default, but required to be bitwise distinct */ | |
113 | #define O_NOCTTY 0x00008000 /* don't assign controlling terminal */ | 113 | #define O_NOCTTY 0x00008000 /* don't assign controlling terminal */ | |
114 | 114 | |||
115 | #ifdef _KERNEL | 115 | #ifdef _KERNEL | |
116 | /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ | 116 | /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ | |
117 | #define FFLAGS(oflags) ((oflags) + 1) | 117 | #define FFLAGS(oflags) ((oflags) + 1) | |
118 | #define OFLAGS(fflags) ((fflags) - 1) | 118 | #define OFLAGS(fflags) ((fflags) - 1) | |
119 | 119 | |||
120 | /* all bits settable during open(2) */ | 120 | /* all bits settable during open(2) */ | |
121 | #define O_MASK (O_ACCMODE|O_NONBLOCK|O_APPEND|O_SHLOCK|O_EXLOCK|\ | 121 | #define O_MASK (O_ACCMODE|O_NONBLOCK|O_APPEND|O_SHLOCK|O_EXLOCK|\ | |
122 | O_ASYNC|O_SYNC|O_CREAT|O_TRUNC|O_EXCL|O_DSYNC|\ | 122 | O_ASYNC|O_SYNC|O_CREAT|O_TRUNC|O_EXCL|O_DSYNC|\ | |
123 | O_RSYNC|O_NOCTTY|O_ALT_IO|O_NOFOLLOW|O_DIRECT) | 123 | O_RSYNC|O_NOCTTY|O_ALT_IO|O_NOFOLLOW|O_DIRECT) | |
124 | 124 | |||
125 | #define FMARK 0x00001000 /* mark during gc() */ | 125 | #define FMARK 0x00001000 /* mark during gc() */ | |
126 | #define FDEFER 0x00002000 /* defer for next gc pass */ | 126 | #define FDEFER 0x00002000 /* defer for next gc pass */ | |
127 | #define FHASLOCK 0x00004000 /* descriptor holds advisory lock */ | 127 | #define FHASLOCK 0x00004000 /* descriptor holds advisory lock */ | |
128 | #define FSCAN 0x00100000 /* scan during gc passes */ | |||
128 | #define FKIOCTL 0x80000000 /* kernel originated ioctl */ | 129 | #define FKIOCTL 0x80000000 /* kernel originated ioctl */ | |
129 | /* bits settable by fcntl(F_SETFL, ...) */ | 130 | /* bits settable by fcntl(F_SETFL, ...) */ | |
130 | #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FDSYNC|FRSYNC|FALTIO|\ | 131 | #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FDSYNC|FRSYNC|FALTIO|\ | |
131 | FDIRECT) | 132 | FDIRECT) | |
132 | /* bits to save after open(2) */ | 133 | /* bits to save after open(2) */ | |
133 | #define FMASK (FREAD|FWRITE|FCNTLFLAGS) | 134 | #define FMASK (FREAD|FWRITE|FCNTLFLAGS) | |
134 | #endif /* _KERNEL */ | 135 | #endif /* _KERNEL */ | |
135 | 136 | |||
136 | /* | 137 | /* | |
137 | * The O_* flags used to have only F* names, which were used in the kernel | 138 | * The O_* flags used to have only F* names, which were used in the kernel | |
138 | * and by fcntl. We retain the F* names for the kernel f_flags field | 139 | * and by fcntl. We retain the F* names for the kernel f_flags field | |
139 | * and for backward compatibility for fcntl. | 140 | * and for backward compatibility for fcntl. | |
140 | */ | 141 | */ | |
141 | #if defined(_NETBSD_SOURCE) | 142 | #if defined(_NETBSD_SOURCE) | |
142 | #define FAPPEND O_APPEND /* kernel/compat */ | 143 | #define FAPPEND O_APPEND /* kernel/compat */ | |
143 | #define FASYNC O_ASYNC /* kernel/compat */ | 144 | #define FASYNC O_ASYNC /* kernel/compat */ | |
144 | #define O_FSYNC O_SYNC /* compat */ | 145 | #define O_FSYNC O_SYNC /* compat */ | |
145 | #define FNDELAY O_NONBLOCK /* compat */ | 146 | #define FNDELAY O_NONBLOCK /* compat */ | |
146 | #define O_NDELAY O_NONBLOCK /* compat */ | 147 | #define O_NDELAY O_NONBLOCK /* compat */ | |
147 | #endif | 148 | #endif | |
148 | #if defined(_KERNEL) | 149 | #if defined(_KERNEL) | |
149 | #define FNONBLOCK O_NONBLOCK /* kernel */ | 150 | #define FNONBLOCK O_NONBLOCK /* kernel */ | |
150 | #define FFSYNC O_SYNC /* kernel */ | 151 | #define FFSYNC O_SYNC /* kernel */ | |
151 | #define FDSYNC O_DSYNC /* kernel */ | 152 | #define FDSYNC O_DSYNC /* kernel */ | |
152 | #define FRSYNC O_RSYNC /* kernel */ | 153 | #define FRSYNC O_RSYNC /* kernel */ | |
153 | #define FALTIO O_ALT_IO /* kernel */ | 154 | #define FALTIO O_ALT_IO /* kernel */ | |
154 | #define FDIRECT O_DIRECT /* kernel */ | 155 | #define FDIRECT O_DIRECT /* kernel */ | |
155 | #endif | 156 | #endif | |
156 | 157 | |||
157 | /* | 158 | /* | |
158 | * Constants used for fcntl(2) | 159 | * Constants used for fcntl(2) | |
159 | */ | 160 | */ | |
160 | 161 | |||
161 | /* command values */ | 162 | /* command values */ | |
162 | #define F_DUPFD 0 /* duplicate file descriptor */ | 163 | #define F_DUPFD 0 /* duplicate file descriptor */ | |
163 | #define F_GETFD 1 /* get file descriptor flags */ | 164 | #define F_GETFD 1 /* get file descriptor flags */ | |
164 | #define F_SETFD 2 /* set file descriptor flags */ | 165 | #define F_SETFD 2 /* set file descriptor flags */ | |
165 | #define F_GETFL 3 /* get file status flags */ | 166 | #define F_GETFL 3 /* get file status flags */ | |
166 | #define F_SETFL 4 /* set file status flags */ | 167 | #define F_SETFL 4 /* set file status flags */ | |
167 | #if (_POSIX_C_SOURCE - 0) >= 200112L || (_XOPEN_SOURCE - 0) >= 500 || \ | 168 | #if (_POSIX_C_SOURCE - 0) >= 200112L || (_XOPEN_SOURCE - 0) >= 500 || \ | |
168 | defined(_NETBSD_SOURCE) | 169 | defined(_NETBSD_SOURCE) | |
169 | #define F_GETOWN 5 /* get SIGIO/SIGURG proc/pgrp */ | 170 | #define F_GETOWN 5 /* get SIGIO/SIGURG proc/pgrp */ | |
170 | #define F_SETOWN 6 /* set SIGIO/SIGURG proc/pgrp */ | 171 | #define F_SETOWN 6 /* set SIGIO/SIGURG proc/pgrp */ | |
171 | #endif | 172 | #endif | |
172 | #define F_GETLK 7 /* get record locking information */ | 173 | #define F_GETLK 7 /* get record locking information */ | |
173 | #define F_SETLK 8 /* set record locking information */ | 174 | #define F_SETLK 8 /* set record locking information */ | |
174 | #define F_SETLKW 9 /* F_SETLK; wait if blocked */ | 175 | #define F_SETLKW 9 /* F_SETLK; wait if blocked */ | |
175 | #if defined(_NETBSD_SOURCE) | 176 | #if defined(_NETBSD_SOURCE) | |
176 | #define F_CLOSEM 10 /* close all fds >= the one given */ | 177 | #define F_CLOSEM 10 /* close all fds >= the one given */ | |
177 | #define F_MAXFD 11 /* return the max open fd */ | 178 | #define F_MAXFD 11 /* return the max open fd */ | |
178 | #endif | 179 | #endif | |
179 | 180 | |||
180 | /* file descriptor flags (F_GETFD, F_SETFD) */ | 181 | /* file descriptor flags (F_GETFD, F_SETFD) */ | |
181 | #define FD_CLOEXEC 1 /* close-on-exec flag */ | 182 | #define FD_CLOEXEC 1 /* close-on-exec flag */ | |
182 | 183 | |||
183 | /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ | 184 | /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ | |
184 | #define F_RDLCK 1 /* shared or read lock */ | 185 | #define F_RDLCK 1 /* shared or read lock */ | |
185 | #define F_UNLCK 2 /* unlock */ | 186 | #define F_UNLCK 2 /* unlock */ | |
186 | #define F_WRLCK 3 /* exclusive or write lock */ | 187 | #define F_WRLCK 3 /* exclusive or write lock */ | |
187 | #ifdef _KERNEL | 188 | #ifdef _KERNEL | |
188 | #define F_WAIT 0x010 /* Wait until lock is granted */ | 189 | #define F_WAIT 0x010 /* Wait until lock is granted */ | |
189 | #define F_FLOCK 0x020 /* Use flock(2) semantics for lock */ | 190 | #define F_FLOCK 0x020 /* Use flock(2) semantics for lock */ | |
190 | #define F_POSIX 0x040 /* Use POSIX semantics for lock */ | 191 | #define F_POSIX 0x040 /* Use POSIX semantics for lock */ | |
191 | #endif | 192 | #endif | |
192 | 193 | |||
193 | /* Constants for fcntl's passed to the underlying fs - like ioctl's. */ | 194 | /* Constants for fcntl's passed to the underlying fs - like ioctl's. */ | |
194 | #if defined(_NETBSD_SOURCE) | 195 | #if defined(_NETBSD_SOURCE) | |
195 | #define F_PARAM_MASK 0xfff | 196 | #define F_PARAM_MASK 0xfff | |
196 | #define F_PARAM_LEN(x) (((x) >> 16) & F_PARAM_MASK) | 197 | #define F_PARAM_LEN(x) (((x) >> 16) & F_PARAM_MASK) | |
197 | #define F_PARAM_MAX 4095 | 198 | #define F_PARAM_MAX 4095 | |
198 | #define F_FSCTL (int)0x80000000 /* This fcntl goes to the fs */ | 199 | #define F_FSCTL (int)0x80000000 /* This fcntl goes to the fs */ | |
199 | #define F_FSVOID (int)0x40000000 /* no parameters */ | 200 | #define F_FSVOID (int)0x40000000 /* no parameters */ | |
200 | #define F_FSOUT (int)0x20000000 /* copy out parameter */ | 201 | #define F_FSOUT (int)0x20000000 /* copy out parameter */ | |
201 | #define F_FSIN (int)0x10000000 /* copy in parameter */ | 202 | #define F_FSIN (int)0x10000000 /* copy in parameter */ | |
202 | #define F_FSINOUT (F_FSIN | F_FSOUT) | 203 | #define F_FSINOUT (F_FSIN | F_FSOUT) | |
203 | #define F_FSDIRMASK (int)0x70000000 /* mask for IN/OUT/VOID */ | 204 | #define F_FSDIRMASK (int)0x70000000 /* mask for IN/OUT/VOID */ | |
204 | #define F_FSPRIV (int)0x00008000 /* command is fs-specific */ | 205 | #define F_FSPRIV (int)0x00008000 /* command is fs-specific */ | |
205 | 206 | |||
206 | /* | 207 | /* | |
207 | * Define command macros for operations which, if implemented, must be | 208 | * Define command macros for operations which, if implemented, must be | |
208 | * the same for all fs's. | 209 | * the same for all fs's. | |
209 | */ | 210 | */ | |
210 | #define _FCN(inout, num, len) \ | 211 | #define _FCN(inout, num, len) \ | |
211 | (F_FSCTL | inout | ((len & F_PARAM_MASK) << 16) | (num)) | 212 | (F_FSCTL | inout | ((len & F_PARAM_MASK) << 16) | (num)) | |
212 | #define _FCNO(c) _FCN(F_FSVOID, (c), 0) | 213 | #define _FCNO(c) _FCN(F_FSVOID, (c), 0) | |
213 | #define _FCNR(c, t) _FCN(F_FSIN, (c), (int)sizeof(t)) | 214 | #define _FCNR(c, t) _FCN(F_FSIN, (c), (int)sizeof(t)) | |
214 | #define _FCNW(c, t) _FCN(F_FSOUT, (c), (int)sizeof(t)) | 215 | #define _FCNW(c, t) _FCN(F_FSOUT, (c), (int)sizeof(t)) | |
215 | #define _FCNRW(c, t) _FCN(F_FSINOUT, (c), (int)sizeof(t)) | 216 | #define _FCNRW(c, t) _FCN(F_FSINOUT, (c), (int)sizeof(t)) | |
216 | 217 | |||
217 | /* | 218 | /* | |
218 | * Define command macros for fs-specific commands. | 219 | * Define command macros for fs-specific commands. | |
219 | */ | 220 | */ | |
220 | #define _FCN_FSPRIV(inout, fs, num, len) \ | 221 | #define _FCN_FSPRIV(inout, fs, num, len) \ | |
221 | (F_FSCTL | F_FSPRIV | inout | ((len & F_PARAM_MASK) << 16) | \ | 222 | (F_FSCTL | F_FSPRIV | inout | ((len & F_PARAM_MASK) << 16) | \ | |
222 | (fs) << 8 | (num)) | 223 | (fs) << 8 | (num)) | |
223 | #define _FCNO_FSPRIV(f, c) _FCN_FSPRIV(F_FSVOID, (f), (c), 0) | 224 | #define _FCNO_FSPRIV(f, c) _FCN_FSPRIV(F_FSVOID, (f), (c), 0) | |
224 | #define _FCNR_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSIN, (f), (c), (int)sizeof(t)) | 225 | #define _FCNR_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSIN, (f), (c), (int)sizeof(t)) | |
225 | #define _FCNW_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSOUT, (f), (c), (int)sizeof(t)) | 226 | #define _FCNW_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSOUT, (f), (c), (int)sizeof(t)) | |
226 | #define _FCNRW_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSINOUT, (f), (c), (int)sizeof(t)) | 227 | #define _FCNRW_FSPRIV(f, c, t) _FCN_FSPRIV(F_FSINOUT, (f), (c), (int)sizeof(t)) | |
227 | 228 | |||
228 | #endif /* _NETBSD_SOURCE */ | 229 | #endif /* _NETBSD_SOURCE */ | |
229 | 230 | |||
230 | /* | 231 | /* | |
231 | * Advisory file segment locking data type - | 232 | * Advisory file segment locking data type - | |
232 | * information passed to system by user | 233 | * information passed to system by user | |
233 | */ | 234 | */ | |
234 | struct flock { | 235 | struct flock { | |
235 | off_t l_start; /* starting offset */ | 236 | off_t l_start; /* starting offset */ | |
236 | off_t l_len; /* len = 0 means until end of file */ | 237 | off_t l_len; /* len = 0 means until end of file */ | |
237 | pid_t l_pid; /* lock owner */ | 238 | pid_t l_pid; /* lock owner */ | |
238 | short l_type; /* lock type: read/write, etc. */ | 239 | short l_type; /* lock type: read/write, etc. */ | |
239 | short l_whence; /* type of l_start */ | 240 | short l_whence; /* type of l_start */ | |
240 | }; | 241 | }; | |
241 | 242 | |||
242 | 243 | |||
243 | #if defined(_NETBSD_SOURCE) | 244 | #if defined(_NETBSD_SOURCE) | |
244 | /* lock operations for flock(2) */ | 245 | /* lock operations for flock(2) */ | |
245 | #define LOCK_SH 0x01 /* shared file lock */ | 246 | #define LOCK_SH 0x01 /* shared file lock */ | |
246 | #define LOCK_EX 0x02 /* exclusive file lock */ | 247 | #define LOCK_EX 0x02 /* exclusive file lock */ | |
247 | #define LOCK_NB 0x04 /* don't block when locking */ | 248 | #define LOCK_NB 0x04 /* don't block when locking */ | |
248 | #define LOCK_UN 0x08 /* unlock file */ | 249 | #define LOCK_UN 0x08 /* unlock file */ | |
249 | #endif | 250 | #endif | |
250 | 251 | |||
251 | /* Always ensure that these are consistent with <stdio.h> and <unistd.h>! */ | 252 | /* Always ensure that these are consistent with <stdio.h> and <unistd.h>! */ | |
252 | #ifndef SEEK_SET | 253 | #ifndef SEEK_SET | |
253 | #define SEEK_SET 0 /* set file offset to offset */ | 254 | #define SEEK_SET 0 /* set file offset to offset */ | |
254 | #endif | 255 | #endif | |
255 | #ifndef SEEK_CUR | 256 | #ifndef SEEK_CUR | |
256 | #define SEEK_CUR 1 /* set file offset to current plus offset */ | 257 | #define SEEK_CUR 1 /* set file offset to current plus offset */ | |
257 | #endif | 258 | #endif | |
258 | #ifndef SEEK_END | 259 | #ifndef SEEK_END | |
259 | #define SEEK_END 2 /* set file offset to EOF plus offset */ | 260 | #define SEEK_END 2 /* set file offset to EOF plus offset */ | |
260 | #endif | 261 | #endif | |
261 | 262 | |||
262 | /* | 263 | /* | |
263 | * posix_fadvise advisories. | 264 | * posix_fadvise advisories. | |
264 | */ | 265 | */ | |
265 | 266 | |||
266 | #define POSIX_FADV_NORMAL 0 /* default advice / no advice */ | 267 | #define POSIX_FADV_NORMAL 0 /* default advice / no advice */ | |
267 | #define POSIX_FADV_RANDOM 1 /* random access */ | 268 | #define POSIX_FADV_RANDOM 1 /* random access */ | |
268 | #define POSIX_FADV_SEQUENTIAL 2 /* sequential access (lower to higher) */ | 269 | #define POSIX_FADV_SEQUENTIAL 2 /* sequential access (lower to higher) */ | |
269 | #define POSIX_FADV_WILLNEED 3 /* will be needed in the near future */ | 270 | #define POSIX_FADV_WILLNEED 3 /* will be needed in the near future */ | |
270 | #define POSIX_FADV_DONTNEED 4 /* will not be needed in the near future */ | 271 | #define POSIX_FADV_DONTNEED 4 /* will not be needed in the near future */ | |
271 | #define POSIX_FADV_NOREUSE 5 /* will be accessed only once */ | 272 | #define POSIX_FADV_NOREUSE 5 /* will be accessed only once */ | |
272 | 273 | |||
273 | #ifndef _KERNEL | 274 | #ifndef _KERNEL | |
274 | #include <sys/cdefs.h> | 275 | #include <sys/cdefs.h> | |
275 | 276 | |||
276 | __BEGIN_DECLS | 277 | __BEGIN_DECLS | |
277 | int open(const char *, int, ...); | 278 | int open(const char *, int, ...); | |
278 | int creat(const char *, mode_t); | 279 | int creat(const char *, mode_t); | |
279 | int fcntl(int, int, ...); | 280 | int fcntl(int, int, ...); | |
280 | #if defined(_NETBSD_SOURCE) | 281 | #if defined(_NETBSD_SOURCE) | |
281 | int flock(int, int); | 282 | int flock(int, int); | |
282 | #endif /* _NETBSD_SOURCE */ | 283 | #endif /* _NETBSD_SOURCE */ | |
283 | int posix_fadvise(int, off_t, off_t, int); | 284 | int posix_fadvise(int, off_t, off_t, int); | |
284 | __END_DECLS | 285 | __END_DECLS | |
285 | #endif /* !_KERNEL */ | 286 | #endif /* !_KERNEL */ | |
286 | 287 | |||
287 | #endif /* !_SYS_FCNTL_H_ */ | 288 | #endif /* !_SYS_FCNTL_H_ */ |
--- src/sys/sys/file.h 2008/06/24 10:26:27 1.65
+++ src/sys/sys/file.h 2009/03/18 05:33:23 1.65.6.1
@@ -1,134 +1,168 @@ | @@ -1,134 +1,168 @@ | |||
1 | /* $NetBSD: file.h,v 1.65 2008/06/24 10:26:27 gmcgarry Exp $ */ | 1 | /* $NetBSD: file.h,v 1.65.6.1 2009/03/18 05:33:23 snj Exp $ */ | |
2 | ||||
3 | /*- | |||
4 | * Copyright (c) 2009 The NetBSD Foundation, Inc. | |||
5 | * All rights reserved. | |||
6 | * | |||
7 | * This code is derived from software contributed to The NetBSD Foundation | |||
8 | * by Andrew Doran. | |||
9 | * | |||
10 | * Redistribution and use in source and binary forms, with or without | |||
11 | * modification, are permitted provided that the following conditions | |||
12 | * are met: | |||
13 | * 1. Redistributions of source code must retain the above copyright | |||
14 | * notice, this list of conditions and the following disclaimer. | |||
15 | * 2. Redistributions in binary form must reproduce the above copyright | |||
16 | * notice, this list of conditions and the following disclaimer in the | |||
17 | * documentation and/or other materials provided with the distribution. | |||
18 | * | |||
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
29 | * POSSIBILITY OF SUCH DAMAGE. | |||
30 | */ | |||
2 | 31 | |||
3 | /* | 32 | /* | |
4 | * Copyright (c) 1982, 1986, 1989, 1993 | 33 | * Copyright (c) 1982, 1986, 1989, 1993 | |
5 | * The Regents of the University of California. All rights reserved. | 34 | * The Regents of the University of California. All rights reserved. | |
6 | * | 35 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 36 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 37 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 38 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 39 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 40 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 41 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 42 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 43 | * documentation and/or other materials provided with the distribution. | |
15 | * 3. Neither the name of the University nor the names of its contributors | 44 | * 3. Neither the name of the University nor the names of its contributors | |
16 | * may be used to endorse or promote products derived from this software | 45 | * may be used to endorse or promote products derived from this software | |
17 | * without specific prior written permission. | 46 | * without specific prior written permission. | |
18 | * | 47 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 | * SUCH DAMAGE. | 58 | * SUCH DAMAGE. | |
30 | * | 59 | * | |
31 | * @(#)file.h 8.3 (Berkeley) 1/9/95 | 60 | * @(#)file.h 8.3 (Berkeley) 1/9/95 | |
32 | */ | 61 | */ | |
33 | 62 | |||
34 | #ifndef _SYS_FILE_H_ | 63 | #ifndef _SYS_FILE_H_ | |
35 | #define _SYS_FILE_H_ | 64 | #define _SYS_FILE_H_ | |
36 | 65 | |||
37 | #include <sys/fcntl.h> | 66 | #include <sys/fcntl.h> | |
38 | #include <sys/unistd.h> | 67 | #include <sys/unistd.h> | |
39 | 68 | |||
40 | #ifdef _KERNEL | 69 | #ifdef _KERNEL | |
41 | #include <sys/mallocvar.h> | 70 | #include <sys/mallocvar.h> | |
42 | #include <sys/queue.h> | 71 | #include <sys/queue.h> | |
43 | #include <sys/mutex.h> | 72 | #include <sys/mutex.h> | |
44 | #include <sys/condvar.h> | 73 | #include <sys/condvar.h> | |
45 | 74 | |||
46 | struct proc; | 75 | struct proc; | |
47 | struct lwp; | 76 | struct lwp; | |
48 | struct uio; | 77 | struct uio; | |
49 | struct iovec; | 78 | struct iovec; | |
50 | struct stat; | 79 | struct stat; | |
51 | struct knote; | 80 | struct knote; | |
52 | 81 | |||
53 | /* | 82 | /* | |
54 | * Kernel file descriptor. One entry for each open kernel vnode and | 83 | * Kernel file descriptor. One entry for each open kernel vnode and | |
55 | * socket. | 84 | * socket. | |
85 | * | |||
86 | * This structure is exported via the KERN_FILE and KERN_FILE2 sysctl | |||
87 | * calls. Only add members to the end, do not delete them. | |||
56 | */ | 88 | */ | |
57 | struct file { | 89 | struct file { | |
58 | off_t f_offset; /* first, is 64-bit */ | 90 | off_t f_offset; /* first, is 64-bit */ | |
59 | kauth_cred_t f_cred; /* creds associated with descriptor */ | 91 | kauth_cred_t f_cred; /* creds associated with descriptor */ | |
60 | const struct fileops { | 92 | const struct fileops { | |
61 | int (*fo_read) (struct file *, off_t *, struct uio *, | 93 | int (*fo_read) (struct file *, off_t *, struct uio *, | |
62 | kauth_cred_t, int); | 94 | kauth_cred_t, int); | |
63 | int (*fo_write) (struct file *, off_t *, struct uio *, | 95 | int (*fo_write) (struct file *, off_t *, struct uio *, | |
64 | kauth_cred_t, int); | 96 | kauth_cred_t, int); | |
65 | int (*fo_ioctl) (struct file *, u_long, void *); | 97 | int (*fo_ioctl) (struct file *, u_long, void *); | |
66 | int (*fo_fcntl) (struct file *, u_int, void *); | 98 | int (*fo_fcntl) (struct file *, u_int, void *); | |
67 | int (*fo_poll) (struct file *, int); | 99 | int (*fo_poll) (struct file *, int); | |
68 | int (*fo_stat) (struct file *, struct stat *); | 100 | int (*fo_stat) (struct file *, struct stat *); | |
69 | int (*fo_close) (struct file *); | 101 | int (*fo_close) (struct file *); | |
70 | int (*fo_kqfilter) (struct file *, struct knote *); | 102 | int (*fo_kqfilter) (struct file *, struct knote *); | |
71 | } *f_ops; | 103 | } *f_ops; | |
72 | void *f_data; /* descriptor data, e.g. vnode/socket */ | 104 | void *f_data; /* descriptor data, e.g. vnode/socket */ | |
73 | LIST_ENTRY(file) f_list; /* list of active files */ | 105 | LIST_ENTRY(file) f_list; /* list of active files */ | |
74 | kmutex_t f_lock; /* lock on structure */ | 106 | kmutex_t f_lock; /* lock on structure */ | |
75 | int f_flag; /* see fcntl.h */ | 107 | int f_flag; /* see fcntl.h */ | |
76 | u_int f_iflags; /* internal flags; FIF_* */ | 108 | u_int f_unused1; /* unused; was internal flags; FIF_* */ | |
77 | #define DTYPE_VNODE 1 /* file */ | 109 | #define DTYPE_VNODE 1 /* file */ | |
78 | #define DTYPE_SOCKET 2 /* communications endpoint */ | 110 | #define DTYPE_SOCKET 2 /* communications endpoint */ | |
79 | #define DTYPE_PIPE 3 /* pipe */ | 111 | #define DTYPE_PIPE 3 /* pipe */ | |
80 | #define DTYPE_KQUEUE 4 /* event queue */ | 112 | #define DTYPE_KQUEUE 4 /* event queue */ | |
81 | #define DTYPE_MISC 5 /* misc file descriptor type */ | 113 | #define DTYPE_MISC 5 /* misc file descriptor type */ | |
82 | #define DTYPE_CRYPTO 6 /* crypto */ | 114 | #define DTYPE_CRYPTO 6 /* crypto */ | |
83 | #define DTYPE_MQUEUE 7 /* message queue */ | 115 | #define DTYPE_MQUEUE 7 /* message queue */ | |
84 | #define DTYPE_NAMES \ | 116 | #define DTYPE_NAMES \ | |
85 | "0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue" | 117 | "0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue" | |
86 | u_int f_type; /* descriptor type */ | 118 | u_int f_type; /* descriptor type */ | |
87 | u_int f_advice; /* access pattern hint; UVM_ADV_* */ | 119 | u_int f_advice; /* access pattern hint; UVM_ADV_* */ | |
88 | u_int f_count; /* reference count */ | 120 | u_int f_count; /* reference count */ | |
89 | u_int f_msgcount; /* references from message queue */ | 121 | u_int f_msgcount; /* references from message queue */ | |
122 | u_int f_unpcount; /* deferred close: see uipc_usrreq.c */ | |||
123 | SLIST_ENTRY(file) f_unplist; /* deferred close: see uipc_usrreq.c */ | |||
90 | }; | 124 | }; | |
91 | 125 | |||
92 | #define FILE_LOCK(fp) mutex_enter(&(fp)->f_lock) | 126 | #define FILE_LOCK(fp) mutex_enter(&(fp)->f_lock) | |
93 | #define FILE_UNLOCK(fp) mutex_exit(&(fp)->f_lock) | 127 | #define FILE_UNLOCK(fp) mutex_exit(&(fp)->f_lock) | |
94 | 128 | |||
95 | /* | 129 | /* | |
96 | * Flags for fo_read and fo_write and do_fileread/write/v | 130 | * Flags for fo_read and fo_write and do_fileread/write/v | |
97 | */ | 131 | */ | |
98 | #define FOF_UPDATE_OFFSET 0x0001 /* update the file offset */ | 132 | #define FOF_UPDATE_OFFSET 0x0001 /* update the file offset */ | |
99 | #define FOF_IOV_SYSSPACE 0x0100 /* iov structure in kernel memory */ | 133 | #define FOF_IOV_SYSSPACE 0x0100 /* iov structure in kernel memory */ | |
100 | 134 | |||
101 | LIST_HEAD(filelist, file); | 135 | LIST_HEAD(filelist, file); | |
102 | extern struct filelist filehead; /* head of list of open files */ | 136 | extern struct filelist filehead; /* head of list of open files */ | |
103 | extern u_int maxfiles; /* kernel limit on # of open files */ | 137 | extern u_int maxfiles; /* kernel limit on # of open files */ | |
104 | extern u_int nfiles; /* actual number of open files */ | 138 | extern u_int nfiles; /* actual number of open files */ | |
105 | 139 | |||
106 | extern const struct fileops vnops; /* vnode operations for files */ | 140 | extern const struct fileops vnops; /* vnode operations for files */ | |
107 | 141 | |||
108 | int dofileread(int, struct file *, void *, size_t, | 142 | int dofileread(int, struct file *, void *, size_t, | |
109 | off_t *, int, register_t *); | 143 | off_t *, int, register_t *); | |
110 | int dofilewrite(int, struct file *, const void *, | 144 | int dofilewrite(int, struct file *, const void *, | |
111 | size_t, off_t *, int, register_t *); | 145 | size_t, off_t *, int, register_t *); | |
112 | 146 | |||
113 | int do_filereadv(int, const struct iovec *, int, off_t *, | 147 | int do_filereadv(int, const struct iovec *, int, off_t *, | |
114 | int, register_t *); | 148 | int, register_t *); | |
115 | int do_filewritev(int, const struct iovec *, int, off_t *, | 149 | int do_filewritev(int, const struct iovec *, int, off_t *, | |
116 | int, register_t *); | 150 | int, register_t *); | |
117 | 151 | |||
118 | int fsetown(pid_t *, u_long, const void *); | 152 | int fsetown(pid_t *, u_long, const void *); | |
119 | int fgetown(pid_t, u_long, void *); | 153 | int fgetown(pid_t, u_long, void *); | |
120 | void fownsignal(pid_t, int, int, int, void *); | 154 | void fownsignal(pid_t, int, int, int, void *); | |
121 | 155 | |||
122 | /* Commonly used fileops */ | 156 | /* Commonly used fileops */ | |
123 | int fnullop_fcntl(struct file *, u_int, void *); | 157 | int fnullop_fcntl(struct file *, u_int, void *); | |
124 | int fnullop_poll(struct file *, int); | 158 | int fnullop_poll(struct file *, int); | |
125 | int fnullop_kqfilter(struct file *, struct knote *); | 159 | int fnullop_kqfilter(struct file *, struct knote *); | |
126 | int fbadop_read(struct file *, off_t *, struct uio *, kauth_cred_t, int); | 160 | int fbadop_read(struct file *, off_t *, struct uio *, kauth_cred_t, int); | |
127 | int fbadop_write(struct file *, off_t *, struct uio *, kauth_cred_t, int); | 161 | int fbadop_write(struct file *, off_t *, struct uio *, kauth_cred_t, int); | |
128 | int fbadop_ioctl(struct file *, u_long, void *); | 162 | int fbadop_ioctl(struct file *, u_long, void *); | |
129 | int fbadop_close(struct file *); | 163 | int fbadop_close(struct file *); | |
130 | int fbadop_stat(struct file *, struct stat *); | 164 | int fbadop_stat(struct file *, struct stat *); | |
131 | 165 | |||
132 | #endif /* _KERNEL */ | 166 | #endif /* _KERNEL */ | |
133 | 167 | |||
134 | #endif /* _SYS_FILE_H_ */ | 168 | #endif /* _SYS_FILE_H_ */ |
--- src/sys/sys/param.h 2009/02/09 00:22:09 1.330.4.3
+++ src/sys/sys/param.h 2009/03/18 05:33:23 1.330.4.4
@@ -1,432 +1,432 @@ | @@ -1,432 +1,432 @@ | |||
1 | /* $NetBSD: param.h,v 1.330.4.3 2009/02/09 00:22:09 snj Exp $ */ | 1 | /* $NetBSD: param.h,v 1.330.4.4 2009/03/18 05:33:23 snj Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1982, 1986, 1989, 1993 | 4 | * Copyright (c) 1982, 1986, 1989, 1993 | |
5 | * The Regents of the University of California. All rights reserved. | 5 | * The Regents of the University of California. All rights reserved. | |
6 | * (c) UNIX System Laboratories, Inc. | 6 | * (c) UNIX System Laboratories, Inc. | |
7 | * All or some portions of this file are derived from material licensed | 7 | * All or some portions of this file are derived from material licensed | |
8 | * to the University of California by American Telephone and Telegraph | 8 | * to the University of California by American Telephone and Telegraph | |
9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | 9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
10 | * the permission of UNIX System Laboratories, Inc. | 10 | * the permission of UNIX System Laboratories, Inc. | |
11 | * | 11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | 12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | 13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | 14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | 15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | 16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | 17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | 18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | 19 | * documentation and/or other materials provided with the distribution. | |
20 | * 3. Neither the name of the University nor the names of its contributors | 20 | * 3. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | 21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | 22 | * without specific prior written permission. | |
23 | * | 23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | 34 | * SUCH DAMAGE. | |
35 | * | 35 | * | |
36 | * @(#)param.h 8.3 (Berkeley) 4/4/95 | 36 | * @(#)param.h 8.3 (Berkeley) 4/4/95 | |
37 | */ | 37 | */ | |
38 | 38 | |||
39 | #ifndef _SYS_PARAM_H_ | 39 | #ifndef _SYS_PARAM_H_ | |
40 | #define _SYS_PARAM_H_ | 40 | #define _SYS_PARAM_H_ | |
41 | 41 | |||
42 | /* | 42 | /* | |
43 | * Historic BSD #defines -- probably will remain untouched for all time. | 43 | * Historic BSD #defines -- probably will remain untouched for all time. | |
44 | */ | 44 | */ | |
45 | #define BSD 199506 /* System version (year & month). */ | 45 | #define BSD 199506 /* System version (year & month). */ | |
46 | #define BSD4_3 1 | 46 | #define BSD4_3 1 | |
47 | #define BSD4_4 1 | 47 | #define BSD4_4 1 | |
48 | 48 | |||
49 | /* | 49 | /* | |
50 | * #define __NetBSD_Version__ MMmmrrpp00 | 50 | * #define __NetBSD_Version__ MMmmrrpp00 | |
51 | * | 51 | * | |
52 | * M = major version | 52 | * M = major version | |
53 | * m = minor version; a minor number of 99 indicates current. | 53 | * m = minor version; a minor number of 99 indicates current. | |
54 | * r = 0 (*) | 54 | * r = 0 (*) | |
55 | * p = patchlevel | 55 | * p = patchlevel | |
56 | * | 56 | * | |
57 | * When new releases are made, src/gnu/usr.bin/groff/tmac/mdoc.local | 57 | * When new releases are made, src/gnu/usr.bin/groff/tmac/mdoc.local | |
58 | * needs to be updated and the changes sent back to the groff maintainers. | 58 | * needs to be updated and the changes sent back to the groff maintainers. | |
59 | * | 59 | * | |
60 | * (*) Up to 2.0I "release" used to be "",A-Z,Z[A-Z] but numeric | 60 | * (*) Up to 2.0I "release" used to be "",A-Z,Z[A-Z] but numeric | |
61 | * e.g. NetBSD-1.2D = 102040000 ('D' == 4) | 61 | * e.g. NetBSD-1.2D = 102040000 ('D' == 4) | |
62 | * NetBSD-2.0H (200080000) was changed on 20041001 to: | 62 | * NetBSD-2.0H (200080000) was changed on 20041001 to: | |
63 | * 2.99.9 (299000900) | 63 | * 2.99.9 (299000900) | |
64 | */ | 64 | */ | |
65 | 65 | |||
66 | #define __NetBSD_Version__ 500000000 /* NetBSD 5.0_RC2 */ | 66 | #define __NetBSD_Version__ 500000001 /* NetBSD 5.0_RC2 */ | |
67 | 67 | |||
68 | #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \ | 68 | #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \ | |
69 | (m) * 1000000) + (p) * 100) <= __NetBSD_Version__) | 69 | (m) * 1000000) + (p) * 100) <= __NetBSD_Version__) | |
70 | 70 | |||
71 | /* | 71 | /* | |
72 | * Historical NetBSD #define | 72 | * Historical NetBSD #define | |
73 | * | 73 | * | |
74 | * NetBSD 1.4 was the last release for which this value was incremented. | 74 | * NetBSD 1.4 was the last release for which this value was incremented. | |
75 | * The value is now permanently fixed at 199905. It will never be | 75 | * The value is now permanently fixed at 199905. It will never be | |
76 | * changed again. | 76 | * changed again. | |
77 | * | 77 | * | |
78 | * New code must use __NetBSD_Version__ instead, and should not even | 78 | * New code must use __NetBSD_Version__ instead, and should not even | |
79 | * count on NetBSD being defined. | 79 | * count on NetBSD being defined. | |
80 | * | 80 | * | |
81 | */ | 81 | */ | |
82 | 82 | |||
83 | #define NetBSD 199905 /* NetBSD version (year & month). */ | 83 | #define NetBSD 199905 /* NetBSD version (year & month). */ | |
84 | 84 | |||
85 | #include <sys/null.h> | 85 | #include <sys/null.h> | |
86 | 86 | |||
87 | #ifndef _LOCORE | 87 | #ifndef _LOCORE | |
88 | #include <sys/inttypes.h> | 88 | #include <sys/inttypes.h> | |
89 | #include <sys/types.h> | 89 | #include <sys/types.h> | |
90 | #endif | 90 | #endif | |
91 | 91 | |||
92 | /* | 92 | /* | |
93 | * Machine-independent constants (some used in following include files). | 93 | * Machine-independent constants (some used in following include files). | |
94 | * Redefined constants are from POSIX 1003.1 limits file. | 94 | * Redefined constants are from POSIX 1003.1 limits file. | |
95 | * | 95 | * | |
96 | * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>) | 96 | * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>) | |
97 | * MAXHOSTNAMELEN should be >= (_POSIX_HOST_NAME_MAX + 1) (see <limits.h>) | 97 | * MAXHOSTNAMELEN should be >= (_POSIX_HOST_NAME_MAX + 1) (see <limits.h>) | |
98 | * MAXLOGNAME should be >= UT_NAMESIZE (see <utmp.h>) | 98 | * MAXLOGNAME should be >= UT_NAMESIZE (see <utmp.h>) | |
99 | */ | 99 | */ | |
100 | #include <sys/syslimits.h> | 100 | #include <sys/syslimits.h> | |
101 | 101 | |||
102 | #define MAXCOMLEN 16 /* max command name remembered */ | 102 | #define MAXCOMLEN 16 /* max command name remembered */ | |
103 | #define MAXINTERP PATH_MAX /* max interpreter file name length */ | 103 | #define MAXINTERP PATH_MAX /* max interpreter file name length */ | |
104 | /* DEPRECATED: use LOGIN_NAME_MAX instead. */ | 104 | /* DEPRECATED: use LOGIN_NAME_MAX instead. */ | |
105 | #define MAXLOGNAME (LOGIN_NAME_MAX - 1) /* max login name length */ | 105 | #define MAXLOGNAME (LOGIN_NAME_MAX - 1) /* max login name length */ | |
106 | #define NCARGS ARG_MAX /* max bytes for an exec function */ | 106 | #define NCARGS ARG_MAX /* max bytes for an exec function */ | |
107 | #define NGROUPS NGROUPS_MAX /* max number groups */ | 107 | #define NGROUPS NGROUPS_MAX /* max number groups */ | |
108 | #define NOGROUP 65535 /* marker for empty group set member */ | 108 | #define NOGROUP 65535 /* marker for empty group set member */ | |
109 | #define MAXHOSTNAMELEN 256 /* max hostname size */ | 109 | #define MAXHOSTNAMELEN 256 /* max hostname size */ | |
110 | 110 | |||
111 | #ifndef NOFILE | 111 | #ifndef NOFILE | |
112 | #define NOFILE OPEN_MAX /* max open files per process */ | 112 | #define NOFILE OPEN_MAX /* max open files per process */ | |
113 | #endif | 113 | #endif | |
114 | #ifndef MAXUPRC /* max simultaneous processes */ | 114 | #ifndef MAXUPRC /* max simultaneous processes */ | |
115 | #define MAXUPRC CHILD_MAX /* POSIX 1003.1-compliant default */ | 115 | #define MAXUPRC CHILD_MAX /* POSIX 1003.1-compliant default */ | |
116 | #else | 116 | #else | |
117 | #if (MAXUPRC - 0) < CHILD_MAX | 117 | #if (MAXUPRC - 0) < CHILD_MAX | |
118 | #error MAXUPRC less than CHILD_MAX. See options(4) for details. | 118 | #error MAXUPRC less than CHILD_MAX. See options(4) for details. | |
119 | #endif /* (MAXUPRC - 0) < CHILD_MAX */ | 119 | #endif /* (MAXUPRC - 0) < CHILD_MAX */ | |
120 | #endif /* !defined(MAXUPRC) */ | 120 | #endif /* !defined(MAXUPRC) */ | |
121 | 121 | |||
122 | /* More types and definitions used throughout the kernel. */ | 122 | /* More types and definitions used throughout the kernel. */ | |
123 | #ifdef _KERNEL | 123 | #ifdef _KERNEL | |
124 | #include <sys/cdefs.h> | 124 | #include <sys/cdefs.h> | |
125 | #include <sys/errno.h> | 125 | #include <sys/errno.h> | |
126 | #include <sys/time.h> | 126 | #include <sys/time.h> | |
127 | #include <sys/resource.h> | 127 | #include <sys/resource.h> | |
128 | #include <sys/ucred.h> | 128 | #include <sys/ucred.h> | |
129 | #include <sys/uio.h> | 129 | #include <sys/uio.h> | |
130 | #ifndef NPROC | 130 | #ifndef NPROC | |
131 | #define NPROC (20 + 16 * MAXUSERS) | 131 | #define NPROC (20 + 16 * MAXUSERS) | |
132 | #endif | 132 | #endif | |
133 | #ifndef NTEXT | 133 | #ifndef NTEXT | |
134 | #define NTEXT (80 + NPROC / 8) /* actually the object cache */ | 134 | #define NTEXT (80 + NPROC / 8) /* actually the object cache */ | |
135 | #endif | 135 | #endif | |
136 | #ifndef NVNODE | 136 | #ifndef NVNODE | |
137 | #define NVNODE (NPROC + NTEXT + 100) | 137 | #define NVNODE (NPROC + NTEXT + 100) | |
138 | #define NVNODE_IMPLICIT | 138 | #define NVNODE_IMPLICIT | |
139 | #endif | 139 | #endif | |
140 | #ifndef VNODE_VA_MAXPCT | 140 | #ifndef VNODE_VA_MAXPCT | |
141 | #define VNODE_VA_MAXPCT 20 | 141 | #define VNODE_VA_MAXPCT 20 | |
142 | #endif | 142 | #endif | |
143 | #ifndef BUFCACHE_VA_MAXPCT | 143 | #ifndef BUFCACHE_VA_MAXPCT | |
144 | #define BUFCACHE_VA_MAXPCT 20 | 144 | #define BUFCACHE_VA_MAXPCT 20 | |
145 | #endif | 145 | #endif | |
146 | #define VNODE_COST 2048 /* assumed space in bytes */ | 146 | #define VNODE_COST 2048 /* assumed space in bytes */ | |
147 | #endif /* _KERNEL */ | 147 | #endif /* _KERNEL */ | |
148 | 148 | |||
149 | /* Signals. */ | 149 | /* Signals. */ | |
150 | #include <sys/signal.h> | 150 | #include <sys/signal.h> | |
151 | 151 | |||
152 | /* Machine type dependent parameters. */ | 152 | /* Machine type dependent parameters. */ | |
153 | #include <machine/param.h> | 153 | #include <machine/param.h> | |
154 | #include <machine/limits.h> | 154 | #include <machine/limits.h> | |
155 | 155 | |||
156 | /* pages ("clicks") to disk blocks */ | 156 | /* pages ("clicks") to disk blocks */ | |
157 | #define ctod(x) ((x) << (PGSHIFT - DEV_BSHIFT)) | 157 | #define ctod(x) ((x) << (PGSHIFT - DEV_BSHIFT)) | |
158 | #define dtoc(x) ((x) >> (PGSHIFT - DEV_BSHIFT)) | 158 | #define dtoc(x) ((x) >> (PGSHIFT - DEV_BSHIFT)) | |
159 | 159 | |||
160 | /* bytes to pages */ | 160 | /* bytes to pages */ | |
161 | #define ctob(x) ((x) << PGSHIFT) | 161 | #define ctob(x) ((x) << PGSHIFT) | |
162 | #define btoc(x) (((x) + PGOFSET) >> PGSHIFT) | 162 | #define btoc(x) (((x) + PGOFSET) >> PGSHIFT) | |
163 | 163 | |||
164 | /* bytes to disk blocks */ | 164 | /* bytes to disk blocks */ | |
165 | #define dbtob(x) ((x) << DEV_BSHIFT) | 165 | #define dbtob(x) ((x) << DEV_BSHIFT) | |
166 | #define btodb(x) ((x) >> DEV_BSHIFT) | 166 | #define btodb(x) ((x) >> DEV_BSHIFT) | |
167 | 167 | |||
168 | #ifndef COHERENCY_UNIT | 168 | #ifndef COHERENCY_UNIT | |
169 | #define COHERENCY_UNIT 64 | 169 | #define COHERENCY_UNIT 64 | |
170 | #endif | 170 | #endif | |
171 | #ifndef CACHE_LINE_SIZE | 171 | #ifndef CACHE_LINE_SIZE | |
172 | #define CACHE_LINE_SIZE 64 | 172 | #define CACHE_LINE_SIZE 64 | |
173 | #endif | 173 | #endif | |
174 | #ifndef MAXCPUS | 174 | #ifndef MAXCPUS | |
175 | #define MAXCPUS 32 | 175 | #define MAXCPUS 32 | |
176 | #endif | 176 | #endif | |
177 | #ifndef MAX_LWP_PER_PROC | 177 | #ifndef MAX_LWP_PER_PROC | |
178 | #define MAX_LWP_PER_PROC 8000 | 178 | #define MAX_LWP_PER_PROC 8000 | |
179 | #endif | 179 | #endif | |
180 | 180 | |||
181 | /* | 181 | /* | |
182 | * Stack macros. On most architectures, the stack grows down, | 182 | * Stack macros. On most architectures, the stack grows down, | |
183 | * towards lower addresses; it is the rare architecture where | 183 | * towards lower addresses; it is the rare architecture where | |
184 | * it grows up, towards higher addresses. | 184 | * it grows up, towards higher addresses. | |
185 | * | 185 | * | |
186 | * STACK_GROW and STACK_SHRINK adjust a stack pointer by some | 186 | * STACK_GROW and STACK_SHRINK adjust a stack pointer by some | |
187 | * size, no questions asked. STACK_ALIGN aligns a stack pointer. | 187 | * size, no questions asked. STACK_ALIGN aligns a stack pointer. | |
188 | * | 188 | * | |
189 | * STACK_ALLOC returns a pointer to allocated stack space of | 189 | * STACK_ALLOC returns a pointer to allocated stack space of | |
190 | * some size; given such a pointer and a size, STACK_MAX gives | 190 | * some size; given such a pointer and a size, STACK_MAX gives | |
191 | * the maximum (in the "maxsaddr" sense) stack address of the | 191 | * the maximum (in the "maxsaddr" sense) stack address of the | |
192 | * allocated memory. | 192 | * allocated memory. | |
193 | */ | 193 | */ | |
194 | #if defined(_KERNEL) || defined(__EXPOSE_STACK) | 194 | #if defined(_KERNEL) || defined(__EXPOSE_STACK) | |
195 | #ifdef __MACHINE_STACK_GROWS_UP | 195 | #ifdef __MACHINE_STACK_GROWS_UP | |
196 | #define STACK_GROW(sp, _size) (((char *)(void *)(sp)) + (_size)) | 196 | #define STACK_GROW(sp, _size) (((char *)(void *)(sp)) + (_size)) | |
197 | #define STACK_SHRINK(sp, _size) (((char *)(void *)(sp)) - (_size)) | 197 | #define STACK_SHRINK(sp, _size) (((char *)(void *)(sp)) - (_size)) | |
198 | #define STACK_ALIGN(sp, bytes) \ | 198 | #define STACK_ALIGN(sp, bytes) \ | |
199 | ((char *)((((unsigned long)(sp)) + (bytes)) & ~(bytes))) | 199 | ((char *)((((unsigned long)(sp)) + (bytes)) & ~(bytes))) | |
200 | #define STACK_ALLOC(sp, _size) ((char *)(void *)(sp)) | 200 | #define STACK_ALLOC(sp, _size) ((char *)(void *)(sp)) | |
201 | #define STACK_MAX(p, _size) (((char *)(void *)(p)) + (_size)) | 201 | #define STACK_MAX(p, _size) (((char *)(void *)(p)) + (_size)) | |
202 | #else | 202 | #else | |
203 | #define STACK_GROW(sp, _size) (((char *)(void *)(sp)) - (_size)) | 203 | #define STACK_GROW(sp, _size) (((char *)(void *)(sp)) - (_size)) | |
204 | #define STACK_SHRINK(sp, _size) (((char *)(void *)(sp)) + (_size)) | 204 | #define STACK_SHRINK(sp, _size) (((char *)(void *)(sp)) + (_size)) | |
205 | #define STACK_ALIGN(sp, bytes) \ | 205 | #define STACK_ALIGN(sp, bytes) \ | |
206 | ((char *)(((unsigned long)(sp)) & ~(bytes))) | 206 | ((char *)(((unsigned long)(sp)) & ~(bytes))) | |
207 | #define STACK_ALLOC(sp, _size) (((char *)(void *)(sp)) - (_size)) | 207 | #define STACK_ALLOC(sp, _size) (((char *)(void *)(sp)) - (_size)) | |
208 | #define STACK_MAX(p, _size) ((char *)(void *)(p)) | 208 | #define STACK_MAX(p, _size) ((char *)(void *)(p)) | |
209 | #endif | 209 | #endif | |
210 | #endif /* defined(_KERNEL) || defined(__EXPOSE_STACK) */ | 210 | #endif /* defined(_KERNEL) || defined(__EXPOSE_STACK) */ | |
211 | 211 | |||
212 | /* | 212 | /* | |
213 | * Historic priority levels. These are meaningless and remain only | 213 | * Historic priority levels. These are meaningless and remain only | |
214 | * for source compatibility. Do not use in new code. | 214 | * for source compatibility. Do not use in new code. | |
215 | */ | 215 | */ | |
216 | #define PSWP 0 | 216 | #define PSWP 0 | |
217 | #define PVM 4 | 217 | #define PVM 4 | |
218 | #define PINOD 8 | 218 | #define PINOD 8 | |
219 | #define PRIBIO 16 | 219 | #define PRIBIO 16 | |
220 | #define PVFS 20 | 220 | #define PVFS 20 | |
221 | #define PZERO 22 | 221 | #define PZERO 22 | |
222 | #define PSOCK 24 | 222 | #define PSOCK 24 | |
223 | #define PWAIT 32 | 223 | #define PWAIT 32 | |
224 | #define PLOCK 36 | 224 | #define PLOCK 36 | |
225 | #define PPAUSE 40 | 225 | #define PPAUSE 40 | |
226 | #define PUSER 50 | 226 | #define PUSER 50 | |
227 | #define MAXPRI 127 | 227 | #define MAXPRI 127 | |
228 | 228 | |||
229 | #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ | 229 | #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ | |
230 | #define PNORELOCK 0x200 /* OR'd with pri for tsleep to not relock */ | 230 | #define PNORELOCK 0x200 /* OR'd with pri for tsleep to not relock */ | |
231 | 231 | |||
232 | /* | 232 | /* | |
233 | * New priority levels. | 233 | * New priority levels. | |
234 | */ | 234 | */ | |
235 | #define PRI_COUNT 224 | 235 | #define PRI_COUNT 224 | |
236 | #define PRI_NONE (-1) | 236 | #define PRI_NONE (-1) | |
237 | 237 | |||
238 | #define PRI_KERNEL_RT 192 | 238 | #define PRI_KERNEL_RT 192 | |
239 | #define NPRI_KERNEL_RT 32 | 239 | #define NPRI_KERNEL_RT 32 | |
240 | #define MAXPRI_KERNEL_RT (PRI_KERNEL_RT + NPRI_KERNEL_RT - 1) | 240 | #define MAXPRI_KERNEL_RT (PRI_KERNEL_RT + NPRI_KERNEL_RT - 1) | |
241 | 241 | |||
242 | #define PRI_USER_RT 128 | 242 | #define PRI_USER_RT 128 | |
243 | #define NPRI_USER_RT 64 | 243 | #define NPRI_USER_RT 64 | |
244 | #define MAXPRI_USER_RT (PRI_USER_RT + NPRI_USER_RT - 1) | 244 | #define MAXPRI_USER_RT (PRI_USER_RT + NPRI_USER_RT - 1) | |
245 | 245 | |||
246 | #define PRI_KTHREAD 96 | 246 | #define PRI_KTHREAD 96 | |
247 | #define NPRI_KTHREAD 32 | 247 | #define NPRI_KTHREAD 32 | |
248 | #define MAXPRI_KTHREAD (PRI_KTHREAD + NPRI_KTHREAD - 1) | 248 | #define MAXPRI_KTHREAD (PRI_KTHREAD + NPRI_KTHREAD - 1) | |
249 | 249 | |||
250 | #define PRI_KERNEL 64 | 250 | #define PRI_KERNEL 64 | |
251 | #define NPRI_KERNEL 32 | 251 | #define NPRI_KERNEL 32 | |
252 | #define MAXPRI_KERNEL (PRI_KERNEL + NPRI_KERNEL - 1) | 252 | #define MAXPRI_KERNEL (PRI_KERNEL + NPRI_KERNEL - 1) | |
253 | 253 | |||
254 | #define PRI_USER 0 | 254 | #define PRI_USER 0 | |
255 | #define NPRI_USER 64 | 255 | #define NPRI_USER 64 | |
256 | #define MAXPRI_USER (PRI_USER + NPRI_USER - 1) | 256 | #define MAXPRI_USER (PRI_USER + NPRI_USER - 1) | |
257 | 257 | |||
258 | /* Priority range used by POSIX real-time features */ | 258 | /* Priority range used by POSIX real-time features */ | |
259 | #define SCHED_PRI_MIN 0 | 259 | #define SCHED_PRI_MIN 0 | |
260 | #define SCHED_PRI_MAX 63 | 260 | #define SCHED_PRI_MAX 63 | |
261 | 261 | |||
262 | /* | 262 | /* | |
263 | * Kernel thread priorities. | 263 | * Kernel thread priorities. | |
264 | */ | 264 | */ | |
265 | #define PRI_SOFTSERIAL MAXPRI_KERNEL_RT | 265 | #define PRI_SOFTSERIAL MAXPRI_KERNEL_RT | |
266 | #define PRI_SOFTNET (MAXPRI_KERNEL_RT - schedppq * 1) | 266 | #define PRI_SOFTNET (MAXPRI_KERNEL_RT - schedppq * 1) | |
267 | #define PRI_SOFTBIO (MAXPRI_KERNEL_RT - schedppq * 2) | 267 | #define PRI_SOFTBIO (MAXPRI_KERNEL_RT - schedppq * 2) | |
268 | #define PRI_SOFTCLOCK (MAXPRI_KERNEL_RT - schedppq * 3) | 268 | #define PRI_SOFTCLOCK (MAXPRI_KERNEL_RT - schedppq * 3) | |
269 | 269 | |||
270 | #define PRI_XCALL MAXPRI_KTHREAD | 270 | #define PRI_XCALL MAXPRI_KTHREAD | |
271 | #define PRI_PGDAEMON (MAXPRI_KTHREAD - schedppq * 1) | 271 | #define PRI_PGDAEMON (MAXPRI_KTHREAD - schedppq * 1) | |
272 | #define PRI_VM (MAXPRI_KTHREAD - schedppq * 2) | 272 | #define PRI_VM (MAXPRI_KTHREAD - schedppq * 2) | |
273 | #define PRI_IOFLUSH (MAXPRI_KTHREAD - schedppq * 3) | 273 | #define PRI_IOFLUSH (MAXPRI_KTHREAD - schedppq * 3) | |
274 | #define PRI_BIO (MAXPRI_KTHREAD - schedppq * 4) | 274 | #define PRI_BIO (MAXPRI_KTHREAD - schedppq * 4) | |
275 | 275 | |||
276 | #define PRI_IDLE PRI_USER | 276 | #define PRI_IDLE PRI_USER | |
277 | 277 | |||
278 | /* | 278 | /* | |
279 | * Miscellaneous. | 279 | * Miscellaneous. | |
280 | */ | 280 | */ | |
281 | #define NBPW sizeof(int) /* number of bytes per word (integer) */ | 281 | #define NBPW sizeof(int) /* number of bytes per word (integer) */ | |
282 | 282 | |||
283 | #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ | 283 | #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ | |
284 | #define NODEV (dev_t)(-1) /* non-existent device */ | 284 | #define NODEV (dev_t)(-1) /* non-existent device */ | |
285 | 285 | |||
286 | #define CBLOCK 64 /* Clist block size, must be a power of 2. */ | 286 | #define CBLOCK 64 /* Clist block size, must be a power of 2. */ | |
287 | #define CBQSIZE (CBLOCK/NBBY) /* Quote bytes/cblock - can do better. */ | 287 | #define CBQSIZE (CBLOCK/NBBY) /* Quote bytes/cblock - can do better. */ | |
288 | /* Data chars/clist. */ | 288 | /* Data chars/clist. */ | |
289 | #define CBSIZE (CBLOCK - (int)sizeof(struct cblock *) - CBQSIZE) | 289 | #define CBSIZE (CBLOCK - (int)sizeof(struct cblock *) - CBQSIZE) | |
290 | #define CROUND (CBLOCK - 1) /* Clist rounding. */ | 290 | #define CROUND (CBLOCK - 1) /* Clist rounding. */ | |
291 | 291 | |||
292 | /* | 292 | /* | |
293 | * File system parameters and macros. | 293 | * File system parameters and macros. | |
294 | * | 294 | * | |
295 | * The file system is made out of blocks of at most MAXBSIZE units, with | 295 | * The file system is made out of blocks of at most MAXBSIZE units, with | |
296 | * smaller units (fragments) only in the last direct block. MAXBSIZE | 296 | * smaller units (fragments) only in the last direct block. MAXBSIZE | |
297 | * primarily determines the size of buffers in the buffer pool. It may be | 297 | * primarily determines the size of buffers in the buffer pool. It may be | |
298 | * made larger without any effect on existing file systems; however making | 298 | * made larger without any effect on existing file systems; however making | |
299 | * it smaller may make some file systems unmountable. | 299 | * it smaller may make some file systems unmountable. | |
300 | */ | 300 | */ | |
301 | #ifndef MAXBSIZE /* XXX */ | 301 | #ifndef MAXBSIZE /* XXX */ | |
302 | #define MAXBSIZE MAXPHYS | 302 | #define MAXBSIZE MAXPHYS | |
303 | #endif | 303 | #endif | |
304 | #define MAXFRAG 8 | 304 | #define MAXFRAG 8 | |
305 | 305 | |||
306 | /* | 306 | /* | |
307 | * MAXPATHLEN defines the longest permissible path length after expanding | 307 | * MAXPATHLEN defines the longest permissible path length after expanding | |
308 | * symbolic links. It is used to allocate a temporary buffer from the buffer | 308 | * symbolic links. It is used to allocate a temporary buffer from the buffer | |
309 | * pool in which to do the name expansion, hence should be a power of two, | 309 | * pool in which to do the name expansion, hence should be a power of two, | |
310 | * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the | 310 | * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the | |
311 | * maximum number of symbolic links that may be expanded in a path name. | 311 | * maximum number of symbolic links that may be expanded in a path name. | |
312 | * It should be set high enough to allow all legitimate uses, but halt | 312 | * It should be set high enough to allow all legitimate uses, but halt | |
313 | * infinite loops reasonably quickly. | 313 | * infinite loops reasonably quickly. | |
314 | * | 314 | * | |
315 | * MAXSYMLINKS should be >= _POSIX_SYMLOOP_MAX (see <limits.h>) | 315 | * MAXSYMLINKS should be >= _POSIX_SYMLOOP_MAX (see <limits.h>) | |
316 | */ | 316 | */ | |
317 | #define MAXPATHLEN PATH_MAX | 317 | #define MAXPATHLEN PATH_MAX | |
318 | #define MAXSYMLINKS 32 | 318 | #define MAXSYMLINKS 32 | |
319 | 319 | |||
320 | /* Bit map related macros. */ | 320 | /* Bit map related macros. */ | |
321 | #define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) | 321 | #define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) | |
322 | #define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY))) | 322 | #define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY))) | |
323 | #define isset(a,i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) | 323 | #define isset(a,i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) | |
324 | #define isclr(a,i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) | 324 | #define isclr(a,i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) | |
325 | 325 | |||
326 | /* Macros for counting and rounding. */ | 326 | /* Macros for counting and rounding. */ | |
327 | #ifndef howmany | 327 | #ifndef howmany | |
328 | #define howmany(x, y) (((x)+((y)-1))/(y)) | 328 | #define howmany(x, y) (((x)+((y)-1))/(y)) | |
329 | #endif | 329 | #endif | |
330 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | 330 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | |
331 | #define rounddown(x,y) (((x)/(y))*(y)) | 331 | #define rounddown(x,y) (((x)/(y))*(y)) | |
332 | #define roundup2(x, m) (((x) + m - 1) & ~(m - 1)) | 332 | #define roundup2(x, m) (((x) + m - 1) & ~(m - 1)) | |
333 | #define powerof2(x) ((((x)-1)&(x))==0) | 333 | #define powerof2(x) ((((x)-1)&(x))==0) | |
334 | 334 | |||
335 | /* Macros for min/max. */ | 335 | /* Macros for min/max. */ | |
336 | #define MIN(a,b) (((a)<(b))?(a):(b)) | 336 | #define MIN(a,b) (((a)<(b))?(a):(b)) | |
337 | #define MAX(a,b) (((a)>(b))?(a):(b)) | 337 | #define MAX(a,b) (((a)>(b))?(a):(b)) | |
338 | 338 | |||
339 | /* | 339 | /* | |
340 | * Constants for setting the parameters of the kernel memory allocator. | 340 | * Constants for setting the parameters of the kernel memory allocator. | |
341 | * | 341 | * | |
342 | * 2 ** MINBUCKET is the smallest unit of memory that will be | 342 | * 2 ** MINBUCKET is the smallest unit of memory that will be | |
343 | * allocated. It must be at least large enough to hold a pointer. | 343 | * allocated. It must be at least large enough to hold a pointer. | |
344 | * | 344 | * | |
345 | * Units of memory less or equal to MAXALLOCSAVE will permanently | 345 | * Units of memory less or equal to MAXALLOCSAVE will permanently | |
346 | * allocate physical memory; requests for these size pieces of | 346 | * allocate physical memory; requests for these size pieces of | |
347 | * memory are quite fast. Allocations greater than MAXALLOCSAVE must | 347 | * memory are quite fast. Allocations greater than MAXALLOCSAVE must | |
348 | * always allocate and free physical memory; requests for these | 348 | * always allocate and free physical memory; requests for these | |
349 | * size allocations should be done infrequently as they will be slow. | 349 | * size allocations should be done infrequently as they will be slow. | |
350 | * | 350 | * | |
351 | * Constraints: NBPG <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and | 351 | * Constraints: NBPG <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and | |
352 | * MAXALLOCSAVE must be a power of two. | 352 | * MAXALLOCSAVE must be a power of two. | |
353 | */ | 353 | */ | |
354 | #ifdef _LP64 | 354 | #ifdef _LP64 | |
355 | #define MINBUCKET 5 /* 5 => min allocation of 32 bytes */ | 355 | #define MINBUCKET 5 /* 5 => min allocation of 32 bytes */ | |
356 | #else | 356 | #else | |
357 | #define MINBUCKET 4 /* 4 => min allocation of 16 bytes */ | 357 | #define MINBUCKET 4 /* 4 => min allocation of 16 bytes */ | |
358 | #endif | 358 | #endif | |
359 | #define MAXALLOCSAVE (2 * NBPG) | 359 | #define MAXALLOCSAVE (2 * NBPG) | |
360 | 360 | |||
361 | /* | 361 | /* | |
362 | * Scale factor for scaled integers used to count %cpu time and load avgs. | 362 | * Scale factor for scaled integers used to count %cpu time and load avgs. | |
363 | * | 363 | * | |
364 | * The number of CPU `tick's that map to a unique `%age' can be expressed | 364 | * The number of CPU `tick's that map to a unique `%age' can be expressed | |
365 | * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that | 365 | * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that | |
366 | * can be calculated (assuming 32 bits) can be closely approximated using | 366 | * can be calculated (assuming 32 bits) can be closely approximated using | |
367 | * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). | 367 | * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). | |
368 | * | 368 | * | |
369 | * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', | 369 | * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', | |
370 | * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. | 370 | * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. | |
371 | */ | 371 | */ | |
372 | #define FSHIFT 11 /* bits to right of fixed binary point */ | 372 | #define FSHIFT 11 /* bits to right of fixed binary point */ | |
373 | #define FSCALE (1<<FSHIFT) | 373 | #define FSCALE (1<<FSHIFT) | |
374 | 374 | |||
375 | /* | 375 | /* | |
376 | * The time for a process to be blocked before being very swappable. | 376 | * The time for a process to be blocked before being very swappable. | |
377 | * This is a number of seconds which the system takes as being a non-trivial | 377 | * This is a number of seconds which the system takes as being a non-trivial | |
378 | * amount of real time. You probably shouldn't change this; | 378 | * amount of real time. You probably shouldn't change this; | |
379 | * it is used in subtle ways (fractions and multiples of it are, that is, like | 379 | * it is used in subtle ways (fractions and multiples of it are, that is, like | |
380 | * half of a ``long time'', almost a long time, etc.) | 380 | * half of a ``long time'', almost a long time, etc.) | |
381 | * It is related to human patience and other factors which don't really | 381 | * It is related to human patience and other factors which don't really | |
382 | * change over time. | 382 | * change over time. | |
383 | */ | 383 | */ | |
384 | #define MAXSLP 20 | 384 | #define MAXSLP 20 | |
385 | 385 | |||
386 | /* | 386 | /* | |
387 | * Defaults for Unified Buffer Cache parameters. | 387 | * Defaults for Unified Buffer Cache parameters. | |
388 | * These may be overridden in <machine/param.h>. | 388 | * These may be overridden in <machine/param.h>. | |
389 | */ | 389 | */ | |
390 | 390 | |||
391 | #ifndef UBC_WINSHIFT | 391 | #ifndef UBC_WINSHIFT | |
392 | #define UBC_WINSHIFT 13 | 392 | #define UBC_WINSHIFT 13 | |
393 | #endif | 393 | #endif | |
394 | #ifndef UBC_NWINS | 394 | #ifndef UBC_NWINS | |
395 | #define UBC_NWINS 1024 | 395 | #define UBC_NWINS 1024 | |
396 | #endif | 396 | #endif | |
397 | 397 | |||
398 | #ifdef _KERNEL | 398 | #ifdef _KERNEL | |
399 | /* | 399 | /* | |
400 | * macro to convert from milliseconds to hz without integer overflow | 400 | * macro to convert from milliseconds to hz without integer overflow | |
401 | * Default version using only 32bits arithmetics. | 401 | * Default version using only 32bits arithmetics. | |
402 | * 64bit port can define 64bit version in their <machine/param.h> | 402 | * 64bit port can define 64bit version in their <machine/param.h> | |
403 | * 0x20000 is safe for hz < 20000 | 403 | * 0x20000 is safe for hz < 20000 | |
404 | */ | 404 | */ | |
405 | #ifndef mstohz | 405 | #ifndef mstohz | |
406 | #define mstohz(ms) \ | 406 | #define mstohz(ms) \ | |
407 | (__predict_false((ms) >= 0x20000) ? \ | 407 | (__predict_false((ms) >= 0x20000) ? \ | |
408 | ((ms +0u) / 1000u) * hz : \ | 408 | ((ms +0u) / 1000u) * hz : \ | |
409 | ((ms +0u) * hz) / 1000u) | 409 | ((ms +0u) * hz) / 1000u) | |
410 | #endif | 410 | #endif | |
411 | #ifndef hztoms | 411 | #ifndef hztoms | |
412 | #define hztoms(t) \ | 412 | #define hztoms(t) \ | |
413 | (__predict_false((t) >= 0x20000) ? \ | 413 | (__predict_false((t) >= 0x20000) ? \ | |
414 | ((t +0u) / hz) * 1000u : \ | 414 | ((t +0u) / hz) * 1000u : \ | |
415 | ((t +0u) * 1000u) / hz) | 415 | ((t +0u) * 1000u) / hz) | |
416 | #endif | 416 | #endif | |
417 | 417 | |||
418 | extern const int schedppq; | 418 | extern const int schedppq; | |
419 | extern size_t coherency_unit; | 419 | extern size_t coherency_unit; | |
420 | 420 | |||
421 | #endif /* _KERNEL */ | 421 | #endif /* _KERNEL */ | |
422 | 422 | |||
423 | /* | 423 | /* | |
424 | * Minimum alignment of "struct lwp" needed by the architecture. | 424 | * Minimum alignment of "struct lwp" needed by the architecture. | |
425 | * This counts when packing a lock byte into a word alongside a | 425 | * This counts when packing a lock byte into a word alongside a | |
426 | * pointer to an LWP. | 426 | * pointer to an LWP. | |
427 | */ | 427 | */ | |
428 | #ifndef MIN_LWP_ALIGNMENT | 428 | #ifndef MIN_LWP_ALIGNMENT | |
429 | #define MIN_LWP_ALIGNMENT 32 | 429 | #define MIN_LWP_ALIGNMENT 32 | |
430 | #endif | 430 | #endif | |
431 | 431 | |||
432 | #endif /* !_SYS_PARAM_H_ */ | 432 | #endif /* !_SYS_PARAM_H_ */ |
--- src/sys/sys/un.h 2008/08/06 15:01:24 1.44
+++ src/sys/sys/un.h 2009/03/18 05:33:23 1.44.4.1
@@ -1,113 +1,110 @@ | @@ -1,113 +1,110 @@ | |||
1 | /* $NetBSD: un.h,v 1.44 2008/08/06 15:01:24 plunky Exp $ */ | 1 | /* $NetBSD: un.h,v 1.44.4.1 2009/03/18 05:33:23 snj Exp $ */ | |
2 | 2 | |||
3 | /* | 3 | /* | |
4 | * Copyright (c) 1982, 1986, 1993 | 4 | * Copyright (c) 1982, 1986, 1993 | |
5 | * The Regents of the University of California. All rights reserved. | 5 | * The Regents of the University of California. All rights reserved. | |
6 | * | 6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | 7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | 8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | 9 | * are met: | |
10 | * 1. Redistributions of source code must retain the above copyright | 10 | * 1. Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | 11 | * notice, this list of conditions and the following disclaimer. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | 13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | 14 | * documentation and/or other materials provided with the distribution. | |
15 | * 3. Neither the name of the University nor the names of its contributors | 15 | * 3. Neither the name of the University nor the names of its contributors | |
16 | * may be used to endorse or promote products derived from this software | 16 | * may be used to endorse or promote products derived from this software | |
17 | * without specific prior written permission. | 17 | * without specific prior written permission. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 | * SUCH DAMAGE. | 29 | * SUCH DAMAGE. | |
30 | * | 30 | * | |
31 | * @(#)un.h 8.3 (Berkeley) 2/19/95 | 31 | * @(#)un.h 8.3 (Berkeley) 2/19/95 | |
32 | */ | 32 | */ | |
33 | 33 | |||
34 | #ifndef _SYS_UN_H_ | 34 | #ifndef _SYS_UN_H_ | |
35 | #define _SYS_UN_H_ | 35 | #define _SYS_UN_H_ | |
36 | 36 | |||
37 | #include <sys/ansi.h> | 37 | #include <sys/ansi.h> | |
38 | #include <sys/featuretest.h> | 38 | #include <sys/featuretest.h> | |
39 | #include <sys/types.h> | 39 | #include <sys/types.h> | |
40 | 40 | |||
41 | #ifndef sa_family_t | 41 | #ifndef sa_family_t | |
42 | typedef __sa_family_t sa_family_t; | 42 | typedef __sa_family_t sa_family_t; | |
43 | #define sa_family_t __sa_family_t | 43 | #define sa_family_t __sa_family_t | |
44 | #endif | 44 | #endif | |
45 | 45 | |||
46 | /* | 46 | /* | |
47 | * Definitions for UNIX IPC domain. | 47 | * Definitions for UNIX IPC domain. | |
48 | */ | 48 | */ | |
49 | struct sockaddr_un { | 49 | struct sockaddr_un { | |
50 | uint8_t sun_len; /* total sockaddr length */ | 50 | uint8_t sun_len; /* total sockaddr length */ | |
51 | sa_family_t sun_family; /* AF_LOCAL */ | 51 | sa_family_t sun_family; /* AF_LOCAL */ | |
52 | char sun_path[104]; /* path name (gag) */ | 52 | char sun_path[104]; /* path name (gag) */ | |
53 | }; | 53 | }; | |
54 | 54 | |||
55 | /* | 55 | /* | |
56 | * Socket options for UNIX IPC domain. | 56 | * Socket options for UNIX IPC domain. | |
57 | */ | 57 | */ | |
58 | #if defined(_NETBSD_SOURCE) | 58 | #if defined(_NETBSD_SOURCE) | |
59 | #define LOCAL_CREDS 0x0001 /* pass credentials to receiver */ | 59 | #define LOCAL_CREDS 0x0001 /* pass credentials to receiver */ | |
60 | #define LOCAL_CONNWAIT 0x0002 /* connects block until accepted */ | 60 | #define LOCAL_CONNWAIT 0x0002 /* connects block until accepted */ | |
61 | #define LOCAL_PEEREID 0x0003 /* get peer identification */ | 61 | #define LOCAL_PEEREID 0x0003 /* get peer identification */ | |
62 | #endif | 62 | #endif | |
63 | 63 | |||
64 | /* | 64 | /* | |
65 | * Data automatically stored inside connect() for use by LOCAL_PEEREID | 65 | * Data automatically stored inside connect() for use by LOCAL_PEEREID | |
66 | */ | 66 | */ | |
67 | struct unpcbid { | 67 | struct unpcbid { | |
68 | pid_t unp_pid; /* process id */ | 68 | pid_t unp_pid; /* process id */ | |
69 | uid_t unp_euid; /* effective user id */ | 69 | uid_t unp_euid; /* effective user id */ | |
70 | gid_t unp_egid; /* effective group id */ | 70 | gid_t unp_egid; /* effective group id */ | |
71 | }; | 71 | }; | |
72 | 72 | |||
73 | #ifdef _KERNEL | 73 | #ifdef _KERNEL | |
74 | struct unpcb; | 74 | struct unpcb; | |
75 | struct socket; | 75 | struct socket; | |
76 | struct sockopt; | 76 | struct sockopt; | |
77 | 77 | |||
78 | int uipc_usrreq(struct socket *, int, struct mbuf *, | 78 | int uipc_usrreq(struct socket *, int, struct mbuf *, | |
79 | struct mbuf *, struct mbuf *, struct lwp *); | 79 | struct mbuf *, struct mbuf *, struct lwp *); | |
80 | int uipc_ctloutput(int, struct socket *, struct sockopt *); | 80 | int uipc_ctloutput(int, struct socket *, struct sockopt *); | |
81 | void uipc_init (void); | 81 | void uipc_init (void); | |
82 | kmutex_t *uipc_dgramlock (void); | 82 | kmutex_t *uipc_dgramlock (void); | |
83 | kmutex_t *uipc_streamlock (void); | 83 | kmutex_t *uipc_streamlock (void); | |
84 | kmutex_t *uipc_rawlock (void); | 84 | kmutex_t *uipc_rawlock (void); | |
85 | 85 | |||
86 | int unp_attach (struct socket *); | 86 | int unp_attach (struct socket *); | |
87 | int unp_bind (struct socket *, struct mbuf *, struct lwp *); | 87 | int unp_bind (struct socket *, struct mbuf *, struct lwp *); | |
88 | int unp_connect (struct socket *, struct mbuf *, struct lwp *); | 88 | int unp_connect (struct socket *, struct mbuf *, struct lwp *); | |
89 | int unp_connect2 (struct socket *, struct socket *, int); | 89 | int unp_connect2 (struct socket *, struct socket *, int); | |
90 | void unp_detach (struct unpcb *); | 90 | void unp_detach (struct unpcb *); | |
91 | void unp_discard (struct file *); | 91 | void unp_discard (struct file *); | |
92 | void unp_disconnect (struct unpcb *); | 92 | void unp_disconnect (struct unpcb *); | |
93 | bool unp_drop (struct unpcb *, int); | 93 | bool unp_drop (struct unpcb *, int); | |
94 | void unp_gc (void); | |||
95 | void unp_mark (struct file *); | |||
96 | void unp_scan (struct mbuf *, void (*)(struct file *), int); | |||
97 | void unp_shutdown (struct unpcb *); | 94 | void unp_shutdown (struct unpcb *); | |
98 | int unp_externalize (struct mbuf *, struct lwp *); | 95 | int unp_externalize (struct mbuf *, struct lwp *); | |
99 | int unp_internalize (struct mbuf **); | 96 | int unp_internalize (struct mbuf **); | |
100 | void unp_dispose (struct mbuf *); | 97 | void unp_dispose (struct mbuf *); | |
101 | int unp_output (struct mbuf *, struct mbuf *, struct unpcb *, | 98 | int unp_output (struct mbuf *, struct mbuf *, struct unpcb *, | |
102 | struct lwp *); | 99 | struct lwp *); | |
103 | void unp_setaddr (struct socket *, struct mbuf *, bool); | 100 | void unp_setaddr (struct socket *, struct mbuf *, bool); | |
104 | #else /* !_KERNEL */ | 101 | #else /* !_KERNEL */ | |
105 | 102 | |||
106 | /* actual length of an initialized sockaddr_un */ | 103 | /* actual length of an initialized sockaddr_un */ | |
107 | #if defined(_NETBSD_SOURCE) | 104 | #if defined(_NETBSD_SOURCE) | |
108 | #define SUN_LEN(su) \ | 105 | #define SUN_LEN(su) \ | |
109 | (sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path)) | 106 | (sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path)) | |
110 | #endif /* !_NetBSD_SOURCE */ | 107 | #endif /* !_NetBSD_SOURCE */ | |
111 | #endif /* _KERNEL */ | 108 | #endif /* _KERNEL */ | |
112 | 109 | |||
113 | #endif /* !_SYS_UN_H_ */ | 110 | #endif /* !_SYS_UN_H_ */ |