| @@ -1,886 +1,899 @@ | | | @@ -1,886 +1,899 @@ |
1 | /* $NetBSD: sys_select.c,v 1.40 2017/06/01 02:45:13 chs Exp $ */ | | 1 | /* $NetBSD: sys_select.c,v 1.40.2.1 2020/03/08 09:47:28 martin Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Andrew Doran and Mindaugas Rasiukevicius. | | 8 | * by Andrew Doran and Mindaugas Rasiukevicius. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Copyright (c) 1982, 1986, 1989, 1993 | | 33 | * Copyright (c) 1982, 1986, 1989, 1993 |
34 | * The Regents of the University of California. All rights reserved. | | 34 | * The Regents of the University of California. All rights reserved. |
35 | * (c) UNIX System Laboratories, Inc. | | 35 | * (c) UNIX System Laboratories, Inc. |
36 | * All or some portions of this file are derived from material licensed | | 36 | * All or some portions of this file are derived from material licensed |
37 | * to the University of California by American Telephone and Telegraph | | 37 | * to the University of California by American Telephone and Telegraph |
38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | | 38 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
39 | * the permission of UNIX System Laboratories, Inc. | | 39 | * the permission of UNIX System Laboratories, Inc. |
40 | * | | 40 | * |
41 | * Redistribution and use in source and binary forms, with or without | | 41 | * Redistribution and use in source and binary forms, with or without |
42 | * modification, are permitted provided that the following conditions | | 42 | * modification, are permitted provided that the following conditions |
43 | * are met: | | 43 | * are met: |
44 | * 1. Redistributions of source code must retain the above copyright | | 44 | * 1. Redistributions of source code must retain the above copyright |
45 | * notice, this list of conditions and the following disclaimer. | | 45 | * notice, this list of conditions and the following disclaimer. |
46 | * 2. Redistributions in binary form must reproduce the above copyright | | 46 | * 2. Redistributions in binary form must reproduce the above copyright |
47 | * notice, this list of conditions and the following disclaimer in the | | 47 | * notice, this list of conditions and the following disclaimer in the |
48 | * documentation and/or other materials provided with the distribution. | | 48 | * documentation and/or other materials provided with the distribution. |
49 | * 3. Neither the name of the University nor the names of its contributors | | 49 | * 3. Neither the name of the University nor the names of its contributors |
50 | * may be used to endorse or promote products derived from this software | | 50 | * may be used to endorse or promote products derived from this software |
51 | * without specific prior written permission. | | 51 | * without specific prior written permission. |
52 | * | | 52 | * |
53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
63 | * SUCH DAMAGE. | | 63 | * SUCH DAMAGE. |
64 | * | | 64 | * |
65 | * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 | | 65 | * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 |
66 | */ | | 66 | */ |
67 | | | 67 | |
68 | /* | | 68 | /* |
69 | * System calls of synchronous I/O multiplexing subsystem. | | 69 | * System calls of synchronous I/O multiplexing subsystem. |
70 | * | | 70 | * |
71 | * Locking | | 71 | * Locking |
72 | * | | 72 | * |
73 | * Two locks are used: <object-lock> and selcluster_t::sc_lock. | | 73 | * Two locks are used: <object-lock> and selcluster_t::sc_lock. |
74 | * | | 74 | * |
75 | * The <object-lock> might be a device driver or another subsystem, e.g. | | 75 | * The <object-lock> might be a device driver or another subsystem, e.g. |
76 | * socket or pipe. This lock is not exported, and thus invisible to this | | 76 | * socket or pipe. This lock is not exported, and thus invisible to this |
77 | * subsystem. Mainly, synchronisation between selrecord() and selnotify() | | 77 | * subsystem. Mainly, synchronisation between selrecord() and selnotify() |
78 | * routines depends on this lock, as it will be described in the comments. | | 78 | * routines depends on this lock, as it will be described in the comments. |
79 | * | | 79 | * |
80 | * Lock order | | 80 | * Lock order |
81 | * | | 81 | * |
82 | * <object-lock> -> | | 82 | * <object-lock> -> |
83 | * selcluster_t::sc_lock | | 83 | * selcluster_t::sc_lock |
84 | */ | | 84 | */ |
85 | | | 85 | |
86 | #include <sys/cdefs.h> | | 86 | #include <sys/cdefs.h> |
87 | __KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.40 2017/06/01 02:45:13 chs Exp $"); | | 87 | __KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.40.2.1 2020/03/08 09:47:28 martin Exp $"); |
88 | | | 88 | |
89 | #include <sys/param.h> | | 89 | #include <sys/param.h> |
90 | #include <sys/systm.h> | | 90 | #include <sys/systm.h> |
91 | #include <sys/filedesc.h> | | 91 | #include <sys/filedesc.h> |
92 | #include <sys/file.h> | | 92 | #include <sys/file.h> |
93 | #include <sys/proc.h> | | 93 | #include <sys/proc.h> |
94 | #include <sys/socketvar.h> | | 94 | #include <sys/socketvar.h> |
95 | #include <sys/signalvar.h> | | 95 | #include <sys/signalvar.h> |
96 | #include <sys/uio.h> | | 96 | #include <sys/uio.h> |
97 | #include <sys/kernel.h> | | 97 | #include <sys/kernel.h> |
98 | #include <sys/lwp.h> | | 98 | #include <sys/lwp.h> |
99 | #include <sys/poll.h> | | 99 | #include <sys/poll.h> |
100 | #include <sys/mount.h> | | 100 | #include <sys/mount.h> |
101 | #include <sys/syscallargs.h> | | 101 | #include <sys/syscallargs.h> |
102 | #include <sys/cpu.h> | | 102 | #include <sys/cpu.h> |
103 | #include <sys/atomic.h> | | 103 | #include <sys/atomic.h> |
104 | #include <sys/socketvar.h> | | 104 | #include <sys/socketvar.h> |
105 | #include <sys/sleepq.h> | | 105 | #include <sys/sleepq.h> |
106 | #include <sys/sysctl.h> | | 106 | #include <sys/sysctl.h> |
107 | | | 107 | |
108 | /* Flags for lwp::l_selflag. */ | | 108 | /* Flags for lwp::l_selflag. */ |
109 | #define SEL_RESET 0 /* awoken, interrupted, or not yet polling */ | | 109 | #define SEL_RESET 0 /* awoken, interrupted, or not yet polling */ |
110 | #define SEL_SCANNING 1 /* polling descriptors */ | | 110 | #define SEL_SCANNING 1 /* polling descriptors */ |
111 | #define SEL_BLOCKING 2 /* blocking and waiting for event */ | | 111 | #define SEL_BLOCKING 2 /* blocking and waiting for event */ |
112 | #define SEL_EVENT 3 /* interrupted, events set directly */ | | 112 | #define SEL_EVENT 3 /* interrupted, events set directly */ |
113 | | | 113 | |
114 | /* Operations: either select() or poll(). */ | | 114 | /* Operations: either select() or poll(). */ |
115 | #define SELOP_SELECT 1 | | 115 | #define SELOP_SELECT 1 |
116 | #define SELOP_POLL 2 | | 116 | #define SELOP_POLL 2 |
117 | | | 117 | |
118 | /* | | 118 | /* |
119 | * Per-cluster state for select()/poll(). For a system with fewer | | 119 | * Per-cluster state for select()/poll(). For a system with fewer |
120 | * than 32 CPUs, this gives us per-CPU clusters. | | 120 | * than 32 CPUs, this gives us per-CPU clusters. |
121 | */ | | 121 | */ |
122 | #define SELCLUSTERS 32 | | 122 | #define SELCLUSTERS 32 |
123 | #define SELCLUSTERMASK (SELCLUSTERS - 1) | | 123 | #define SELCLUSTERMASK (SELCLUSTERS - 1) |
124 | | | 124 | |
125 | typedef struct selcluster { | | 125 | typedef struct selcluster { |
126 | kmutex_t *sc_lock; | | 126 | kmutex_t *sc_lock; |
127 | sleepq_t sc_sleepq; | | 127 | sleepq_t sc_sleepq; |
128 | int sc_ncoll; | | 128 | int sc_ncoll; |
129 | uint32_t sc_mask; | | 129 | uint32_t sc_mask; |
130 | } selcluster_t; | | 130 | } selcluster_t; |
131 | | | 131 | |
132 | static inline int selscan(char *, const int, const size_t, register_t *); | | 132 | static inline int selscan(char *, const int, const size_t, register_t *); |
133 | static inline int pollscan(struct pollfd *, const int, register_t *); | | 133 | static inline int pollscan(struct pollfd *, const int, register_t *); |
134 | static void selclear(void); | | 134 | static void selclear(void); |
135 | | | 135 | |
136 | static const int sel_flag[] = { | | 136 | static const int sel_flag[] = { |
137 | POLLRDNORM | POLLHUP | POLLERR, | | 137 | POLLRDNORM | POLLHUP | POLLERR, |
138 | POLLWRNORM | POLLHUP | POLLERR, | | 138 | POLLWRNORM | POLLHUP | POLLERR, |
139 | POLLRDBAND | | 139 | POLLRDBAND |
140 | }; | | 140 | }; |
141 | | | 141 | |
142 | static syncobj_t select_sobj = { | | 142 | static syncobj_t select_sobj = { |
143 | SOBJ_SLEEPQ_FIFO, | | 143 | SOBJ_SLEEPQ_FIFO, |
144 | sleepq_unsleep, | | 144 | sleepq_unsleep, |
145 | sleepq_changepri, | | 145 | sleepq_changepri, |
146 | sleepq_lendpri, | | 146 | sleepq_lendpri, |
147 | syncobj_noowner, | | 147 | syncobj_noowner, |
148 | }; | | 148 | }; |
149 | | | 149 | |
150 | static selcluster_t *selcluster[SELCLUSTERS] __read_mostly; | | 150 | static selcluster_t *selcluster[SELCLUSTERS] __read_mostly; |
151 | static int direct_select __read_mostly = 0; | | 151 | static int direct_select __read_mostly = 0; |
152 | | | 152 | |
153 | /* | | 153 | /* |
154 | * Select system call. | | 154 | * Select system call. |
155 | */ | | 155 | */ |
156 | int | | 156 | int |
157 | sys___pselect50(struct lwp *l, const struct sys___pselect50_args *uap, | | 157 | sys___pselect50(struct lwp *l, const struct sys___pselect50_args *uap, |
158 | register_t *retval) | | 158 | register_t *retval) |
159 | { | | 159 | { |
160 | /* { | | 160 | /* { |
161 | syscallarg(int) nd; | | 161 | syscallarg(int) nd; |
162 | syscallarg(fd_set *) in; | | 162 | syscallarg(fd_set *) in; |
163 | syscallarg(fd_set *) ou; | | 163 | syscallarg(fd_set *) ou; |
164 | syscallarg(fd_set *) ex; | | 164 | syscallarg(fd_set *) ex; |
165 | syscallarg(const struct timespec *) ts; | | 165 | syscallarg(const struct timespec *) ts; |
166 | syscallarg(sigset_t *) mask; | | 166 | syscallarg(sigset_t *) mask; |
167 | } */ | | 167 | } */ |
168 | struct timespec ats, *ts = NULL; | | 168 | struct timespec ats, *ts = NULL; |
169 | sigset_t amask, *mask = NULL; | | 169 | sigset_t amask, *mask = NULL; |
170 | int error; | | 170 | int error; |
171 | | | 171 | |
172 | if (SCARG(uap, ts)) { | | 172 | if (SCARG(uap, ts)) { |
173 | error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); | | 173 | error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); |
174 | if (error) | | 174 | if (error) |
175 | return error; | | 175 | return error; |
176 | ts = &ats; | | 176 | ts = &ats; |
177 | } | | 177 | } |
178 | if (SCARG(uap, mask) != NULL) { | | 178 | if (SCARG(uap, mask) != NULL) { |
179 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); | | 179 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); |
180 | if (error) | | 180 | if (error) |
181 | return error; | | 181 | return error; |
182 | mask = &amask; | | 182 | mask = &amask; |
183 | } | | 183 | } |
184 | | | 184 | |
185 | return selcommon(retval, SCARG(uap, nd), SCARG(uap, in), | | 185 | return selcommon(retval, SCARG(uap, nd), SCARG(uap, in), |
186 | SCARG(uap, ou), SCARG(uap, ex), ts, mask); | | 186 | SCARG(uap, ou), SCARG(uap, ex), ts, mask); |
187 | } | | 187 | } |
188 | | | 188 | |
189 | int | | 189 | int |
190 | sys___select50(struct lwp *l, const struct sys___select50_args *uap, | | 190 | sys___select50(struct lwp *l, const struct sys___select50_args *uap, |
191 | register_t *retval) | | 191 | register_t *retval) |
192 | { | | 192 | { |
193 | /* { | | 193 | /* { |
194 | syscallarg(int) nd; | | 194 | syscallarg(int) nd; |
195 | syscallarg(fd_set *) in; | | 195 | syscallarg(fd_set *) in; |
196 | syscallarg(fd_set *) ou; | | 196 | syscallarg(fd_set *) ou; |
197 | syscallarg(fd_set *) ex; | | 197 | syscallarg(fd_set *) ex; |
198 | syscallarg(struct timeval *) tv; | | 198 | syscallarg(struct timeval *) tv; |
199 | } */ | | 199 | } */ |
200 | struct timeval atv; | | 200 | struct timeval atv; |
201 | struct timespec ats, *ts = NULL; | | 201 | struct timespec ats, *ts = NULL; |
202 | int error; | | 202 | int error; |
203 | | | 203 | |
204 | if (SCARG(uap, tv)) { | | 204 | if (SCARG(uap, tv)) { |
205 | error = copyin(SCARG(uap, tv), (void *)&atv, sizeof(atv)); | | 205 | error = copyin(SCARG(uap, tv), (void *)&atv, sizeof(atv)); |
206 | if (error) | | 206 | if (error) |
207 | return error; | | 207 | return error; |
208 | TIMEVAL_TO_TIMESPEC(&atv, &ats); | | 208 | TIMEVAL_TO_TIMESPEC(&atv, &ats); |
209 | ts = &ats; | | 209 | ts = &ats; |
210 | } | | 210 | } |
211 | | | 211 | |
212 | return selcommon(retval, SCARG(uap, nd), SCARG(uap, in), | | 212 | return selcommon(retval, SCARG(uap, nd), SCARG(uap, in), |
213 | SCARG(uap, ou), SCARG(uap, ex), ts, NULL); | | 213 | SCARG(uap, ou), SCARG(uap, ex), ts, NULL); |
214 | } | | 214 | } |
215 | | | 215 | |
216 | /* | | 216 | /* |
217 | * sel_do_scan: common code to perform the scan on descriptors. | | 217 | * sel_do_scan: common code to perform the scan on descriptors. |
218 | */ | | 218 | */ |
219 | static int | | 219 | static int |
220 | sel_do_scan(const int op, void *fds, const int nf, const size_t ni, | | 220 | sel_do_scan(const int op, void *fds, const int nf, const size_t ni, |
221 | struct timespec *ts, sigset_t *mask, register_t *retval) | | 221 | struct timespec *ts, sigset_t *mask, register_t *retval) |
222 | { | | 222 | { |
223 | lwp_t * const l = curlwp; | | 223 | lwp_t * const l = curlwp; |
224 | selcluster_t *sc; | | 224 | selcluster_t *sc; |
225 | kmutex_t *lock; | | 225 | kmutex_t *lock; |
226 | struct timespec sleepts; | | 226 | struct timespec sleepts; |
227 | int error, timo; | | 227 | int error, timo; |
228 | | | 228 | |
229 | timo = 0; | | 229 | timo = 0; |
230 | if (ts && inittimeleft(ts, &sleepts) == -1) { | | 230 | if (ts && inittimeleft(ts, &sleepts) == -1) { |
231 | return EINVAL; | | 231 | return EINVAL; |
232 | } | | 232 | } |
233 | | | 233 | |
234 | if (__predict_false(mask)) | | 234 | if (__predict_false(mask)) |
235 | sigsuspendsetup(l, mask); | | 235 | sigsuspendsetup(l, mask); |
236 | | | 236 | |
237 | sc = curcpu()->ci_data.cpu_selcluster; | | 237 | sc = curcpu()->ci_data.cpu_selcluster; |
238 | lock = sc->sc_lock; | | 238 | lock = sc->sc_lock; |
239 | l->l_selcluster = sc; | | 239 | l->l_selcluster = sc; |
240 | if (op == SELOP_SELECT) { | | 240 | if (op == SELOP_SELECT) { |
241 | l->l_selbits = fds; | | 241 | l->l_selbits = fds; |
242 | l->l_selni = ni; | | 242 | l->l_selni = ni; |
243 | } else { | | 243 | } else { |
244 | l->l_selbits = NULL; | | 244 | l->l_selbits = NULL; |
245 | } | | 245 | } |
246 | | | 246 | |
247 | for (;;) { | | 247 | for (;;) { |
248 | int ncoll; | | 248 | int ncoll; |
249 | | | 249 | |
250 | SLIST_INIT(&l->l_selwait); | | 250 | SLIST_INIT(&l->l_selwait); |
251 | l->l_selret = 0; | | 251 | l->l_selret = 0; |
252 | | | 252 | |
253 | /* | | 253 | /* |
254 | * No need to lock. If this is overwritten by another value | | 254 | * No need to lock. If this is overwritten by another value |
255 | * while scanning, we will retry below. We only need to see | | 255 | * while scanning, we will retry below. We only need to see |
256 | * exact state from the descriptors that we are about to poll, | | 256 | * exact state from the descriptors that we are about to poll, |
257 | * and lock activity resulting from fo_poll is enough to | | 257 | * and lock activity resulting from fo_poll is enough to |
258 | * provide an up to date value for new polling activity. | | 258 | * provide an up to date value for new polling activity. |
259 | */ | | 259 | */ |
260 | l->l_selflag = SEL_SCANNING; | | 260 | l->l_selflag = SEL_SCANNING; |
261 | ncoll = sc->sc_ncoll; | | 261 | ncoll = sc->sc_ncoll; |
262 | | | 262 | |
263 | if (op == SELOP_SELECT) { | | 263 | if (op == SELOP_SELECT) { |
264 | error = selscan((char *)fds, nf, ni, retval); | | 264 | error = selscan((char *)fds, nf, ni, retval); |
265 | } else { | | 265 | } else { |
266 | error = pollscan((struct pollfd *)fds, nf, retval); | | 266 | error = pollscan((struct pollfd *)fds, nf, retval); |
267 | } | | 267 | } |
268 | if (error || *retval) | | 268 | if (error || *retval) |
269 | break; | | 269 | break; |
270 | if (ts && (timo = gettimeleft(ts, &sleepts)) <= 0) | | 270 | if (ts && (timo = gettimeleft(ts, &sleepts)) <= 0) |
271 | break; | | 271 | break; |
272 | /* | | 272 | /* |
273 | * Acquire the lock and perform the (re)checks. Note, if | | 273 | * Acquire the lock and perform the (re)checks. Note, if |
274 | * collision has occured, then our state does not matter, | | 274 | * collision has occured, then our state does not matter, |
275 | * as we must perform re-scan. Therefore, check it first. | | 275 | * as we must perform re-scan. Therefore, check it first. |
276 | */ | | 276 | */ |
277 | state_check: | | 277 | state_check: |
278 | mutex_spin_enter(lock); | | 278 | mutex_spin_enter(lock); |
279 | if (__predict_false(sc->sc_ncoll != ncoll)) { | | 279 | if (__predict_false(sc->sc_ncoll != ncoll)) { |
280 | /* Collision: perform re-scan. */ | | 280 | /* Collision: perform re-scan. */ |
281 | mutex_spin_exit(lock); | | 281 | mutex_spin_exit(lock); |
282 | selclear(); | | 282 | selclear(); |
283 | continue; | | 283 | continue; |
284 | } | | 284 | } |
285 | if (__predict_true(l->l_selflag == SEL_EVENT)) { | | 285 | if (__predict_true(l->l_selflag == SEL_EVENT)) { |
286 | /* Events occured, they are set directly. */ | | 286 | /* Events occured, they are set directly. */ |
287 | mutex_spin_exit(lock); | | 287 | mutex_spin_exit(lock); |
288 | break; | | 288 | break; |
289 | } | | 289 | } |
290 | if (__predict_true(l->l_selflag == SEL_RESET)) { | | 290 | if (__predict_true(l->l_selflag == SEL_RESET)) { |
291 | /* Events occured, but re-scan is requested. */ | | 291 | /* Events occured, but re-scan is requested. */ |
292 | mutex_spin_exit(lock); | | 292 | mutex_spin_exit(lock); |
293 | selclear(); | | 293 | selclear(); |
294 | continue; | | 294 | continue; |
295 | } | | 295 | } |
296 | /* Nothing happen, therefore - sleep. */ | | 296 | /* Nothing happen, therefore - sleep. */ |
297 | l->l_selflag = SEL_BLOCKING; | | 297 | l->l_selflag = SEL_BLOCKING; |
298 | l->l_kpriority = true; | | 298 | l->l_kpriority = true; |
299 | sleepq_enter(&sc->sc_sleepq, l, lock); | | 299 | sleepq_enter(&sc->sc_sleepq, l, lock); |
300 | sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj); | | 300 | sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj); |
301 | error = sleepq_block(timo, true); | | 301 | error = sleepq_block(timo, true); |
302 | if (error != 0) { | | 302 | if (error != 0) { |
303 | break; | | 303 | break; |
304 | } | | 304 | } |
305 | /* Awoken: need to check the state. */ | | 305 | /* Awoken: need to check the state. */ |
306 | goto state_check; | | 306 | goto state_check; |
307 | } | | 307 | } |
308 | selclear(); | | 308 | selclear(); |
309 | | | 309 | |
310 | /* Add direct events if any. */ | | 310 | /* Add direct events if any. */ |
311 | if (l->l_selflag == SEL_EVENT) { | | 311 | if (l->l_selflag == SEL_EVENT) { |
312 | KASSERT(l->l_selret != 0); | | 312 | KASSERT(l->l_selret != 0); |
313 | *retval += l->l_selret; | | 313 | *retval += l->l_selret; |
314 | } | | 314 | } |
315 | | | 315 | |
316 | if (__predict_false(mask)) | | 316 | if (__predict_false(mask)) |
317 | sigsuspendteardown(l); | | 317 | sigsuspendteardown(l); |
318 | | | 318 | |
319 | /* select and poll are not restarted after signals... */ | | 319 | /* select and poll are not restarted after signals... */ |
320 | if (error == ERESTART) | | 320 | if (error == ERESTART) |
321 | return EINTR; | | 321 | return EINTR; |
322 | if (error == EWOULDBLOCK) | | 322 | if (error == EWOULDBLOCK) |
323 | return 0; | | 323 | return 0; |
324 | return error; | | 324 | return error; |
325 | } | | 325 | } |
326 | | | 326 | |
327 | int | | 327 | int |
328 | selcommon(register_t *retval, int nd, fd_set *u_in, fd_set *u_ou, | | 328 | selcommon(register_t *retval, int nd, fd_set *u_in, fd_set *u_ou, |
329 | fd_set *u_ex, struct timespec *ts, sigset_t *mask) | | 329 | fd_set *u_ex, struct timespec *ts, sigset_t *mask) |
330 | { | | 330 | { |
331 | char smallbits[howmany(FD_SETSIZE, NFDBITS) * | | 331 | char smallbits[howmany(FD_SETSIZE, NFDBITS) * |
332 | sizeof(fd_mask) * 6]; | | 332 | sizeof(fd_mask) * 6]; |
333 | char *bits; | | 333 | char *bits; |
334 | int error, nf; | | 334 | int error, nf; |
335 | size_t ni; | | 335 | size_t ni; |
336 | | | 336 | |
337 | if (nd < 0) | | 337 | if (nd < 0) |
338 | return (EINVAL); | | 338 | return (EINVAL); |
339 | nf = curlwp->l_fd->fd_dt->dt_nfiles; | | 339 | nf = curlwp->l_fd->fd_dt->dt_nfiles; |
340 | if (nd > nf) { | | 340 | if (nd > nf) { |
341 | /* forgiving; slightly wrong */ | | 341 | /* forgiving; slightly wrong */ |
342 | nd = nf; | | 342 | nd = nf; |
343 | } | | 343 | } |
344 | ni = howmany(nd, NFDBITS) * sizeof(fd_mask); | | 344 | ni = howmany(nd, NFDBITS) * sizeof(fd_mask); |
345 | if (ni * 6 > sizeof(smallbits)) | | 345 | if (ni * 6 > sizeof(smallbits)) |
346 | bits = kmem_alloc(ni * 6, KM_SLEEP); | | 346 | bits = kmem_alloc(ni * 6, KM_SLEEP); |
347 | else | | 347 | else |
348 | bits = smallbits; | | 348 | bits = smallbits; |
349 | | | 349 | |
350 | #define getbits(name, x) \ | | 350 | #define getbits(name, x) \ |
351 | if (u_ ## name) { \ | | 351 | if (u_ ## name) { \ |
352 | error = copyin(u_ ## name, bits + ni * x, ni); \ | | 352 | error = copyin(u_ ## name, bits + ni * x, ni); \ |
353 | if (error) \ | | 353 | if (error) \ |
354 | goto fail; \ | | 354 | goto fail; \ |
355 | } else \ | | 355 | } else \ |
356 | memset(bits + ni * x, 0, ni); | | 356 | memset(bits + ni * x, 0, ni); |
357 | getbits(in, 0); | | 357 | getbits(in, 0); |
358 | getbits(ou, 1); | | 358 | getbits(ou, 1); |
359 | getbits(ex, 2); | | 359 | getbits(ex, 2); |
360 | #undef getbits | | 360 | #undef getbits |
361 | | | 361 | |
362 | error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval); | | 362 | error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval); |
363 | if (error == 0 && u_in != NULL) | | 363 | if (error == 0 && u_in != NULL) |
364 | error = copyout(bits + ni * 3, u_in, ni); | | 364 | error = copyout(bits + ni * 3, u_in, ni); |
365 | if (error == 0 && u_ou != NULL) | | 365 | if (error == 0 && u_ou != NULL) |
366 | error = copyout(bits + ni * 4, u_ou, ni); | | 366 | error = copyout(bits + ni * 4, u_ou, ni); |
367 | if (error == 0 && u_ex != NULL) | | 367 | if (error == 0 && u_ex != NULL) |
368 | error = copyout(bits + ni * 5, u_ex, ni); | | 368 | error = copyout(bits + ni * 5, u_ex, ni); |
369 | fail: | | 369 | fail: |
370 | if (bits != smallbits) | | 370 | if (bits != smallbits) |
371 | kmem_free(bits, ni * 6); | | 371 | kmem_free(bits, ni * 6); |
372 | return (error); | | 372 | return (error); |
373 | } | | 373 | } |
374 | | | 374 | |
375 | static inline int | | 375 | static inline int |
376 | selscan(char *bits, const int nfd, const size_t ni, register_t *retval) | | 376 | selscan(char *bits, const int nfd, const size_t ni, register_t *retval) |
377 | { | | 377 | { |
378 | fd_mask *ibitp, *obitp; | | 378 | fd_mask *ibitp, *obitp; |
379 | int msk, i, j, fd, n; | | 379 | int msk, i, j, fd, n; |
380 | file_t *fp; | | 380 | file_t *fp; |
381 | | | 381 | |
382 | ibitp = (fd_mask *)(bits + ni * 0); | | 382 | ibitp = (fd_mask *)(bits + ni * 0); |
383 | obitp = (fd_mask *)(bits + ni * 3); | | 383 | obitp = (fd_mask *)(bits + ni * 3); |
384 | n = 0; | | 384 | n = 0; |
385 | | | 385 | |
386 | memset(obitp, 0, ni * 3); | | 386 | memset(obitp, 0, ni * 3); |
387 | for (msk = 0; msk < 3; msk++) { | | 387 | for (msk = 0; msk < 3; msk++) { |
388 | for (i = 0; i < nfd; i += NFDBITS) { | | 388 | for (i = 0; i < nfd; i += NFDBITS) { |
389 | fd_mask ibits, obits; | | 389 | fd_mask ibits, obits; |
390 | | | 390 | |
391 | ibits = *ibitp; | | 391 | ibits = *ibitp; |
392 | obits = 0; | | 392 | obits = 0; |
393 | while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { | | 393 | while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { |
394 | ibits &= ~(1 << j); | | 394 | ibits &= ~(1 << j); |
395 | if ((fp = fd_getfile(fd)) == NULL) | | 395 | if ((fp = fd_getfile(fd)) == NULL) |
396 | return (EBADF); | | 396 | return (EBADF); |
397 | /* | | 397 | /* |
398 | * Setup an argument to selrecord(), which is | | 398 | * Setup an argument to selrecord(), which is |
399 | * a file descriptor number. | | 399 | * a file descriptor number. |
400 | */ | | 400 | */ |
401 | curlwp->l_selrec = fd; | | 401 | curlwp->l_selrec = fd; |
402 | if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) { | | 402 | if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) { |
403 | obits |= (1 << j); | | 403 | obits |= (1 << j); |
404 | n++; | | 404 | n++; |
405 | } | | 405 | } |
406 | fd_putfile(fd); | | 406 | fd_putfile(fd); |
407 | } | | 407 | } |
408 | if (obits != 0) { | | 408 | if (obits != 0) { |
409 | if (direct_select) { | | 409 | if (direct_select) { |
410 | kmutex_t *lock; | | 410 | kmutex_t *lock; |
411 | lock = curlwp->l_selcluster->sc_lock; | | 411 | lock = curlwp->l_selcluster->sc_lock; |
412 | mutex_spin_enter(lock); | | 412 | mutex_spin_enter(lock); |
413 | *obitp |= obits; | | 413 | *obitp |= obits; |
414 | mutex_spin_exit(lock); | | 414 | mutex_spin_exit(lock); |
415 | } else { | | 415 | } else { |
416 | *obitp |= obits; | | 416 | *obitp |= obits; |
417 | } | | 417 | } |
418 | } | | 418 | } |
419 | ibitp++; | | 419 | ibitp++; |
420 | obitp++; | | 420 | obitp++; |
421 | } | | 421 | } |
422 | } | | 422 | } |
423 | *retval = n; | | 423 | *retval = n; |
424 | return (0); | | 424 | return (0); |
425 | } | | 425 | } |
426 | | | 426 | |
427 | /* | | 427 | /* |
428 | * Poll system call. | | 428 | * Poll system call. |
429 | */ | | 429 | */ |
430 | int | | 430 | int |
431 | sys_poll(struct lwp *l, const struct sys_poll_args *uap, register_t *retval) | | 431 | sys_poll(struct lwp *l, const struct sys_poll_args *uap, register_t *retval) |
432 | { | | 432 | { |
433 | /* { | | 433 | /* { |
434 | syscallarg(struct pollfd *) fds; | | 434 | syscallarg(struct pollfd *) fds; |
435 | syscallarg(u_int) nfds; | | 435 | syscallarg(u_int) nfds; |
436 | syscallarg(int) timeout; | | 436 | syscallarg(int) timeout; |
437 | } */ | | 437 | } */ |
438 | struct timespec ats, *ts = NULL; | | 438 | struct timespec ats, *ts = NULL; |
439 | | | 439 | |
440 | if (SCARG(uap, timeout) != INFTIM) { | | 440 | if (SCARG(uap, timeout) != INFTIM) { |
441 | ats.tv_sec = SCARG(uap, timeout) / 1000; | | 441 | ats.tv_sec = SCARG(uap, timeout) / 1000; |
442 | ats.tv_nsec = (SCARG(uap, timeout) % 1000) * 1000000; | | 442 | ats.tv_nsec = (SCARG(uap, timeout) % 1000) * 1000000; |
443 | ts = &ats; | | 443 | ts = &ats; |
444 | } | | 444 | } |
445 | | | 445 | |
446 | return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, NULL); | | 446 | return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, NULL); |
447 | } | | 447 | } |
448 | | | 448 | |
449 | /* | | 449 | /* |
450 | * Poll system call. | | 450 | * Poll system call. |
451 | */ | | 451 | */ |
452 | int | | 452 | int |
453 | sys___pollts50(struct lwp *l, const struct sys___pollts50_args *uap, | | 453 | sys___pollts50(struct lwp *l, const struct sys___pollts50_args *uap, |
454 | register_t *retval) | | 454 | register_t *retval) |
455 | { | | 455 | { |
456 | /* { | | 456 | /* { |
457 | syscallarg(struct pollfd *) fds; | | 457 | syscallarg(struct pollfd *) fds; |
458 | syscallarg(u_int) nfds; | | 458 | syscallarg(u_int) nfds; |
459 | syscallarg(const struct timespec *) ts; | | 459 | syscallarg(const struct timespec *) ts; |
460 | syscallarg(const sigset_t *) mask; | | 460 | syscallarg(const sigset_t *) mask; |
461 | } */ | | 461 | } */ |
462 | struct timespec ats, *ts = NULL; | | 462 | struct timespec ats, *ts = NULL; |
463 | sigset_t amask, *mask = NULL; | | 463 | sigset_t amask, *mask = NULL; |
464 | int error; | | 464 | int error; |
465 | | | 465 | |
466 | if (SCARG(uap, ts)) { | | 466 | if (SCARG(uap, ts)) { |
467 | error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); | | 467 | error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); |
468 | if (error) | | 468 | if (error) |
469 | return error; | | 469 | return error; |
470 | ts = &ats; | | 470 | ts = &ats; |
471 | } | | 471 | } |
472 | if (SCARG(uap, mask)) { | | 472 | if (SCARG(uap, mask)) { |
473 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); | | 473 | error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); |
474 | if (error) | | 474 | if (error) |
475 | return error; | | 475 | return error; |
476 | mask = &amask; | | 476 | mask = &amask; |
477 | } | | 477 | } |
478 | | | 478 | |
479 | return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask); | | 479 | return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask); |
480 | } | | 480 | } |
481 | | | 481 | |
482 | int | | 482 | int |
483 | pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds, | | 483 | pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds, |
484 | struct timespec *ts, sigset_t *mask) | | 484 | struct timespec *ts, sigset_t *mask) |
485 | { | | 485 | { |
486 | struct pollfd smallfds[32]; | | 486 | struct pollfd smallfds[32]; |
487 | struct pollfd *fds; | | 487 | struct pollfd *fds; |
488 | int error; | | 488 | int error; |
489 | size_t ni; | | 489 | size_t ni; |
490 | | | 490 | |
491 | if (nfds > 1000 + curlwp->l_fd->fd_dt->dt_nfiles) { | | 491 | if (nfds > curlwp->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_max + 1000) { |
492 | /* | | 492 | /* |
493 | * Either the user passed in a very sparse 'fds' or junk! | | 493 | * Prevent userland from causing over-allocation. |
494 | * The kmem_alloc() call below would be bad news. | | 494 | * Raising the default limit too high can still cause |
495 | * We could process the 'fds' array in chunks, but that | | 495 | * a lot of memory to be allocated, but this also means |
| | | 496 | * that the file descriptor array will also be large. |
| | | 497 | * |
| | | 498 | * To reduce the memory requirements here, we could |
| | | 499 | * process the 'fds' array in chunks, but that |
496 | * is a lot of code that isn't normally useful. | | 500 | * is a lot of code that isn't normally useful. |
497 | * (Or just move the copyin/out into pollscan().) | | 501 | * (Or just move the copyin/out into pollscan().) |
| | | 502 | * |
498 | * Historically the code silently truncated 'fds' to | | 503 | * Historically the code silently truncated 'fds' to |
499 | * dt_nfiles entries - but that does cause issues. | | 504 | * dt_nfiles entries - but that does cause issues. |
| | | 505 | * |
| | | 506 | * Using the max limit equivalent to sysctl |
| | | 507 | * kern.maxfiles is the moral equivalent of OPEN_MAX |
| | | 508 | * as specified by POSIX. |
| | | 509 | * |
| | | 510 | * We add a slop of 1000 in case the resource limit was |
| | | 511 | * changed after opening descriptors or the same descriptor |
| | | 512 | * was specified more than once. |
500 | */ | | 513 | */ |
501 | return EINVAL; | | 514 | return EINVAL; |
502 | } | | 515 | } |
503 | ni = nfds * sizeof(struct pollfd); | | 516 | ni = nfds * sizeof(struct pollfd); |
504 | if (ni > sizeof(smallfds)) | | 517 | if (ni > sizeof(smallfds)) |
505 | fds = kmem_alloc(ni, KM_SLEEP); | | 518 | fds = kmem_alloc(ni, KM_SLEEP); |
506 | else | | 519 | else |
507 | fds = smallfds; | | 520 | fds = smallfds; |
508 | | | 521 | |
509 | error = copyin(u_fds, fds, ni); | | 522 | error = copyin(u_fds, fds, ni); |
510 | if (error) | | 523 | if (error) |
511 | goto fail; | | 524 | goto fail; |
512 | | | 525 | |
513 | error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval); | | 526 | error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval); |
514 | if (error == 0) | | 527 | if (error == 0) |
515 | error = copyout(fds, u_fds, ni); | | 528 | error = copyout(fds, u_fds, ni); |
516 | fail: | | 529 | fail: |
517 | if (fds != smallfds) | | 530 | if (fds != smallfds) |
518 | kmem_free(fds, ni); | | 531 | kmem_free(fds, ni); |
519 | return (error); | | 532 | return (error); |
520 | } | | 533 | } |
521 | | | 534 | |
522 | static inline int | | 535 | static inline int |
523 | pollscan(struct pollfd *fds, const int nfd, register_t *retval) | | 536 | pollscan(struct pollfd *fds, const int nfd, register_t *retval) |
524 | { | | 537 | { |
525 | file_t *fp; | | 538 | file_t *fp; |
526 | int i, n = 0, revents; | | 539 | int i, n = 0, revents; |
527 | | | 540 | |
528 | for (i = 0; i < nfd; i++, fds++) { | | 541 | for (i = 0; i < nfd; i++, fds++) { |
529 | fds->revents = 0; | | 542 | fds->revents = 0; |
530 | if (fds->fd < 0) { | | 543 | if (fds->fd < 0) { |
531 | revents = 0; | | 544 | revents = 0; |
532 | } else if ((fp = fd_getfile(fds->fd)) == NULL) { | | 545 | } else if ((fp = fd_getfile(fds->fd)) == NULL) { |
533 | revents = POLLNVAL; | | 546 | revents = POLLNVAL; |
534 | } else { | | 547 | } else { |
535 | /* | | 548 | /* |
536 | * Perform poll: registers select request or returns | | 549 | * Perform poll: registers select request or returns |
537 | * the events which are set. Setup an argument for | | 550 | * the events which are set. Setup an argument for |
538 | * selrecord(), which is a pointer to struct pollfd. | | 551 | * selrecord(), which is a pointer to struct pollfd. |
539 | */ | | 552 | */ |
540 | curlwp->l_selrec = (uintptr_t)fds; | | 553 | curlwp->l_selrec = (uintptr_t)fds; |
541 | revents = (*fp->f_ops->fo_poll)(fp, | | 554 | revents = (*fp->f_ops->fo_poll)(fp, |
542 | fds->events | POLLERR | POLLHUP); | | 555 | fds->events | POLLERR | POLLHUP); |
543 | fd_putfile(fds->fd); | | 556 | fd_putfile(fds->fd); |
544 | } | | 557 | } |
545 | if (revents) { | | 558 | if (revents) { |
546 | fds->revents = revents; | | 559 | fds->revents = revents; |
547 | n++; | | 560 | n++; |
548 | } | | 561 | } |
549 | } | | 562 | } |
550 | *retval = n; | | 563 | *retval = n; |
551 | return (0); | | 564 | return (0); |
552 | } | | 565 | } |
553 | | | 566 | |
554 | int | | 567 | int |
555 | seltrue(dev_t dev, int events, lwp_t *l) | | 568 | seltrue(dev_t dev, int events, lwp_t *l) |
556 | { | | 569 | { |
557 | | | 570 | |
558 | return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); | | 571 | return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); |
559 | } | | 572 | } |
560 | | | 573 | |
561 | /* | | 574 | /* |
562 | * Record a select request. Concurrency issues: | | 575 | * Record a select request. Concurrency issues: |
563 | * | | 576 | * |
564 | * The caller holds the same lock across calls to selrecord() and | | 577 | * The caller holds the same lock across calls to selrecord() and |
565 | * selnotify(), so we don't need to consider a concurrent wakeup | | 578 | * selnotify(), so we don't need to consider a concurrent wakeup |
566 | * while in this routine. | | 579 | * while in this routine. |
567 | * | | 580 | * |
568 | * The only activity we need to guard against is selclear(), called by | | 581 | * The only activity we need to guard against is selclear(), called by |
569 | * another thread that is exiting sel_do_scan(). | | 582 | * another thread that is exiting sel_do_scan(). |
570 | * `sel_lwp' can only become non-NULL while the caller's lock is held, | | 583 | * `sel_lwp' can only become non-NULL while the caller's lock is held, |
571 | * so it cannot become non-NULL due to a change made by another thread | | 584 | * so it cannot become non-NULL due to a change made by another thread |
572 | * while we are in this routine. It can only become _NULL_ due to a | | 585 | * while we are in this routine. It can only become _NULL_ due to a |
573 | * call to selclear(). | | 586 | * call to selclear(). |
574 | * | | 587 | * |
575 | * If it is non-NULL and != selector there is the potential for | | 588 | * If it is non-NULL and != selector there is the potential for |
576 | * selclear() to be called by another thread. If either of those | | 589 | * selclear() to be called by another thread. If either of those |
577 | * conditions are true, we're not interested in touching the `named | | 590 | * conditions are true, we're not interested in touching the `named |
578 | * waiter' part of the selinfo record because we need to record a | | 591 | * waiter' part of the selinfo record because we need to record a |
579 | * collision. Hence there is no need for additional locking in this | | 592 | * collision. Hence there is no need for additional locking in this |
580 | * routine. | | 593 | * routine. |
581 | */ | | 594 | */ |
582 | void | | 595 | void |
583 | selrecord(lwp_t *selector, struct selinfo *sip) | | 596 | selrecord(lwp_t *selector, struct selinfo *sip) |
584 | { | | 597 | { |
585 | selcluster_t *sc; | | 598 | selcluster_t *sc; |
586 | lwp_t *other; | | 599 | lwp_t *other; |
587 | | | 600 | |
588 | KASSERT(selector == curlwp); | | 601 | KASSERT(selector == curlwp); |
589 | | | 602 | |
590 | sc = selector->l_selcluster; | | 603 | sc = selector->l_selcluster; |
591 | other = sip->sel_lwp; | | 604 | other = sip->sel_lwp; |
592 | | | 605 | |
593 | if (other == selector) { | | 606 | if (other == selector) { |
594 | /* 1. We (selector) already claimed to be the first LWP. */ | | 607 | /* 1. We (selector) already claimed to be the first LWP. */ |
595 | KASSERT(sip->sel_cluster == sc); | | 608 | KASSERT(sip->sel_cluster == sc); |
596 | } else if (other == NULL) { | | 609 | } else if (other == NULL) { |
597 | /* | | 610 | /* |
598 | * 2. No first LWP, therefore we (selector) are the first. | | 611 | * 2. No first LWP, therefore we (selector) are the first. |
599 | * | | 612 | * |
600 | * There may be unnamed waiters (collisions). Issue a memory | | 613 | * There may be unnamed waiters (collisions). Issue a memory |
601 | * barrier to ensure that we access sel_lwp (above) before | | 614 | * barrier to ensure that we access sel_lwp (above) before |
602 | * other fields - this guards against a call to selclear(). | | 615 | * other fields - this guards against a call to selclear(). |
603 | */ | | 616 | */ |
604 | membar_enter(); | | 617 | membar_enter(); |
605 | sip->sel_lwp = selector; | | 618 | sip->sel_lwp = selector; |
606 | SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain); | | 619 | SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain); |
607 | /* Copy the argument, which is for selnotify(). */ | | 620 | /* Copy the argument, which is for selnotify(). */ |
608 | sip->sel_fdinfo = selector->l_selrec; | | 621 | sip->sel_fdinfo = selector->l_selrec; |
609 | /* Replace selinfo's lock with the chosen cluster's lock. */ | | 622 | /* Replace selinfo's lock with the chosen cluster's lock. */ |
610 | sip->sel_cluster = sc; | | 623 | sip->sel_cluster = sc; |
611 | } else { | | 624 | } else { |
612 | /* 3. Multiple waiters: record a collision. */ | | 625 | /* 3. Multiple waiters: record a collision. */ |
613 | sip->sel_collision |= sc->sc_mask; | | 626 | sip->sel_collision |= sc->sc_mask; |
614 | KASSERT(sip->sel_cluster != NULL); | | 627 | KASSERT(sip->sel_cluster != NULL); |
615 | } | | 628 | } |
616 | } | | 629 | } |
617 | | | 630 | |
618 | /* | | 631 | /* |
619 | * sel_setevents: a helper function for selnotify(), to set the events | | 632 | * sel_setevents: a helper function for selnotify(), to set the events |
620 | * for LWP sleeping in selcommon() or pollcommon(). | | 633 | * for LWP sleeping in selcommon() or pollcommon(). |
621 | */ | | 634 | */ |
622 | static inline bool | | 635 | static inline bool |
623 | sel_setevents(lwp_t *l, struct selinfo *sip, const int events) | | 636 | sel_setevents(lwp_t *l, struct selinfo *sip, const int events) |
624 | { | | 637 | { |
625 | const int oflag = l->l_selflag; | | 638 | const int oflag = l->l_selflag; |
626 | int ret = 0; | | 639 | int ret = 0; |
627 | | | 640 | |
628 | /* | | 641 | /* |
629 | * If we require re-scan or it was required by somebody else, | | 642 | * If we require re-scan or it was required by somebody else, |
630 | * then just (re)set SEL_RESET and return. | | 643 | * then just (re)set SEL_RESET and return. |
631 | */ | | 644 | */ |
632 | if (__predict_false(events == 0 || oflag == SEL_RESET)) { | | 645 | if (__predict_false(events == 0 || oflag == SEL_RESET)) { |
633 | l->l_selflag = SEL_RESET; | | 646 | l->l_selflag = SEL_RESET; |
634 | return true; | | 647 | return true; |
635 | } | | 648 | } |
636 | /* | | 649 | /* |
637 | * Direct set. Note: select state of LWP is locked. First, | | 650 | * Direct set. Note: select state of LWP is locked. First, |
638 | * determine whether it is selcommon() or pollcommon(). | | 651 | * determine whether it is selcommon() or pollcommon(). |
639 | */ | | 652 | */ |
640 | if (l->l_selbits != NULL) { | | 653 | if (l->l_selbits != NULL) { |
641 | const size_t ni = l->l_selni; | | 654 | const size_t ni = l->l_selni; |
642 | fd_mask *fds = (fd_mask *)l->l_selbits; | | 655 | fd_mask *fds = (fd_mask *)l->l_selbits; |
643 | fd_mask *ofds = (fd_mask *)((char *)fds + ni * 3); | | 656 | fd_mask *ofds = (fd_mask *)((char *)fds + ni * 3); |
644 | const int fd = sip->sel_fdinfo, fbit = 1 << (fd & __NFDMASK); | | 657 | const int fd = sip->sel_fdinfo, fbit = 1 << (fd & __NFDMASK); |
645 | const int idx = fd >> __NFDSHIFT; | | 658 | const int idx = fd >> __NFDSHIFT; |
646 | int n; | | 659 | int n; |
647 | | | 660 | |
648 | for (n = 0; n < 3; n++) { | | 661 | for (n = 0; n < 3; n++) { |
649 | if ((fds[idx] & fbit) != 0 && | | 662 | if ((fds[idx] & fbit) != 0 && |
650 | (ofds[idx] & fbit) == 0 && | | 663 | (ofds[idx] & fbit) == 0 && |
651 | (sel_flag[n] & events)) { | | 664 | (sel_flag[n] & events)) { |
652 | ofds[idx] |= fbit; | | 665 | ofds[idx] |= fbit; |
653 | ret++; | | 666 | ret++; |
654 | } | | 667 | } |
655 | fds = (fd_mask *)((char *)fds + ni); | | 668 | fds = (fd_mask *)((char *)fds + ni); |
656 | ofds = (fd_mask *)((char *)ofds + ni); | | 669 | ofds = (fd_mask *)((char *)ofds + ni); |
657 | } | | 670 | } |
658 | } else { | | 671 | } else { |
659 | struct pollfd *pfd = (void *)sip->sel_fdinfo; | | 672 | struct pollfd *pfd = (void *)sip->sel_fdinfo; |
660 | int revents = events & (pfd->events | POLLERR | POLLHUP); | | 673 | int revents = events & (pfd->events | POLLERR | POLLHUP); |
661 | | | 674 | |
662 | if (revents) { | | 675 | if (revents) { |
663 | if (pfd->revents == 0) | | 676 | if (pfd->revents == 0) |
664 | ret = 1; | | 677 | ret = 1; |
665 | pfd->revents |= revents; | | 678 | pfd->revents |= revents; |
666 | } | | 679 | } |
667 | } | | 680 | } |
668 | /* Check whether there are any events to return. */ | | 681 | /* Check whether there are any events to return. */ |
669 | if (!ret) { | | 682 | if (!ret) { |
670 | return false; | | 683 | return false; |
671 | } | | 684 | } |
672 | /* Indicate direct set and note the event (cluster lock is held). */ | | 685 | /* Indicate direct set and note the event (cluster lock is held). */ |
673 | l->l_selflag = SEL_EVENT; | | 686 | l->l_selflag = SEL_EVENT; |
674 | l->l_selret += ret; | | 687 | l->l_selret += ret; |
675 | return true; | | 688 | return true; |
676 | } | | 689 | } |
677 | | | 690 | |
678 | /* | | 691 | /* |
679 | * Do a wakeup when a selectable event occurs. Concurrency issues: | | 692 | * Do a wakeup when a selectable event occurs. Concurrency issues: |
680 | * | | 693 | * |
681 | * As per selrecord(), the caller's object lock is held. If there | | 694 | * As per selrecord(), the caller's object lock is held. If there |
682 | * is a named waiter, we must acquire the associated selcluster's lock | | 695 | * is a named waiter, we must acquire the associated selcluster's lock |
683 | * in order to synchronize with selclear() and pollers going to sleep | | 696 | * in order to synchronize with selclear() and pollers going to sleep |
684 | * in sel_do_scan(). | | 697 | * in sel_do_scan(). |
685 | * | | 698 | * |
686 | * sip->sel_cluster cannot change at this point, as it is only changed | | 699 | * sip->sel_cluster cannot change at this point, as it is only changed |
687 | * in selrecord(), and concurrent calls to selrecord() are locked | | 700 | * in selrecord(), and concurrent calls to selrecord() are locked |
688 | * out by the caller. | | 701 | * out by the caller. |
689 | */ | | 702 | */ |
690 | void | | 703 | void |
691 | selnotify(struct selinfo *sip, int events, long knhint) | | 704 | selnotify(struct selinfo *sip, int events, long knhint) |
692 | { | | 705 | { |
693 | selcluster_t *sc; | | 706 | selcluster_t *sc; |
694 | uint32_t mask; | | 707 | uint32_t mask; |
695 | int index, oflag; | | 708 | int index, oflag; |
696 | lwp_t *l; | | 709 | lwp_t *l; |
697 | kmutex_t *lock; | | 710 | kmutex_t *lock; |
698 | | | 711 | |
699 | KNOTE(&sip->sel_klist, knhint); | | 712 | KNOTE(&sip->sel_klist, knhint); |
700 | | | 713 | |
701 | if (sip->sel_lwp != NULL) { | | 714 | if (sip->sel_lwp != NULL) { |
702 | /* One named LWP is waiting. */ | | 715 | /* One named LWP is waiting. */ |
703 | sc = sip->sel_cluster; | | 716 | sc = sip->sel_cluster; |
704 | lock = sc->sc_lock; | | 717 | lock = sc->sc_lock; |
705 | mutex_spin_enter(lock); | | 718 | mutex_spin_enter(lock); |
706 | /* Still there? */ | | 719 | /* Still there? */ |
707 | if (sip->sel_lwp != NULL) { | | 720 | if (sip->sel_lwp != NULL) { |
708 | /* | | 721 | /* |
709 | * Set the events for our LWP and indicate that. | | 722 | * Set the events for our LWP and indicate that. |
710 | * Otherwise, request for a full re-scan. | | 723 | * Otherwise, request for a full re-scan. |
711 | */ | | 724 | */ |
712 | l = sip->sel_lwp; | | 725 | l = sip->sel_lwp; |
713 | oflag = l->l_selflag; | | 726 | oflag = l->l_selflag; |
714 | | | 727 | |
715 | if (!direct_select) { | | 728 | if (!direct_select) { |
716 | l->l_selflag = SEL_RESET; | | 729 | l->l_selflag = SEL_RESET; |
717 | } else if (!sel_setevents(l, sip, events)) { | | 730 | } else if (!sel_setevents(l, sip, events)) { |
718 | /* No events to return. */ | | 731 | /* No events to return. */ |
719 | mutex_spin_exit(lock); | | 732 | mutex_spin_exit(lock); |
720 | return; | | 733 | return; |
721 | } | | 734 | } |
722 | | | 735 | |
723 | /* | | 736 | /* |
724 | * If thread is sleeping, wake it up. If it's not | | 737 | * If thread is sleeping, wake it up. If it's not |
725 | * yet asleep, it will notice the change in state | | 738 | * yet asleep, it will notice the change in state |
726 | * and will re-poll the descriptors. | | 739 | * and will re-poll the descriptors. |
727 | */ | | 740 | */ |
728 | if (oflag == SEL_BLOCKING && l->l_mutex == lock) { | | 741 | if (oflag == SEL_BLOCKING && l->l_mutex == lock) { |
729 | KASSERT(l->l_wchan == sc); | | 742 | KASSERT(l->l_wchan == sc); |
730 | sleepq_unsleep(l, false); | | 743 | sleepq_unsleep(l, false); |
731 | } | | 744 | } |
732 | } | | 745 | } |
733 | mutex_spin_exit(lock); | | 746 | mutex_spin_exit(lock); |
734 | } | | 747 | } |
735 | | | 748 | |
736 | if ((mask = sip->sel_collision) != 0) { | | 749 | if ((mask = sip->sel_collision) != 0) { |
737 | /* | | 750 | /* |
738 | * There was a collision (multiple waiters): we must | | 751 | * There was a collision (multiple waiters): we must |
739 | * inform all potentially interested waiters. | | 752 | * inform all potentially interested waiters. |
740 | */ | | 753 | */ |
741 | sip->sel_collision = 0; | | 754 | sip->sel_collision = 0; |
742 | do { | | 755 | do { |
743 | index = ffs(mask) - 1; | | 756 | index = ffs(mask) - 1; |
744 | mask &= ~(1 << index); | | 757 | mask &= ~(1 << index); |
745 | sc = selcluster[index]; | | 758 | sc = selcluster[index]; |
746 | lock = sc->sc_lock; | | 759 | lock = sc->sc_lock; |
747 | mutex_spin_enter(lock); | | 760 | mutex_spin_enter(lock); |
748 | sc->sc_ncoll++; | | 761 | sc->sc_ncoll++; |
749 | sleepq_wake(&sc->sc_sleepq, sc, (u_int)-1, lock); | | 762 | sleepq_wake(&sc->sc_sleepq, sc, (u_int)-1, lock); |
750 | } while (__predict_false(mask != 0)); | | 763 | } while (__predict_false(mask != 0)); |
751 | } | | 764 | } |
752 | } | | 765 | } |
753 | | | 766 | |
754 | /* | | 767 | /* |
755 | * Remove an LWP from all objects that it is waiting for. Concurrency | | 768 | * Remove an LWP from all objects that it is waiting for. Concurrency |
756 | * issues: | | 769 | * issues: |
757 | * | | 770 | * |
758 | * The object owner's (e.g. device driver) lock is not held here. Calls | | 771 | * The object owner's (e.g. device driver) lock is not held here. Calls |
759 | * can be made to selrecord() and we do not synchronize against those | | 772 | * can be made to selrecord() and we do not synchronize against those |
760 | * directly using locks. However, we use `sel_lwp' to lock out changes. | | 773 | * directly using locks. However, we use `sel_lwp' to lock out changes. |
761 | * Before clearing it we must use memory barriers to ensure that we can | | 774 | * Before clearing it we must use memory barriers to ensure that we can |
762 | * safely traverse the list of selinfo records. | | 775 | * safely traverse the list of selinfo records. |
763 | */ | | 776 | */ |
764 | static void | | 777 | static void |
765 | selclear(void) | | 778 | selclear(void) |
766 | { | | 779 | { |
767 | struct selinfo *sip, *next; | | 780 | struct selinfo *sip, *next; |
768 | selcluster_t *sc; | | 781 | selcluster_t *sc; |
769 | lwp_t *l; | | 782 | lwp_t *l; |
770 | kmutex_t *lock; | | 783 | kmutex_t *lock; |
771 | | | 784 | |
772 | l = curlwp; | | 785 | l = curlwp; |
773 | sc = l->l_selcluster; | | 786 | sc = l->l_selcluster; |
774 | lock = sc->sc_lock; | | 787 | lock = sc->sc_lock; |
775 | | | 788 | |
776 | mutex_spin_enter(lock); | | 789 | mutex_spin_enter(lock); |
777 | for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) { | | 790 | for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) { |
778 | KASSERT(sip->sel_lwp == l); | | 791 | KASSERT(sip->sel_lwp == l); |
779 | KASSERT(sip->sel_cluster == l->l_selcluster); | | 792 | KASSERT(sip->sel_cluster == l->l_selcluster); |
780 | | | 793 | |
781 | /* | | 794 | /* |
782 | * Read link to next selinfo record, if any. | | 795 | * Read link to next selinfo record, if any. |
783 | * It's no longer safe to touch `sip' after clearing | | 796 | * It's no longer safe to touch `sip' after clearing |
784 | * `sel_lwp', so ensure that the read of `sel_chain' | | 797 | * `sel_lwp', so ensure that the read of `sel_chain' |
785 | * completes before the clearing of sel_lwp becomes | | 798 | * completes before the clearing of sel_lwp becomes |
786 | * globally visible. | | 799 | * globally visible. |
787 | */ | | 800 | */ |
788 | next = SLIST_NEXT(sip, sel_chain); | | 801 | next = SLIST_NEXT(sip, sel_chain); |
789 | membar_exit(); | | 802 | membar_exit(); |
790 | /* Release the record for another named waiter to use. */ | | 803 | /* Release the record for another named waiter to use. */ |
791 | sip->sel_lwp = NULL; | | 804 | sip->sel_lwp = NULL; |
792 | } | | 805 | } |
793 | mutex_spin_exit(lock); | | 806 | mutex_spin_exit(lock); |
794 | } | | 807 | } |
795 | | | 808 | |
796 | /* | | 809 | /* |
797 | * Initialize the select/poll system calls. Called once for each | | 810 | * Initialize the select/poll system calls. Called once for each |
798 | * CPU in the system, as they are attached. | | 811 | * CPU in the system, as they are attached. |
799 | */ | | 812 | */ |
800 | void | | 813 | void |
801 | selsysinit(struct cpu_info *ci) | | 814 | selsysinit(struct cpu_info *ci) |
802 | { | | 815 | { |
803 | selcluster_t *sc; | | 816 | selcluster_t *sc; |
804 | u_int index; | | 817 | u_int index; |
805 | | | 818 | |
806 | /* If already a cluster in place for this bit, re-use. */ | | 819 | /* If already a cluster in place for this bit, re-use. */ |
807 | index = cpu_index(ci) & SELCLUSTERMASK; | | 820 | index = cpu_index(ci) & SELCLUSTERMASK; |
808 | sc = selcluster[index]; | | 821 | sc = selcluster[index]; |
809 | if (sc == NULL) { | | 822 | if (sc == NULL) { |
810 | sc = kmem_alloc(roundup2(sizeof(selcluster_t), | | 823 | sc = kmem_alloc(roundup2(sizeof(selcluster_t), |
811 | coherency_unit) + coherency_unit, KM_SLEEP); | | 824 | coherency_unit) + coherency_unit, KM_SLEEP); |
812 | sc = (void *)roundup2((uintptr_t)sc, coherency_unit); | | 825 | sc = (void *)roundup2((uintptr_t)sc, coherency_unit); |
813 | sc->sc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); | | 826 | sc->sc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); |
814 | sleepq_init(&sc->sc_sleepq); | | 827 | sleepq_init(&sc->sc_sleepq); |
815 | sc->sc_ncoll = 0; | | 828 | sc->sc_ncoll = 0; |
816 | sc->sc_mask = (1 << index); | | 829 | sc->sc_mask = (1 << index); |
817 | selcluster[index] = sc; | | 830 | selcluster[index] = sc; |
818 | } | | 831 | } |
819 | ci->ci_data.cpu_selcluster = sc; | | 832 | ci->ci_data.cpu_selcluster = sc; |
820 | } | | 833 | } |
821 | | | 834 | |
822 | /* | | 835 | /* |
823 | * Initialize a selinfo record. | | 836 | * Initialize a selinfo record. |
824 | */ | | 837 | */ |
825 | void | | 838 | void |
826 | selinit(struct selinfo *sip) | | 839 | selinit(struct selinfo *sip) |
827 | { | | 840 | { |
828 | | | 841 | |
829 | memset(sip, 0, sizeof(*sip)); | | 842 | memset(sip, 0, sizeof(*sip)); |
830 | } | | 843 | } |
831 | | | 844 | |
832 | /* | | 845 | /* |
833 | * Destroy a selinfo record. The owning object must not gain new | | 846 | * Destroy a selinfo record. The owning object must not gain new |
834 | * references while this is in progress: all activity on the record | | 847 | * references while this is in progress: all activity on the record |
835 | * must be stopped. | | 848 | * must be stopped. |
836 | * | | 849 | * |
837 | * Concurrency issues: we only need guard against a call to selclear() | | 850 | * Concurrency issues: we only need guard against a call to selclear() |
838 | * by a thread exiting sel_do_scan(). The caller has prevented further | | 851 | * by a thread exiting sel_do_scan(). The caller has prevented further |
839 | * references being made to the selinfo record via selrecord(), and it | | 852 | * references being made to the selinfo record via selrecord(), and it |
840 | * will not call selnotify() again. | | 853 | * will not call selnotify() again. |
841 | */ | | 854 | */ |
842 | void | | 855 | void |
843 | seldestroy(struct selinfo *sip) | | 856 | seldestroy(struct selinfo *sip) |
844 | { | | 857 | { |
845 | selcluster_t *sc; | | 858 | selcluster_t *sc; |
846 | kmutex_t *lock; | | 859 | kmutex_t *lock; |
847 | lwp_t *l; | | 860 | lwp_t *l; |
848 | | | 861 | |
849 | if (sip->sel_lwp == NULL) | | 862 | if (sip->sel_lwp == NULL) |
850 | return; | | 863 | return; |
851 | | | 864 | |
852 | /* | | 865 | /* |
853 | * Lock out selclear(). The selcluster pointer can't change while | | 866 | * Lock out selclear(). The selcluster pointer can't change while |
854 | * we are here since it is only ever changed in selrecord(), | | 867 | * we are here since it is only ever changed in selrecord(), |
855 | * and that will not be entered again for this record because | | 868 | * and that will not be entered again for this record because |
856 | * it is dying. | | 869 | * it is dying. |
857 | */ | | 870 | */ |
858 | KASSERT(sip->sel_cluster != NULL); | | 871 | KASSERT(sip->sel_cluster != NULL); |
859 | sc = sip->sel_cluster; | | 872 | sc = sip->sel_cluster; |
860 | lock = sc->sc_lock; | | 873 | lock = sc->sc_lock; |
861 | mutex_spin_enter(lock); | | 874 | mutex_spin_enter(lock); |
862 | if ((l = sip->sel_lwp) != NULL) { | | 875 | if ((l = sip->sel_lwp) != NULL) { |
863 | /* | | 876 | /* |
864 | * This should rarely happen, so although SLIST_REMOVE() | | 877 | * This should rarely happen, so although SLIST_REMOVE() |
865 | * is slow, using it here is not a problem. | | 878 | * is slow, using it here is not a problem. |
866 | */ | | 879 | */ |
867 | KASSERT(l->l_selcluster == sc); | | 880 | KASSERT(l->l_selcluster == sc); |
868 | SLIST_REMOVE(&l->l_selwait, sip, selinfo, sel_chain); | | 881 | SLIST_REMOVE(&l->l_selwait, sip, selinfo, sel_chain); |
869 | sip->sel_lwp = NULL; | | 882 | sip->sel_lwp = NULL; |
870 | } | | 883 | } |
871 | mutex_spin_exit(lock); | | 884 | mutex_spin_exit(lock); |
872 | } | | 885 | } |
873 | | | 886 | |
874 | /* | | 887 | /* |
875 | * System control nodes. | | 888 | * System control nodes. |
876 | */ | | 889 | */ |
877 | SYSCTL_SETUP(sysctl_select_setup, "sysctl select setup") | | 890 | SYSCTL_SETUP(sysctl_select_setup, "sysctl select setup") |
878 | { | | 891 | { |
879 | | | 892 | |
880 | sysctl_createv(clog, 0, NULL, NULL, | | 893 | sysctl_createv(clog, 0, NULL, NULL, |
881 | CTLFLAG_PERMANENT | CTLFLAG_READWRITE, | | 894 | CTLFLAG_PERMANENT | CTLFLAG_READWRITE, |
882 | CTLTYPE_INT, "direct_select", | | 895 | CTLTYPE_INT, "direct_select", |
883 | SYSCTL_DESCR("Enable/disable direct select (for testing)"), | | 896 | SYSCTL_DESCR("Enable/disable direct select (for testing)"), |
884 | NULL, 0, &direct_select, 0, | | 897 | NULL, 0, &direct_select, 0, |
885 | CTL_KERN, CTL_CREATE, CTL_EOL); | | 898 | CTL_KERN, CTL_CREATE, CTL_EOL); |
886 | } | | 899 | } |