Thu Nov 30 14:19:27 2017 UTC
Put the previously removed diagnostic back as debug. It has caught, in the past
(and again now), differences in kqueue behavior between NetBSD and other kqueue
implementations that depend on specific file types. If third-party programs
trigger this, it is probably because we are doing something different.


(christos)
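For context, here is a minimal sketch (illustrative only, not part of this
commit) of the kind of kevent(2) usage a third-party program might perform.
The choice of stdin is arbitrary; the point is that the descriptor could be a
pipe, socket, tty, or regular file, and whether the EVFILT_READ attach succeeds
for a given file type is exactly where kqueue implementations diverge:

/*
 * Illustrative only -- not part of this change.  Register EVFILT_READ
 * on an arbitrary descriptor and wait for one event.
 */
#include <sys/types.h>
#include <sys/event.h>

#include <err.h>
#include <unistd.h>

int
main(void)
{
    struct kevent kev;
    int kq;

    if ((kq = kqueue()) == -1)
        err(1, "kqueue");

    /* stdin is used here only as an example descriptor */
    EV_SET(&kev, STDIN_FILENO, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, 0);
    if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
        err(1, "kevent: EV_ADD");   /* may fail depending on the fd's type */

    /* block until the descriptor becomes readable */
    if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1)
        err(1, "kevent: wait");
    return 0;
}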
diff -r1.99 -r1.100 src/sys/kern/kern_event.c

--- src/sys/kern/kern_event.c 2017/11/30 05:52:40 1.99
+++ src/sys/kern/kern_event.c 2017/11/30 14:19:27 1.100
@@ -1,1719 +1,1726 @@ @@ -1,1719 +1,1726 @@
1/* $NetBSD: kern_event.c,v 1.99 2017/11/30 05:52:40 riastradh Exp $ */ 1/* $NetBSD: kern_event.c,v 1.100 2017/11/30 14:19:27 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/*- 32/*-
33 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 33 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
34 * All rights reserved. 34 * All rights reserved.
35 * 35 *
36 * Redistribution and use in source and binary forms, with or without 36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions 37 * modification, are permitted provided that the following conditions
38 * are met: 38 * are met:
39 * 1. Redistributions of source code must retain the above copyright 39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer. 40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright 41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the 42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution. 43 * documentation and/or other materials provided with the distribution.
44 * 44 *
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE. 55 * SUCH DAMAGE.
56 * 56 *
57 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp 57 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
58 */ 58 */
59 59
60#include <sys/cdefs.h> 60#include <sys/cdefs.h>
61__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.99 2017/11/30 05:52:40 riastradh Exp $"); 61__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.100 2017/11/30 14:19:27 christos Exp $");
62 62
63#include <sys/param.h> 63#include <sys/param.h>
64#include <sys/systm.h> 64#include <sys/systm.h>
65#include <sys/kernel.h> 65#include <sys/kernel.h>
66#include <sys/wait.h> 66#include <sys/wait.h>
67#include <sys/proc.h> 67#include <sys/proc.h>
68#include <sys/file.h> 68#include <sys/file.h>
69#include <sys/select.h> 69#include <sys/select.h>
70#include <sys/queue.h> 70#include <sys/queue.h>
71#include <sys/event.h> 71#include <sys/event.h>
72#include <sys/eventvar.h> 72#include <sys/eventvar.h>
73#include <sys/poll.h> 73#include <sys/poll.h>
74#include <sys/kmem.h> 74#include <sys/kmem.h>
75#include <sys/stat.h> 75#include <sys/stat.h>
76#include <sys/filedesc.h> 76#include <sys/filedesc.h>
77#include <sys/syscallargs.h> 77#include <sys/syscallargs.h>
78#include <sys/kauth.h> 78#include <sys/kauth.h>
79#include <sys/conf.h> 79#include <sys/conf.h>
80#include <sys/atomic.h> 80#include <sys/atomic.h>
81 81
82static int kqueue_scan(file_t *, size_t, struct kevent *, 82static int kqueue_scan(file_t *, size_t, struct kevent *,
83 const struct timespec *, register_t *, 83 const struct timespec *, register_t *,
84 const struct kevent_ops *, struct kevent *, 84 const struct kevent_ops *, struct kevent *,
85 size_t); 85 size_t);
86static int kqueue_ioctl(file_t *, u_long, void *); 86static int kqueue_ioctl(file_t *, u_long, void *);
87static int kqueue_fcntl(file_t *, u_int, void *); 87static int kqueue_fcntl(file_t *, u_int, void *);
88static int kqueue_poll(file_t *, int); 88static int kqueue_poll(file_t *, int);
89static int kqueue_kqfilter(file_t *, struct knote *); 89static int kqueue_kqfilter(file_t *, struct knote *);
90static int kqueue_stat(file_t *, struct stat *); 90static int kqueue_stat(file_t *, struct stat *);
91static int kqueue_close(file_t *); 91static int kqueue_close(file_t *);
92static int kqueue_register(struct kqueue *, struct kevent *); 92static int kqueue_register(struct kqueue *, struct kevent *);
93static void kqueue_doclose(struct kqueue *, struct klist *, int); 93static void kqueue_doclose(struct kqueue *, struct klist *, int);
94 94
95static void knote_detach(struct knote *, filedesc_t *fdp, bool); 95static void knote_detach(struct knote *, filedesc_t *fdp, bool);
96static void knote_enqueue(struct knote *); 96static void knote_enqueue(struct knote *);
97static void knote_activate(struct knote *); 97static void knote_activate(struct knote *);
98 98
99static void filt_kqdetach(struct knote *); 99static void filt_kqdetach(struct knote *);
100static int filt_kqueue(struct knote *, long hint); 100static int filt_kqueue(struct knote *, long hint);
101static int filt_procattach(struct knote *); 101static int filt_procattach(struct knote *);
102static void filt_procdetach(struct knote *); 102static void filt_procdetach(struct knote *);
103static int filt_proc(struct knote *, long hint); 103static int filt_proc(struct knote *, long hint);
104static int filt_fileattach(struct knote *); 104static int filt_fileattach(struct knote *);
105static void filt_timerexpire(void *x); 105static void filt_timerexpire(void *x);
106static int filt_timerattach(struct knote *); 106static int filt_timerattach(struct knote *);
107static void filt_timerdetach(struct knote *); 107static void filt_timerdetach(struct knote *);
108static int filt_timer(struct knote *, long hint); 108static int filt_timer(struct knote *, long hint);
109 109
110static const struct fileops kqueueops = { 110static const struct fileops kqueueops = {
111 .fo_read = (void *)enxio, 111 .fo_read = (void *)enxio,
112 .fo_write = (void *)enxio, 112 .fo_write = (void *)enxio,
113 .fo_ioctl = kqueue_ioctl, 113 .fo_ioctl = kqueue_ioctl,
114 .fo_fcntl = kqueue_fcntl, 114 .fo_fcntl = kqueue_fcntl,
115 .fo_poll = kqueue_poll, 115 .fo_poll = kqueue_poll,
116 .fo_stat = kqueue_stat, 116 .fo_stat = kqueue_stat,
117 .fo_close = kqueue_close, 117 .fo_close = kqueue_close,
118 .fo_kqfilter = kqueue_kqfilter, 118 .fo_kqfilter = kqueue_kqfilter,
119 .fo_restart = fnullop_restart, 119 .fo_restart = fnullop_restart,
120}; 120};
121 121
122static const struct filterops kqread_filtops = { 122static const struct filterops kqread_filtops = {
123 .f_isfd = 1, 123 .f_isfd = 1,
124 .f_attach = NULL, 124 .f_attach = NULL,
125 .f_detach = filt_kqdetach, 125 .f_detach = filt_kqdetach,
126 .f_event = filt_kqueue, 126 .f_event = filt_kqueue,
127}; 127};
128 128
129static const struct filterops proc_filtops = { 129static const struct filterops proc_filtops = {
130 .f_isfd = 0, 130 .f_isfd = 0,
131 .f_attach = filt_procattach, 131 .f_attach = filt_procattach,
132 .f_detach = filt_procdetach, 132 .f_detach = filt_procdetach,
133 .f_event = filt_proc, 133 .f_event = filt_proc,
134}; 134};
135 135
136static const struct filterops file_filtops = { 136static const struct filterops file_filtops = {
137 .f_isfd = 1, 137 .f_isfd = 1,
138 .f_attach = filt_fileattach, 138 .f_attach = filt_fileattach,
139 .f_detach = NULL, 139 .f_detach = NULL,
140 .f_event = NULL, 140 .f_event = NULL,
141}; 141};
142 142
143static const struct filterops timer_filtops = { 143static const struct filterops timer_filtops = {
144 .f_isfd = 0, 144 .f_isfd = 0,
145 .f_attach = filt_timerattach, 145 .f_attach = filt_timerattach,
146 .f_detach = filt_timerdetach, 146 .f_detach = filt_timerdetach,
147 .f_event = filt_timer, 147 .f_event = filt_timer,
148}; 148};
149 149
150static u_int kq_ncallouts = 0; 150static u_int kq_ncallouts = 0;
151static int kq_calloutmax = (4 * 1024); 151static int kq_calloutmax = (4 * 1024);
152 152
153#define KN_HASHSIZE 64 /* XXX should be tunable */ 153#define KN_HASHSIZE 64 /* XXX should be tunable */
154#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 154#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
155 155
156extern const struct filterops sig_filtops; 156extern const struct filterops sig_filtops;
157 157
158/* 158/*
159 * Table for for all system-defined filters. 159 * Table for for all system-defined filters.
160 * These should be listed in the numeric order of the EVFILT_* defines. 160 * These should be listed in the numeric order of the EVFILT_* defines.
161 * If filtops is NULL, the filter isn't implemented in NetBSD. 161 * If filtops is NULL, the filter isn't implemented in NetBSD.
162 * End of list is when name is NULL. 162 * End of list is when name is NULL.
163 * 163 *
164 * Note that 'refcnt' is meaningless for built-in filters. 164 * Note that 'refcnt' is meaningless for built-in filters.
165 */ 165 */
166struct kfilter { 166struct kfilter {
167 const char *name; /* name of filter */ 167 const char *name; /* name of filter */
168 uint32_t filter; /* id of filter */ 168 uint32_t filter; /* id of filter */
169 unsigned refcnt; /* reference count */ 169 unsigned refcnt; /* reference count */
170 const struct filterops *filtops;/* operations for filter */ 170 const struct filterops *filtops;/* operations for filter */
171 size_t namelen; /* length of name string */ 171 size_t namelen; /* length of name string */
172}; 172};
173 173
174/* System defined filters */ 174/* System defined filters */
175static struct kfilter sys_kfilters[] = { 175static struct kfilter sys_kfilters[] = {
176 { "EVFILT_READ", EVFILT_READ, 0, &file_filtops, 0 }, 176 { "EVFILT_READ", EVFILT_READ, 0, &file_filtops, 0 },
177 { "EVFILT_WRITE", EVFILT_WRITE, 0, &file_filtops, 0, }, 177 { "EVFILT_WRITE", EVFILT_WRITE, 0, &file_filtops, 0, },
178 { "EVFILT_AIO", EVFILT_AIO, 0, NULL, 0 }, 178 { "EVFILT_AIO", EVFILT_AIO, 0, NULL, 0 },
179 { "EVFILT_VNODE", EVFILT_VNODE, 0, &file_filtops, 0 }, 179 { "EVFILT_VNODE", EVFILT_VNODE, 0, &file_filtops, 0 },
180 { "EVFILT_PROC", EVFILT_PROC, 0, &proc_filtops, 0 }, 180 { "EVFILT_PROC", EVFILT_PROC, 0, &proc_filtops, 0 },
181 { "EVFILT_SIGNAL", EVFILT_SIGNAL, 0, &sig_filtops, 0 }, 181 { "EVFILT_SIGNAL", EVFILT_SIGNAL, 0, &sig_filtops, 0 },
182 { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 }, 182 { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 },
183 { NULL, 0, 0, NULL, 0 }, 183 { NULL, 0, 0, NULL, 0 },
184}; 184};
185 185
186/* User defined kfilters */ 186/* User defined kfilters */
187static struct kfilter *user_kfilters; /* array */ 187static struct kfilter *user_kfilters; /* array */
188static int user_kfilterc; /* current offset */ 188static int user_kfilterc; /* current offset */
189static int user_kfiltermaxc; /* max size so far */ 189static int user_kfiltermaxc; /* max size so far */
190static size_t user_kfiltersz; /* size of allocated memory */ 190static size_t user_kfiltersz; /* size of allocated memory */
191 191
192/* 192/*
193 * Global Locks. 193 * Global Locks.
194 * 194 *
195 * Lock order: 195 * Lock order:
196 * 196 *
197 * kqueue_filter_lock 197 * kqueue_filter_lock
198 * -> kn_kq->kq_fdp->fd_lock 198 * -> kn_kq->kq_fdp->fd_lock
199 * -> object lock (e.g., device driver lock, kqueue_misc_lock, &c.) 199 * -> object lock (e.g., device driver lock, kqueue_misc_lock, &c.)
200 * -> kn_kq->kq_lock 200 * -> kn_kq->kq_lock
201 * 201 *
202 * Locking rules: 202 * Locking rules:
203 * 203 *
204 * f_attach: fdp->fd_lock, KERNEL_LOCK 204 * f_attach: fdp->fd_lock, KERNEL_LOCK
205 * f_detach: fdp->fd_lock, KERNEL_LOCK 205 * f_detach: fdp->fd_lock, KERNEL_LOCK
206 * f_event(!NOTE_SUBMIT) via kevent: fdp->fd_lock, _no_ object lock 206 * f_event(!NOTE_SUBMIT) via kevent: fdp->fd_lock, _no_ object lock
207 * f_event via knote: whatever caller guarantees 207 * f_event via knote: whatever caller guarantees
208 * Typically, f_event(NOTE_SUBMIT) via knote: object lock 208 * Typically, f_event(NOTE_SUBMIT) via knote: object lock
209 * f_event(!NOTE_SUBMIT) via knote: nothing, 209 * f_event(!NOTE_SUBMIT) via knote: nothing,
210 * acquires/releases object lock inside. 210 * acquires/releases object lock inside.
211 */ 211 */
212static krwlock_t kqueue_filter_lock; /* lock on filter lists */ 212static krwlock_t kqueue_filter_lock; /* lock on filter lists */
213static kmutex_t kqueue_misc_lock; /* miscellaneous */ 213static kmutex_t kqueue_misc_lock; /* miscellaneous */
214 214
215static kauth_listener_t kqueue_listener; 215static kauth_listener_t kqueue_listener;
216 216
217static int 217static int
218kqueue_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 218kqueue_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
219 void *arg0, void *arg1, void *arg2, void *arg3) 219 void *arg0, void *arg1, void *arg2, void *arg3)
220{ 220{
221 struct proc *p; 221 struct proc *p;
222 int result; 222 int result;
223 223
224 result = KAUTH_RESULT_DEFER; 224 result = KAUTH_RESULT_DEFER;
225 p = arg0; 225 p = arg0;
226 226
227 if (action != KAUTH_PROCESS_KEVENT_FILTER) 227 if (action != KAUTH_PROCESS_KEVENT_FILTER)
228 return result; 228 return result;
229 229
230 if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(cred) || 230 if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(cred) ||
231 ISSET(p->p_flag, PK_SUGID))) 231 ISSET(p->p_flag, PK_SUGID)))
232 return result; 232 return result;
233 233
234 result = KAUTH_RESULT_ALLOW; 234 result = KAUTH_RESULT_ALLOW;
235 235
236 return result; 236 return result;
237} 237}
238 238
239/* 239/*
240 * Initialize the kqueue subsystem. 240 * Initialize the kqueue subsystem.
241 */ 241 */
242void 242void
243kqueue_init(void) 243kqueue_init(void)
244{ 244{
245 245
246 rw_init(&kqueue_filter_lock); 246 rw_init(&kqueue_filter_lock);
247 mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE); 247 mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
248 248
249 kqueue_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 249 kqueue_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
250 kqueue_listener_cb, NULL); 250 kqueue_listener_cb, NULL);
251} 251}
252 252
253/* 253/*
254 * Find kfilter entry by name, or NULL if not found. 254 * Find kfilter entry by name, or NULL if not found.
255 */ 255 */
256static struct kfilter * 256static struct kfilter *
257kfilter_byname_sys(const char *name) 257kfilter_byname_sys(const char *name)
258{ 258{
259 int i; 259 int i;
260 260
261 KASSERT(rw_lock_held(&kqueue_filter_lock)); 261 KASSERT(rw_lock_held(&kqueue_filter_lock));
262 262
263 for (i = 0; sys_kfilters[i].name != NULL; i++) { 263 for (i = 0; sys_kfilters[i].name != NULL; i++) {
264 if (strcmp(name, sys_kfilters[i].name) == 0) 264 if (strcmp(name, sys_kfilters[i].name) == 0)
265 return &sys_kfilters[i]; 265 return &sys_kfilters[i];
266 } 266 }
267 return NULL; 267 return NULL;
268} 268}
269 269
270static struct kfilter * 270static struct kfilter *
271kfilter_byname_user(const char *name) 271kfilter_byname_user(const char *name)
272{ 272{
273 int i; 273 int i;
274 274
275 KASSERT(rw_lock_held(&kqueue_filter_lock)); 275 KASSERT(rw_lock_held(&kqueue_filter_lock));
276 276
277 /* user filter slots have a NULL name if previously deregistered */ 277 /* user filter slots have a NULL name if previously deregistered */
278 for (i = 0; i < user_kfilterc ; i++) { 278 for (i = 0; i < user_kfilterc ; i++) {
279 if (user_kfilters[i].name != NULL && 279 if (user_kfilters[i].name != NULL &&
280 strcmp(name, user_kfilters[i].name) == 0) 280 strcmp(name, user_kfilters[i].name) == 0)
281 return &user_kfilters[i]; 281 return &user_kfilters[i];
282 } 282 }
283 return NULL; 283 return NULL;
284} 284}
285 285
286static struct kfilter * 286static struct kfilter *
287kfilter_byname(const char *name) 287kfilter_byname(const char *name)
288{ 288{
289 struct kfilter *kfilter; 289 struct kfilter *kfilter;
290 290
291 KASSERT(rw_lock_held(&kqueue_filter_lock)); 291 KASSERT(rw_lock_held(&kqueue_filter_lock));
292 292
293 if ((kfilter = kfilter_byname_sys(name)) != NULL) 293 if ((kfilter = kfilter_byname_sys(name)) != NULL)
294 return kfilter; 294 return kfilter;
295 295
296 return kfilter_byname_user(name); 296 return kfilter_byname_user(name);
297} 297}
298 298
299/* 299/*
300 * Find kfilter entry by filter id, or NULL if not found. 300 * Find kfilter entry by filter id, or NULL if not found.
301 * Assumes entries are indexed in filter id order, for speed. 301 * Assumes entries are indexed in filter id order, for speed.
302 */ 302 */
303static struct kfilter * 303static struct kfilter *
304kfilter_byfilter(uint32_t filter) 304kfilter_byfilter(uint32_t filter)
305{ 305{
306 struct kfilter *kfilter; 306 struct kfilter *kfilter;
307 307
308 KASSERT(rw_lock_held(&kqueue_filter_lock)); 308 KASSERT(rw_lock_held(&kqueue_filter_lock));
309 309
310 if (filter < EVFILT_SYSCOUNT) /* it's a system filter */ 310 if (filter < EVFILT_SYSCOUNT) /* it's a system filter */
311 kfilter = &sys_kfilters[filter]; 311 kfilter = &sys_kfilters[filter];
312 else if (user_kfilters != NULL && 312 else if (user_kfilters != NULL &&
313 filter < EVFILT_SYSCOUNT + user_kfilterc) 313 filter < EVFILT_SYSCOUNT + user_kfilterc)
314 /* it's a user filter */ 314 /* it's a user filter */
315 kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT]; 315 kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
316 else 316 else
317 return (NULL); /* out of range */ 317 return (NULL); /* out of range */
318 KASSERT(kfilter->filter == filter); /* sanity check! */ 318 KASSERT(kfilter->filter == filter); /* sanity check! */
319 return (kfilter); 319 return (kfilter);
320} 320}
321 321
322/* 322/*
323 * Register a new kfilter. Stores the entry in user_kfilters. 323 * Register a new kfilter. Stores the entry in user_kfilters.
324 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 324 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
325 * If retfilter != NULL, the new filterid is returned in it. 325 * If retfilter != NULL, the new filterid is returned in it.
326 */ 326 */
327int 327int
328kfilter_register(const char *name, const struct filterops *filtops, 328kfilter_register(const char *name, const struct filterops *filtops,
329 int *retfilter) 329 int *retfilter)
330{ 330{
331 struct kfilter *kfilter; 331 struct kfilter *kfilter;
332 size_t len; 332 size_t len;
333 int i; 333 int i;
334 334
335 if (name == NULL || name[0] == '\0' || filtops == NULL) 335 if (name == NULL || name[0] == '\0' || filtops == NULL)
336 return (EINVAL); /* invalid args */ 336 return (EINVAL); /* invalid args */
337 337
338 rw_enter(&kqueue_filter_lock, RW_WRITER); 338 rw_enter(&kqueue_filter_lock, RW_WRITER);
339 if (kfilter_byname(name) != NULL) { 339 if (kfilter_byname(name) != NULL) {
340 rw_exit(&kqueue_filter_lock); 340 rw_exit(&kqueue_filter_lock);
341 return (EEXIST); /* already exists */ 341 return (EEXIST); /* already exists */
342 } 342 }
343 if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) { 343 if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
344 rw_exit(&kqueue_filter_lock); 344 rw_exit(&kqueue_filter_lock);
345 return (EINVAL); /* too many */ 345 return (EINVAL); /* too many */
346 } 346 }
347 347
348 for (i = 0; i < user_kfilterc; i++) { 348 for (i = 0; i < user_kfilterc; i++) {
349 kfilter = &user_kfilters[i]; 349 kfilter = &user_kfilters[i];
350 if (kfilter->name == NULL) { 350 if (kfilter->name == NULL) {
351 /* Previously deregistered slot. Reuse. */ 351 /* Previously deregistered slot. Reuse. */
352 goto reuse; 352 goto reuse;
353 } 353 }
354 } 354 }
355 355
356 /* check if need to grow user_kfilters */ 356 /* check if need to grow user_kfilters */
357 if (user_kfilterc + 1 > user_kfiltermaxc) { 357 if (user_kfilterc + 1 > user_kfiltermaxc) {
358 /* Grow in KFILTER_EXTENT chunks. */ 358 /* Grow in KFILTER_EXTENT chunks. */
359 user_kfiltermaxc += KFILTER_EXTENT; 359 user_kfiltermaxc += KFILTER_EXTENT;
360 len = user_kfiltermaxc * sizeof(*kfilter); 360 len = user_kfiltermaxc * sizeof(*kfilter);
361 kfilter = kmem_alloc(len, KM_SLEEP); 361 kfilter = kmem_alloc(len, KM_SLEEP);
362 memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz); 362 memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz);
363 if (user_kfilters != NULL) { 363 if (user_kfilters != NULL) {
364 memcpy(kfilter, user_kfilters, user_kfiltersz); 364 memcpy(kfilter, user_kfilters, user_kfiltersz);
365 kmem_free(user_kfilters, user_kfiltersz); 365 kmem_free(user_kfilters, user_kfiltersz);
366 } 366 }
367 user_kfiltersz = len; 367 user_kfiltersz = len;
368 user_kfilters = kfilter; 368 user_kfilters = kfilter;
369 } 369 }
370 /* Adding new slot */ 370 /* Adding new slot */
371 kfilter = &user_kfilters[user_kfilterc++]; 371 kfilter = &user_kfilters[user_kfilterc++];
372reuse: 372reuse:
373 kfilter->name = kmem_strdupsize(name, &kfilter->namelen, KM_SLEEP); 373 kfilter->name = kmem_strdupsize(name, &kfilter->namelen, KM_SLEEP);
374 374
375 kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT; 375 kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;
376 376
377 kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP); 377 kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
378 memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops)); 378 memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));
379 379
380 if (retfilter != NULL) 380 if (retfilter != NULL)
381 *retfilter = kfilter->filter; 381 *retfilter = kfilter->filter;
382 rw_exit(&kqueue_filter_lock); 382 rw_exit(&kqueue_filter_lock);
383 383
384 return (0); 384 return (0);
385} 385}
386 386
387/* 387/*
388 * Unregister a kfilter previously registered with kfilter_register. 388 * Unregister a kfilter previously registered with kfilter_register.
389 * This retains the filter id, but clears the name and frees filtops (filter 389 * This retains the filter id, but clears the name and frees filtops (filter
390 * operations), so that the number isn't reused during a boot. 390 * operations), so that the number isn't reused during a boot.
391 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 391 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
392 */ 392 */
393int 393int
394kfilter_unregister(const char *name) 394kfilter_unregister(const char *name)
395{ 395{
396 struct kfilter *kfilter; 396 struct kfilter *kfilter;
397 397
398 if (name == NULL || name[0] == '\0') 398 if (name == NULL || name[0] == '\0')
399 return (EINVAL); /* invalid name */ 399 return (EINVAL); /* invalid name */
400 400
401 rw_enter(&kqueue_filter_lock, RW_WRITER); 401 rw_enter(&kqueue_filter_lock, RW_WRITER);
402 if (kfilter_byname_sys(name) != NULL) { 402 if (kfilter_byname_sys(name) != NULL) {
403 rw_exit(&kqueue_filter_lock); 403 rw_exit(&kqueue_filter_lock);
404 return (EINVAL); /* can't detach system filters */ 404 return (EINVAL); /* can't detach system filters */
405 } 405 }
406 406
407 kfilter = kfilter_byname_user(name); 407 kfilter = kfilter_byname_user(name);
408 if (kfilter == NULL) { 408 if (kfilter == NULL) {
409 rw_exit(&kqueue_filter_lock); 409 rw_exit(&kqueue_filter_lock);
410 return (ENOENT); 410 return (ENOENT);
411 } 411 }
412 if (kfilter->refcnt != 0) { 412 if (kfilter->refcnt != 0) {
413 rw_exit(&kqueue_filter_lock); 413 rw_exit(&kqueue_filter_lock);
414 return (EBUSY); 414 return (EBUSY);
415 } 415 }
416 416
417 /* Cast away const (but we know it's safe. */ 417 /* Cast away const (but we know it's safe. */
418 kmem_free(__UNCONST(kfilter->name), kfilter->namelen); 418 kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
419 kfilter->name = NULL; /* mark as `not implemented' */ 419 kfilter->name = NULL; /* mark as `not implemented' */
420 420
421 if (kfilter->filtops != NULL) { 421 if (kfilter->filtops != NULL) {
422 /* Cast away const (but we know it's safe. */ 422 /* Cast away const (but we know it's safe. */
423 kmem_free(__UNCONST(kfilter->filtops), 423 kmem_free(__UNCONST(kfilter->filtops),
424 sizeof(*kfilter->filtops)); 424 sizeof(*kfilter->filtops));
425 kfilter->filtops = NULL; /* mark as `not implemented' */ 425 kfilter->filtops = NULL; /* mark as `not implemented' */
426 } 426 }
427 rw_exit(&kqueue_filter_lock); 427 rw_exit(&kqueue_filter_lock);
428 428
429 return (0); 429 return (0);
430} 430}
431 431
432 432
433/* 433/*
434 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file 434 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
435 * descriptors. Calls fileops kqfilter method for given file descriptor. 435 * descriptors. Calls fileops kqfilter method for given file descriptor.
436 */ 436 */
437static int 437static int
438filt_fileattach(struct knote *kn) 438filt_fileattach(struct knote *kn)
439{ 439{
440 file_t *fp; 440 file_t *fp;
441 441
442 fp = kn->kn_obj; 442 fp = kn->kn_obj;
443 443
444 return (*fp->f_ops->fo_kqfilter)(fp, kn); 444 return (*fp->f_ops->fo_kqfilter)(fp, kn);
445} 445}
446 446
447/* 447/*
448 * Filter detach method for EVFILT_READ on kqueue descriptor. 448 * Filter detach method for EVFILT_READ on kqueue descriptor.
449 */ 449 */
450static void 450static void
451filt_kqdetach(struct knote *kn) 451filt_kqdetach(struct knote *kn)
452{ 452{
453 struct kqueue *kq; 453 struct kqueue *kq;
454 454
455 kq = ((file_t *)kn->kn_obj)->f_kqueue; 455 kq = ((file_t *)kn->kn_obj)->f_kqueue;
456 456
457 mutex_spin_enter(&kq->kq_lock); 457 mutex_spin_enter(&kq->kq_lock);
458 SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext); 458 SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
459 mutex_spin_exit(&kq->kq_lock); 459 mutex_spin_exit(&kq->kq_lock);
460} 460}
461 461
462/* 462/*
463 * Filter event method for EVFILT_READ on kqueue descriptor. 463 * Filter event method for EVFILT_READ on kqueue descriptor.
464 */ 464 */
465/*ARGSUSED*/ 465/*ARGSUSED*/
466static int 466static int
467filt_kqueue(struct knote *kn, long hint) 467filt_kqueue(struct knote *kn, long hint)
468{ 468{
469 struct kqueue *kq; 469 struct kqueue *kq;
470 int rv; 470 int rv;
471 471
472 kq = ((file_t *)kn->kn_obj)->f_kqueue; 472 kq = ((file_t *)kn->kn_obj)->f_kqueue;
473 473
474 if (hint != NOTE_SUBMIT) 474 if (hint != NOTE_SUBMIT)
475 mutex_spin_enter(&kq->kq_lock); 475 mutex_spin_enter(&kq->kq_lock);
476 kn->kn_data = kq->kq_count; 476 kn->kn_data = kq->kq_count;
477 rv = (kn->kn_data > 0); 477 rv = (kn->kn_data > 0);
478 if (hint != NOTE_SUBMIT) 478 if (hint != NOTE_SUBMIT)
479 mutex_spin_exit(&kq->kq_lock); 479 mutex_spin_exit(&kq->kq_lock);
480 480
481 return rv; 481 return rv;
482} 482}
483 483
484/* 484/*
485 * Filter attach method for EVFILT_PROC. 485 * Filter attach method for EVFILT_PROC.
486 */ 486 */
487static int 487static int
488filt_procattach(struct knote *kn) 488filt_procattach(struct knote *kn)
489{ 489{
490 struct proc *p; 490 struct proc *p;
491 struct lwp *curl; 491 struct lwp *curl;
492 492
493 curl = curlwp; 493 curl = curlwp;
494 494
495 mutex_enter(proc_lock); 495 mutex_enter(proc_lock);
496 if (kn->kn_flags & EV_FLAG1) { 496 if (kn->kn_flags & EV_FLAG1) {
497 /* 497 /*
498 * NOTE_TRACK attaches to the child process too early 498 * NOTE_TRACK attaches to the child process too early
499 * for proc_find, so do a raw look up and check the state 499 * for proc_find, so do a raw look up and check the state
500 * explicitly. 500 * explicitly.
501 */ 501 */
502 p = proc_find_raw(kn->kn_id); 502 p = proc_find_raw(kn->kn_id);
503 if (p != NULL && p->p_stat != SIDL) 503 if (p != NULL && p->p_stat != SIDL)
504 p = NULL; 504 p = NULL;
505 } else { 505 } else {
506 p = proc_find(kn->kn_id); 506 p = proc_find(kn->kn_id);
507 } 507 }
508 508
509 if (p == NULL) { 509 if (p == NULL) {
510 mutex_exit(proc_lock); 510 mutex_exit(proc_lock);
511 return ESRCH; 511 return ESRCH;
512 } 512 }
513 513
514 /* 514 /*
515 * Fail if it's not owned by you, or the last exec gave us 515 * Fail if it's not owned by you, or the last exec gave us
516 * setuid/setgid privs (unless you're root). 516 * setuid/setgid privs (unless you're root).
517 */ 517 */
518 mutex_enter(p->p_lock); 518 mutex_enter(p->p_lock);
519 mutex_exit(proc_lock); 519 mutex_exit(proc_lock);
520 if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER, 520 if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER,
521 p, NULL, NULL, NULL) != 0) { 521 p, NULL, NULL, NULL) != 0) {
522 mutex_exit(p->p_lock); 522 mutex_exit(p->p_lock);
523 return EACCES; 523 return EACCES;
524 } 524 }
525 525
526 kn->kn_obj = p; 526 kn->kn_obj = p;
527 kn->kn_flags |= EV_CLEAR; /* automatically set */ 527 kn->kn_flags |= EV_CLEAR; /* automatically set */
528 528
529 /* 529 /*
530 * internal flag indicating registration done by kernel 530 * internal flag indicating registration done by kernel
531 */ 531 */
532 if (kn->kn_flags & EV_FLAG1) { 532 if (kn->kn_flags & EV_FLAG1) {
533 kn->kn_data = kn->kn_sdata; /* ppid */ 533 kn->kn_data = kn->kn_sdata; /* ppid */
534 kn->kn_fflags = NOTE_CHILD; 534 kn->kn_fflags = NOTE_CHILD;
535 kn->kn_flags &= ~EV_FLAG1; 535 kn->kn_flags &= ~EV_FLAG1;
536 } 536 }
537 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext); 537 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
538 mutex_exit(p->p_lock); 538 mutex_exit(p->p_lock);
539 539
540 return 0; 540 return 0;
541} 541}
542 542
543/* 543/*
544 * Filter detach method for EVFILT_PROC. 544 * Filter detach method for EVFILT_PROC.
545 * 545 *
546 * The knote may be attached to a different process, which may exit, 546 * The knote may be attached to a different process, which may exit,
547 * leaving nothing for the knote to be attached to. So when the process 547 * leaving nothing for the knote to be attached to. So when the process
548 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 548 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
549 * it will be deleted when read out. However, as part of the knote deletion, 549 * it will be deleted when read out. However, as part of the knote deletion,
550 * this routine is called, so a check is needed to avoid actually performing 550 * this routine is called, so a check is needed to avoid actually performing
551 * a detach, because the original process might not exist any more. 551 * a detach, because the original process might not exist any more.
552 */ 552 */
553static void 553static void
554filt_procdetach(struct knote *kn) 554filt_procdetach(struct knote *kn)
555{ 555{
556 struct proc *p; 556 struct proc *p;
557 557
558 if (kn->kn_status & KN_DETACHED) 558 if (kn->kn_status & KN_DETACHED)
559 return; 559 return;
560 560
561 p = kn->kn_obj; 561 p = kn->kn_obj;
562 562
563 mutex_enter(p->p_lock); 563 mutex_enter(p->p_lock);
564 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext); 564 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
565 mutex_exit(p->p_lock); 565 mutex_exit(p->p_lock);
566} 566}
567 567
568/* 568/*
569 * Filter event method for EVFILT_PROC. 569 * Filter event method for EVFILT_PROC.
570 */ 570 */
571static int 571static int
572filt_proc(struct knote *kn, long hint) 572filt_proc(struct knote *kn, long hint)
573{ 573{
574 u_int event, fflag; 574 u_int event, fflag;
575 struct kevent kev; 575 struct kevent kev;
576 struct kqueue *kq; 576 struct kqueue *kq;
577 int error; 577 int error;
578 578
579 event = (u_int)hint & NOTE_PCTRLMASK; 579 event = (u_int)hint & NOTE_PCTRLMASK;
580 kq = kn->kn_kq; 580 kq = kn->kn_kq;
581 fflag = 0; 581 fflag = 0;
582 582
583 /* If the user is interested in this event, record it. */ 583 /* If the user is interested in this event, record it. */
584 if (kn->kn_sfflags & event) 584 if (kn->kn_sfflags & event)
585 fflag |= event; 585 fflag |= event;
586 586
587 if (event == NOTE_EXIT) { 587 if (event == NOTE_EXIT) {
588 struct proc *p = kn->kn_obj; 588 struct proc *p = kn->kn_obj;
589 589
590 if (p != NULL) 590 if (p != NULL)
591 kn->kn_data = P_WAITSTATUS(p); 591 kn->kn_data = P_WAITSTATUS(p);
592 /* 592 /*
593 * Process is gone, so flag the event as finished. 593 * Process is gone, so flag the event as finished.
594 * 594 *
595 * Detach the knote from watched process and mark 595 * Detach the knote from watched process and mark
596 * it as such. We can't leave this to kqueue_scan(), 596 * it as such. We can't leave this to kqueue_scan(),
597 * since the process might not exist by then. And we 597 * since the process might not exist by then. And we
598 * have to do this now, since psignal KNOTE() is called 598 * have to do this now, since psignal KNOTE() is called
599 * also for zombies and we might end up reading freed 599 * also for zombies and we might end up reading freed
600 * memory if the kevent would already be picked up 600 * memory if the kevent would already be picked up
601 * and knote g/c'ed. 601 * and knote g/c'ed.
602 */ 602 */
603 filt_procdetach(kn); 603 filt_procdetach(kn);
604 604
605 mutex_spin_enter(&kq->kq_lock); 605 mutex_spin_enter(&kq->kq_lock);
606 kn->kn_status |= KN_DETACHED; 606 kn->kn_status |= KN_DETACHED;
607 /* Mark as ONESHOT, so that the knote it g/c'ed when read */ 607 /* Mark as ONESHOT, so that the knote it g/c'ed when read */
608 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 608 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
609 kn->kn_fflags |= fflag; 609 kn->kn_fflags |= fflag;
610 mutex_spin_exit(&kq->kq_lock); 610 mutex_spin_exit(&kq->kq_lock);
611 611
612 return 1; 612 return 1;
613 } 613 }
614 614
615 mutex_spin_enter(&kq->kq_lock); 615 mutex_spin_enter(&kq->kq_lock);
616 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 616 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
617 /* 617 /*
618 * Process forked, and user wants to track the new process, 618 * Process forked, and user wants to track the new process,
619 * so attach a new knote to it, and immediately report an 619 * so attach a new knote to it, and immediately report an
620 * event with the parent's pid. Register knote with new 620 * event with the parent's pid. Register knote with new
621 * process. 621 * process.
622 */ 622 */
623 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 623 kev.ident = hint & NOTE_PDATAMASK; /* pid */
624 kev.filter = kn->kn_filter; 624 kev.filter = kn->kn_filter;
625 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 625 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
626 kev.fflags = kn->kn_sfflags; 626 kev.fflags = kn->kn_sfflags;
627 kev.data = kn->kn_id; /* parent */ 627 kev.data = kn->kn_id; /* parent */
628 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 628 kev.udata = kn->kn_kevent.udata; /* preserve udata */
629 mutex_spin_exit(&kq->kq_lock); 629 mutex_spin_exit(&kq->kq_lock);
630 error = kqueue_register(kq, &kev); 630 error = kqueue_register(kq, &kev);
631 mutex_spin_enter(&kq->kq_lock); 631 mutex_spin_enter(&kq->kq_lock);
632 if (error != 0) 632 if (error != 0)
633 kn->kn_fflags |= NOTE_TRACKERR; 633 kn->kn_fflags |= NOTE_TRACKERR;
634 } 634 }
635 kn->kn_fflags |= fflag; 635 kn->kn_fflags |= fflag;
636 fflag = kn->kn_fflags; 636 fflag = kn->kn_fflags;
637 mutex_spin_exit(&kq->kq_lock); 637 mutex_spin_exit(&kq->kq_lock);
638 638
639 return fflag != 0; 639 return fflag != 0;
640} 640}
641 641
642static void 642static void
643filt_timerexpire(void *knx) 643filt_timerexpire(void *knx)
644{ 644{
645 struct knote *kn = knx; 645 struct knote *kn = knx;
646 int tticks; 646 int tticks;
647 647
648 mutex_enter(&kqueue_misc_lock); 648 mutex_enter(&kqueue_misc_lock);
649 kn->kn_data++; 649 kn->kn_data++;
650 knote_activate(kn); 650 knote_activate(kn);
651 if ((kn->kn_flags & EV_ONESHOT) == 0) { 651 if ((kn->kn_flags & EV_ONESHOT) == 0) {
652 tticks = mstohz(kn->kn_sdata); 652 tticks = mstohz(kn->kn_sdata);
653 if (tticks <= 0) 653 if (tticks <= 0)
654 tticks = 1; 654 tticks = 1;
655 callout_schedule((callout_t *)kn->kn_hook, tticks); 655 callout_schedule((callout_t *)kn->kn_hook, tticks);
656 } 656 }
657 mutex_exit(&kqueue_misc_lock); 657 mutex_exit(&kqueue_misc_lock);
658} 658}
659 659
660/* 660/*
661 * data contains amount of time to sleep, in milliseconds 661 * data contains amount of time to sleep, in milliseconds
662 */ 662 */
663static int 663static int
664filt_timerattach(struct knote *kn) 664filt_timerattach(struct knote *kn)
665{ 665{
666 callout_t *calloutp; 666 callout_t *calloutp;
667 struct kqueue *kq; 667 struct kqueue *kq;
668 int tticks; 668 int tticks;
669 669
670 tticks = mstohz(kn->kn_sdata); 670 tticks = mstohz(kn->kn_sdata);
671 671
672 /* if the supplied value is under our resolution, use 1 tick */ 672 /* if the supplied value is under our resolution, use 1 tick */
673 if (tticks == 0) { 673 if (tticks == 0) {
674 if (kn->kn_sdata == 0) 674 if (kn->kn_sdata == 0)
675 return EINVAL; 675 return EINVAL;
676 tticks = 1; 676 tticks = 1;
677 } 677 }
678 678
679 if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax || 679 if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
680 (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) { 680 (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
681 atomic_dec_uint(&kq_ncallouts); 681 atomic_dec_uint(&kq_ncallouts);
682 return ENOMEM; 682 return ENOMEM;
683 } 683 }
684 callout_init(calloutp, CALLOUT_MPSAFE); 684 callout_init(calloutp, CALLOUT_MPSAFE);
685 685
686 kq = kn->kn_kq; 686 kq = kn->kn_kq;
687 mutex_spin_enter(&kq->kq_lock); 687 mutex_spin_enter(&kq->kq_lock);
688 kn->kn_flags |= EV_CLEAR; /* automatically set */ 688 kn->kn_flags |= EV_CLEAR; /* automatically set */
689 kn->kn_hook = calloutp; 689 kn->kn_hook = calloutp;
690 mutex_spin_exit(&kq->kq_lock); 690 mutex_spin_exit(&kq->kq_lock);
691 691
692 callout_reset(calloutp, tticks, filt_timerexpire, kn); 692 callout_reset(calloutp, tticks, filt_timerexpire, kn);
693 693
694 return (0); 694 return (0);
695} 695}
696 696
697static void 697static void
698filt_timerdetach(struct knote *kn) 698filt_timerdetach(struct knote *kn)
699{ 699{
700 callout_t *calloutp; 700 callout_t *calloutp;
701 701
702 calloutp = (callout_t *)kn->kn_hook; 702 calloutp = (callout_t *)kn->kn_hook;
703 callout_halt(calloutp, NULL); 703 callout_halt(calloutp, NULL);
704 callout_destroy(calloutp); 704 callout_destroy(calloutp);
705 kmem_free(calloutp, sizeof(*calloutp)); 705 kmem_free(calloutp, sizeof(*calloutp));
706 atomic_dec_uint(&kq_ncallouts); 706 atomic_dec_uint(&kq_ncallouts);
707} 707}
708 708
709static int 709static int
710filt_timer(struct knote *kn, long hint) 710filt_timer(struct knote *kn, long hint)
711{ 711{
712 int rv; 712 int rv;
713 713
714 mutex_enter(&kqueue_misc_lock); 714 mutex_enter(&kqueue_misc_lock);
715 rv = (kn->kn_data != 0); 715 rv = (kn->kn_data != 0);
716 mutex_exit(&kqueue_misc_lock); 716 mutex_exit(&kqueue_misc_lock);
717 717
718 return rv; 718 return rv;
719} 719}
720 720
721/* 721/*
722 * filt_seltrue: 722 * filt_seltrue:
723 * 723 *
724 * This filter "event" routine simulates seltrue(). 724 * This filter "event" routine simulates seltrue().
725 */ 725 */
726int 726int
727filt_seltrue(struct knote *kn, long hint) 727filt_seltrue(struct knote *kn, long hint)
728{ 728{
729 729
730 /* 730 /*
731 * We don't know how much data can be read/written, 731 * We don't know how much data can be read/written,
732 * but we know that it *can* be. This is about as 732 * but we know that it *can* be. This is about as
733 * good as select/poll does as well. 733 * good as select/poll does as well.
734 */ 734 */
735 kn->kn_data = 0; 735 kn->kn_data = 0;
736 return (1); 736 return (1);
737} 737}
738 738
739/* 739/*
740 * This provides full kqfilter entry for device switch tables, which 740 * This provides full kqfilter entry for device switch tables, which
741 * has same effect as filter using filt_seltrue() as filter method. 741 * has same effect as filter using filt_seltrue() as filter method.
742 */ 742 */
743static void 743static void
744filt_seltruedetach(struct knote *kn) 744filt_seltruedetach(struct knote *kn)
745{ 745{
746 /* Nothing to do */ 746 /* Nothing to do */
747} 747}
748 748
749const struct filterops seltrue_filtops = { 749const struct filterops seltrue_filtops = {
750 .f_isfd = 1, 750 .f_isfd = 1,
751 .f_attach = NULL, 751 .f_attach = NULL,
752 .f_detach = filt_seltruedetach, 752 .f_detach = filt_seltruedetach,
753 .f_event = filt_seltrue, 753 .f_event = filt_seltrue,
754}; 754};
755 755
756int 756int
757seltrue_kqfilter(dev_t dev, struct knote *kn) 757seltrue_kqfilter(dev_t dev, struct knote *kn)
758{ 758{
759 switch (kn->kn_filter) { 759 switch (kn->kn_filter) {
760 case EVFILT_READ: 760 case EVFILT_READ:
761 case EVFILT_WRITE: 761 case EVFILT_WRITE:
762 kn->kn_fop = &seltrue_filtops; 762 kn->kn_fop = &seltrue_filtops;
763 break; 763 break;
764 default: 764 default:
765 return (EINVAL); 765 return (EINVAL);
766 } 766 }
767 767
768 /* Nothing more to do */ 768 /* Nothing more to do */
769 return (0); 769 return (0);
770} 770}
771 771
772/* 772/*
773 * kqueue(2) system call. 773 * kqueue(2) system call.
774 */ 774 */
775static int 775static int
776kqueue1(struct lwp *l, int flags, register_t *retval) 776kqueue1(struct lwp *l, int flags, register_t *retval)
777{ 777{
778 struct kqueue *kq; 778 struct kqueue *kq;
779 file_t *fp; 779 file_t *fp;
780 int fd, error; 780 int fd, error;
781 781
782 if ((error = fd_allocfile(&fp, &fd)) != 0) 782 if ((error = fd_allocfile(&fp, &fd)) != 0)
783 return error; 783 return error;
784 fp->f_flag = FREAD | FWRITE | (flags & (FNONBLOCK|FNOSIGPIPE)); 784 fp->f_flag = FREAD | FWRITE | (flags & (FNONBLOCK|FNOSIGPIPE));
785 fp->f_type = DTYPE_KQUEUE; 785 fp->f_type = DTYPE_KQUEUE;
786 fp->f_ops = &kqueueops; 786 fp->f_ops = &kqueueops;
787 kq = kmem_zalloc(sizeof(*kq), KM_SLEEP); 787 kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
788 mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED); 788 mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
789 cv_init(&kq->kq_cv, "kqueue"); 789 cv_init(&kq->kq_cv, "kqueue");
790 selinit(&kq->kq_sel); 790 selinit(&kq->kq_sel);
791 TAILQ_INIT(&kq->kq_head); 791 TAILQ_INIT(&kq->kq_head);
792 fp->f_kqueue = kq; 792 fp->f_kqueue = kq;
793 *retval = fd; 793 *retval = fd;
794 kq->kq_fdp = curlwp->l_fd; 794 kq->kq_fdp = curlwp->l_fd;
795 fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0); 795 fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
796 fd_affix(curproc, fp, fd); 796 fd_affix(curproc, fp, fd);
797 return error; 797 return error;
798} 798}
799 799
800/* 800/*
801 * kqueue(2) system call. 801 * kqueue(2) system call.
802 */ 802 */
803int 803int
804sys_kqueue(struct lwp *l, const void *v, register_t *retval) 804sys_kqueue(struct lwp *l, const void *v, register_t *retval)
805{ 805{
806 return kqueue1(l, 0, retval); 806 return kqueue1(l, 0, retval);
807} 807}
808 808
809int 809int
810sys_kqueue1(struct lwp *l, const struct sys_kqueue1_args *uap, 810sys_kqueue1(struct lwp *l, const struct sys_kqueue1_args *uap,
811 register_t *retval) 811 register_t *retval)
812{ 812{
813 /* { 813 /* {
814 syscallarg(int) flags; 814 syscallarg(int) flags;
815 } */ 815 } */
816 return kqueue1(l, SCARG(uap, flags), retval); 816 return kqueue1(l, SCARG(uap, flags), retval);
817} 817}
818 818
819/* 819/*
820 * kevent(2) system call. 820 * kevent(2) system call.
821 */ 821 */
822int 822int
823kevent_fetch_changes(void *ctx, const struct kevent *changelist, 823kevent_fetch_changes(void *ctx, const struct kevent *changelist,
824 struct kevent *changes, size_t index, int n) 824 struct kevent *changes, size_t index, int n)
825{ 825{
826 826
827 return copyin(changelist + index, changes, n * sizeof(*changes)); 827 return copyin(changelist + index, changes, n * sizeof(*changes));
828} 828}
829 829
830int 830int
831kevent_put_events(void *ctx, struct kevent *events, 831kevent_put_events(void *ctx, struct kevent *events,
832 struct kevent *eventlist, size_t index, int n) 832 struct kevent *eventlist, size_t index, int n)
833{ 833{
834 834
835 return copyout(events, eventlist + index, n * sizeof(*events)); 835 return copyout(events, eventlist + index, n * sizeof(*events));
836} 836}
837 837
838static const struct kevent_ops kevent_native_ops = { 838static const struct kevent_ops kevent_native_ops = {
839 .keo_private = NULL, 839 .keo_private = NULL,
840 .keo_fetch_timeout = copyin, 840 .keo_fetch_timeout = copyin,
841 .keo_fetch_changes = kevent_fetch_changes, 841 .keo_fetch_changes = kevent_fetch_changes,
842 .keo_put_events = kevent_put_events, 842 .keo_put_events = kevent_put_events,
843}; 843};
844 844
845int 845int
846sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap, 846sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap,
847 register_t *retval) 847 register_t *retval)
848{ 848{
849 /* { 849 /* {
850 syscallarg(int) fd; 850 syscallarg(int) fd;
851 syscallarg(const struct kevent *) changelist; 851 syscallarg(const struct kevent *) changelist;
852 syscallarg(size_t) nchanges; 852 syscallarg(size_t) nchanges;
853 syscallarg(struct kevent *) eventlist; 853 syscallarg(struct kevent *) eventlist;
854 syscallarg(size_t) nevents; 854 syscallarg(size_t) nevents;
855 syscallarg(const struct timespec *) timeout; 855 syscallarg(const struct timespec *) timeout;
856 } */ 856 } */
857 857
858 return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist), 858 return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
859 SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents), 859 SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
860 SCARG(uap, timeout), &kevent_native_ops); 860 SCARG(uap, timeout), &kevent_native_ops);
861} 861}
862 862
863int 863int
864kevent1(register_t *retval, int fd, 864kevent1(register_t *retval, int fd,
865 const struct kevent *changelist, size_t nchanges, 865 const struct kevent *changelist, size_t nchanges,
866 struct kevent *eventlist, size_t nevents, 866 struct kevent *eventlist, size_t nevents,
867 const struct timespec *timeout, 867 const struct timespec *timeout,
868 const struct kevent_ops *keops) 868 const struct kevent_ops *keops)
869{ 869{
870 struct kevent *kevp; 870 struct kevent *kevp;
871 struct kqueue *kq; 871 struct kqueue *kq;
872 struct timespec ts; 872 struct timespec ts;
873 size_t i, n, ichange; 873 size_t i, n, ichange;
874 int nerrors, error; 874 int nerrors, error;
875 struct kevent kevbuf[KQ_NEVENTS]; /* approx 300 bytes on 64-bit */ 875 struct kevent kevbuf[KQ_NEVENTS]; /* approx 300 bytes on 64-bit */
876 file_t *fp; 876 file_t *fp;
877 877
878 /* check that we're dealing with a kq */ 878 /* check that we're dealing with a kq */
879 fp = fd_getfile(fd); 879 fp = fd_getfile(fd);
880 if (fp == NULL) 880 if (fp == NULL)
881 return (EBADF); 881 return (EBADF);
882 882
883 if (fp->f_type != DTYPE_KQUEUE) { 883 if (fp->f_type != DTYPE_KQUEUE) {
884 fd_putfile(fd); 884 fd_putfile(fd);
885 return (EBADF); 885 return (EBADF);
886 } 886 }
887 887
888 if (timeout != NULL) { 888 if (timeout != NULL) {
889 error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts)); 889 error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
890 if (error) 890 if (error)
891 goto done; 891 goto done;
892 timeout = &ts; 892 timeout = &ts;
893 } 893 }
894 894
895 kq = fp->f_kqueue; 895 kq = fp->f_kqueue;
896 nerrors = 0; 896 nerrors = 0;
897 ichange = 0; 897 ichange = 0;
898 898
899 /* traverse list of events to register */ 899 /* traverse list of events to register */
900 while (nchanges > 0) { 900 while (nchanges > 0) {
901 n = MIN(nchanges, __arraycount(kevbuf)); 901 n = MIN(nchanges, __arraycount(kevbuf));
902 error = (*keops->keo_fetch_changes)(keops->keo_private, 902 error = (*keops->keo_fetch_changes)(keops->keo_private,
903 changelist, kevbuf, ichange, n); 903 changelist, kevbuf, ichange, n);
904 if (error) 904 if (error)
905 goto done; 905 goto done;
906 for (i = 0; i < n; i++) { 906 for (i = 0; i < n; i++) {
907 kevp = &kevbuf[i]; 907 kevp = &kevbuf[i];
908 kevp->flags &= ~EV_SYSFLAGS; 908 kevp->flags &= ~EV_SYSFLAGS;
909 /* register each knote */ 909 /* register each knote */
910 error = kqueue_register(kq, kevp); 910 error = kqueue_register(kq, kevp);
911 if (!error && !(kevp->flags & EV_RECEIPT)) 911 if (!error && !(kevp->flags & EV_RECEIPT))
912 continue; 912 continue;
913 if (nevents == 0) 913 if (nevents == 0)
914 goto done; 914 goto done;
915 kevp->flags = EV_ERROR; 915 kevp->flags = EV_ERROR;
916 kevp->data = error; 916 kevp->data = error;
917 error = (*keops->keo_put_events) 917 error = (*keops->keo_put_events)
918 (keops->keo_private, kevp, 918 (keops->keo_private, kevp,
919 eventlist, nerrors, 1); 919 eventlist, nerrors, 1);
920 if (error) 920 if (error)
921 goto done; 921 goto done;
922 nevents--; 922 nevents--;
923 nerrors++; 923 nerrors++;
924 } 924 }
925 nchanges -= n; /* update the results */ 925 nchanges -= n; /* update the results */
926 ichange += n; 926 ichange += n;
927 } 927 }
928 if (nerrors) { 928 if (nerrors) {
929 *retval = nerrors; 929 *retval = nerrors;
930 error = 0; 930 error = 0;
931 goto done; 931 goto done;
932 } 932 }
933 933
934 /* actually scan through the events */ 934 /* actually scan through the events */
935 error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops, 935 error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
936 kevbuf, __arraycount(kevbuf)); 936 kevbuf, __arraycount(kevbuf));
937 done: 937 done:
938 fd_putfile(fd); 938 fd_putfile(fd);
939 return (error); 939 return (error);
940} 940}
941 941
942/* 942/*
943 * Register a given kevent kev onto the kqueue 943 * Register a given kevent kev onto the kqueue
944 */ 944 */
945static int 945static int
946kqueue_register(struct kqueue *kq, struct kevent *kev) 946kqueue_register(struct kqueue *kq, struct kevent *kev)
947{ 947{
948 struct kfilter *kfilter; 948 struct kfilter *kfilter;
949 filedesc_t *fdp; 949 filedesc_t *fdp;
950 file_t *fp; 950 file_t *fp;
951 fdfile_t *ff; 951 fdfile_t *ff;
952 struct knote *kn, *newkn; 952 struct knote *kn, *newkn;
953 struct klist *list; 953 struct klist *list;
954 int error, fd, rv; 954 int error, fd, rv;
955 955
956 fdp = kq->kq_fdp; 956 fdp = kq->kq_fdp;
957 fp = NULL; 957 fp = NULL;
958 kn = NULL; 958 kn = NULL;
959 error = 0; 959 error = 0;
960 fd = 0; 960 fd = 0;
961 961
962 newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP); 962 newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);
963 963
964 rw_enter(&kqueue_filter_lock, RW_READER); 964 rw_enter(&kqueue_filter_lock, RW_READER);
965 kfilter = kfilter_byfilter(kev->filter); 965 kfilter = kfilter_byfilter(kev->filter);
966 if (kfilter == NULL || kfilter->filtops == NULL) { 966 if (kfilter == NULL || kfilter->filtops == NULL) {
967 /* filter not found nor implemented */ 967 /* filter not found nor implemented */
968 rw_exit(&kqueue_filter_lock); 968 rw_exit(&kqueue_filter_lock);
969 kmem_free(newkn, sizeof(*newkn)); 969 kmem_free(newkn, sizeof(*newkn));
970 return (EINVAL); 970 return (EINVAL);
971 } 971 }
972 972
973 /* search if knote already exists */ 973 /* search if knote already exists */
974 if (kfilter->filtops->f_isfd) { 974 if (kfilter->filtops->f_isfd) {
975 /* monitoring a file descriptor */ 975 /* monitoring a file descriptor */
976 /* validate descriptor */ 976 /* validate descriptor */
977 if (kev->ident > INT_MAX 977 if (kev->ident > INT_MAX
978 || (fp = fd_getfile(fd = kev->ident)) == NULL) { 978 || (fp = fd_getfile(fd = kev->ident)) == NULL) {
979 rw_exit(&kqueue_filter_lock); 979 rw_exit(&kqueue_filter_lock);
980 kmem_free(newkn, sizeof(*newkn)); 980 kmem_free(newkn, sizeof(*newkn));
981 return EBADF; 981 return EBADF;
982 } 982 }
983 mutex_enter(&fdp->fd_lock); 983 mutex_enter(&fdp->fd_lock);
984 ff = fdp->fd_dt->dt_ff[fd]; 984 ff = fdp->fd_dt->dt_ff[fd];
985 if (ff->ff_refcnt & FR_CLOSING) { 985 if (ff->ff_refcnt & FR_CLOSING) {
986 error = EBADF; 986 error = EBADF;
987 goto doneunlock; 987 goto doneunlock;
988 } 988 }
989 if (fd <= fdp->fd_lastkqfile) { 989 if (fd <= fdp->fd_lastkqfile) {
990 SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) { 990 SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
991 if (kq == kn->kn_kq && 991 if (kq == kn->kn_kq &&
992 kev->filter == kn->kn_filter) 992 kev->filter == kn->kn_filter)
993 break; 993 break;
994 } 994 }
995 } 995 }
996 } else { 996 } else {
997 /* 997 /*
998 * not monitoring a file descriptor, so 998 * not monitoring a file descriptor, so
999 * look up knotes in the internal hash table 999 * look up knotes in the internal hash table
1000 */ 1000 */
1001 mutex_enter(&fdp->fd_lock); 1001 mutex_enter(&fdp->fd_lock);
1002 if (fdp->fd_knhashmask != 0) { 1002 if (fdp->fd_knhashmask != 0) {
1003 list = &fdp->fd_knhash[ 1003 list = &fdp->fd_knhash[
1004 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; 1004 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1005 SLIST_FOREACH(kn, list, kn_link) { 1005 SLIST_FOREACH(kn, list, kn_link) {
1006 if (kev->ident == kn->kn_id && 1006 if (kev->ident == kn->kn_id &&
1007 kq == kn->kn_kq && 1007 kq == kn->kn_kq &&
1008 kev->filter == kn->kn_filter) 1008 kev->filter == kn->kn_filter)
1009 break; 1009 break;
1010 } 1010 }
1011 } 1011 }
1012 } 1012 }
1013 1013
1014 /* 1014 /*
1015 * kn now contains the matching knote, or NULL if no match 1015 * kn now contains the matching knote, or NULL if no match
1016 */ 1016 */
1017 if (kev->flags & EV_ADD) { 1017 if (kev->flags & EV_ADD) {
1018 if (kn == NULL) { 1018 if (kn == NULL) {
1019 /* create new knote */ 1019 /* create new knote */
1020 kn = newkn; 1020 kn = newkn;
1021 newkn = NULL; 1021 newkn = NULL;
1022 kn->kn_obj = fp; 1022 kn->kn_obj = fp;
1023 kn->kn_id = kev->ident; 1023 kn->kn_id = kev->ident;
1024 kn->kn_kq = kq; 1024 kn->kn_kq = kq;
1025 kn->kn_fop = kfilter->filtops; 1025 kn->kn_fop = kfilter->filtops;
1026 kn->kn_kfilter = kfilter; 1026 kn->kn_kfilter = kfilter;
1027 kn->kn_sfflags = kev->fflags; 1027 kn->kn_sfflags = kev->fflags;
1028 kn->kn_sdata = kev->data; 1028 kn->kn_sdata = kev->data;
1029 kev->fflags = 0; 1029 kev->fflags = 0;
1030 kev->data = 0; 1030 kev->data = 0;
1031 kn->kn_kevent = *kev; 1031 kn->kn_kevent = *kev;
1032 1032
1033 KASSERT(kn->kn_fop != NULL); 1033 KASSERT(kn->kn_fop != NULL);
1034 /* 1034 /*
1035 * transfer the file reference to the knote, and 1042 * transfer the file reference to the knote, and
1036 * do not release it at the end of this routine. 1043 * do not release it at the end of this routine.
1037 */ 1037 */
1038 fp = NULL; 1038 fp = NULL;
1039 1039
1040 if (!kn->kn_fop->f_isfd) { 1040 if (!kn->kn_fop->f_isfd) {
1041 /* 1041 /*
1042 * If knote is not on an fd, store on 1042 * If knote is not on an fd, store on
1043 * internal hash table. 1043 * internal hash table.
1044 */ 1044 */
1045 if (fdp->fd_knhashmask == 0) { 1045 if (fdp->fd_knhashmask == 0) {
1046 /* XXXAD can block with fd_lock held */ 1046 /* XXXAD can block with fd_lock held */
1047 fdp->fd_knhash = hashinit(KN_HASHSIZE, 1047 fdp->fd_knhash = hashinit(KN_HASHSIZE,
1048 HASH_LIST, true, 1048 HASH_LIST, true,
1049 &fdp->fd_knhashmask); 1049 &fdp->fd_knhashmask);
1050 } 1050 }
1051 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, 1051 list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
1052 fdp->fd_knhashmask)]; 1052 fdp->fd_knhashmask)];
1053 } else { 1053 } else {
1054 /* Otherwise, knote is on an fd. */ 1054 /* Otherwise, knote is on an fd. */
1055 list = (struct klist *) 1055 list = (struct klist *)
1056 &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist; 1056 &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist;
1057 if ((int)kn->kn_id > fdp->fd_lastkqfile) 1057 if ((int)kn->kn_id > fdp->fd_lastkqfile)
1058 fdp->fd_lastkqfile = kn->kn_id; 1058 fdp->fd_lastkqfile = kn->kn_id;
1059 } 1059 }
1060 SLIST_INSERT_HEAD(list, kn, kn_link); 1060 SLIST_INSERT_HEAD(list, kn, kn_link);
1061 1061
1062 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1062 KERNEL_LOCK(1, NULL); /* XXXSMP */
1063 error = (*kfilter->filtops->f_attach)(kn); 1063 error = (*kfilter->filtops->f_attach)(kn);
1064 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1064 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1065 if (error != 0) { 1065 if (error != 0) {
 1066#ifdef DEBUG
 1067 printf("%s: event type %d not supported for "
 1068 "file type %d (error %d)\n", __func__,
 1069 kn->kn_filter, kn->kn_obj ?
 1070 ((file_t *)kn->kn_obj)->f_type : -1, error);
 1071#endif
 1072
1066 /* knote_detach() drops fdp->fd_lock */ 1073 /* knote_detach() drops fdp->fd_lock */
1067 knote_detach(kn, fdp, false); 1074 knote_detach(kn, fdp, false);
1068 goto done; 1075 goto done;
1069 } 1076 }
1070 atomic_inc_uint(&kfilter->refcnt); 1077 atomic_inc_uint(&kfilter->refcnt);
1071 } else { 1078 } else {
1072 /* 1079 /*
1073 * The user may change some filter values after the 1080 * The user may change some filter values after the
1074 * initial EV_ADD, but doing so will not reset any 1081 * initial EV_ADD, but doing so will not reset any
1074 * filters which have already been triggered. 1081 * filters which have already been triggered.
1076 */ 1083 */
1077 kn->kn_sfflags = kev->fflags; 1084 kn->kn_sfflags = kev->fflags;
1078 kn->kn_sdata = kev->data; 1085 kn->kn_sdata = kev->data;
1079 kn->kn_kevent.udata = kev->udata; 1086 kn->kn_kevent.udata = kev->udata;
1080 } 1087 }
1081 /* 1088 /*
1082 * We can get here if we are trying to attach 1089 * We can get here if we are trying to attach
1083 * an event to a file descriptor that does not 1090 * an event to a file descriptor that does not
1084 * support events, and the attach routine is 1091 * support events, and the attach routine is
1085 * broken and does not return an error. 1092 * broken and does not return an error.
1086 */ 1093 */
1087 KASSERT(kn->kn_fop != NULL); 1094 KASSERT(kn->kn_fop != NULL);
1088 KASSERT(kn->kn_fop->f_event != NULL); 1095 KASSERT(kn->kn_fop->f_event != NULL);
1089 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1096 KERNEL_LOCK(1, NULL); /* XXXSMP */
1090 rv = (*kn->kn_fop->f_event)(kn, 0); 1097 rv = (*kn->kn_fop->f_event)(kn, 0);
1091 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1098 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1092 if (rv) 1099 if (rv)
1093 knote_activate(kn); 1100 knote_activate(kn);
1094 } else { 1101 } else {
1095 if (kn == NULL) { 1102 if (kn == NULL) {
1096 error = ENOENT; 1103 error = ENOENT;
1097 goto doneunlock; 1104 goto doneunlock;
1098 } 1105 }
1099 if (kev->flags & EV_DELETE) { 1106 if (kev->flags & EV_DELETE) {
1100 /* knote_detach() drops fdp->fd_lock */ 1107 /* knote_detach() drops fdp->fd_lock */
1101 knote_detach(kn, fdp, true); 1108 knote_detach(kn, fdp, true);
1102 goto done; 1109 goto done;
1103 } 1110 }
1104 } 1111 }
1105 1112
1106 /* disable knote */ 1113 /* disable knote */
1107 if ((kev->flags & EV_DISABLE)) { 1114 if ((kev->flags & EV_DISABLE)) {
1108 mutex_spin_enter(&kq->kq_lock); 1115 mutex_spin_enter(&kq->kq_lock);
1109 if ((kn->kn_status & KN_DISABLED) == 0) 1116 if ((kn->kn_status & KN_DISABLED) == 0)
1110 kn->kn_status |= KN_DISABLED; 1117 kn->kn_status |= KN_DISABLED;
1111 mutex_spin_exit(&kq->kq_lock); 1118 mutex_spin_exit(&kq->kq_lock);
1112 } 1119 }
1113 1120
1114 /* enable knote */ 1121 /* enable knote */
1115 if ((kev->flags & EV_ENABLE)) { 1122 if ((kev->flags & EV_ENABLE)) {
1116 knote_enqueue(kn); 1123 knote_enqueue(kn);
1117 } 1124 }
1118doneunlock: 1125doneunlock:
1119 mutex_exit(&fdp->fd_lock); 1126 mutex_exit(&fdp->fd_lock);
1120 done: 1127 done:
1121 rw_exit(&kqueue_filter_lock); 1128 rw_exit(&kqueue_filter_lock);
1122 if (newkn != NULL) 1129 if (newkn != NULL)
1123 kmem_free(newkn, sizeof(*newkn)); 1130 kmem_free(newkn, sizeof(*newkn));
1124 if (fp != NULL) 1131 if (fp != NULL)
1125 fd_putfile(fd); 1132 fd_putfile(fd);
1126 return (error); 1133 return (error);
1127} 1134}
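
/*
 * Illustrative sketch, not part of the revision above: the kind of
 * 3rd-party usage the new DEBUG printf in kqueue_register() is meant
 * to catch -- attaching a filter to a file type whose attach routine
 * rejects it.  EVFILT_VNODE on a socket is assumed here to be such a
 * combination.
 */
#include <sys/event.h>
#include <sys/socket.h>
#include <err.h>

int
main(void)
{
	struct kevent kev;
	int kq = kqueue();
	int s = socket(AF_INET, SOCK_STREAM, 0);

	if (kq == -1 || s == -1)
		err(1, "setup");
	EV_SET(&kev, s, EVFILT_VNODE, EV_ADD, NOTE_DELETE, 0, 0);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		warn("kevent");	/* f_attach failed; DEBUG kernels log it */
	return 0;
}
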
1128 1135
1129#if defined(DEBUG) 1136#if defined(DEBUG)
1130#define KN_FMT(buf, kn) \ 1137#define KN_FMT(buf, kn) \
1131 (snprintb((buf), sizeof(buf), __KN_FLAG_BITS, (kn)->kn_status), buf) 1138 (snprintb((buf), sizeof(buf), __KN_FLAG_BITS, (kn)->kn_status), buf)
1132 1139
1133static void 1140static void
1134kqueue_check(const char *func, size_t line, const struct kqueue *kq) 1141kqueue_check(const char *func, size_t line, const struct kqueue *kq)
1135{ 1142{
1136 const struct knote *kn; 1143 const struct knote *kn;
1137 int count; 1144 int count;
1138 int nmarker; 1145 int nmarker;
1139 char buf[128]; 1146 char buf[128];
1140 1147
1141 KASSERT(mutex_owned(&kq->kq_lock)); 1148 KASSERT(mutex_owned(&kq->kq_lock));
1142 KASSERT(kq->kq_count >= 0); 1149 KASSERT(kq->kq_count >= 0);
1143 1150
1144 count = 0; 1151 count = 0;
1145 nmarker = 0; 1152 nmarker = 0;
1146 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1153 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
1147 if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) { 1154 if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
1148 panic("%s,%zu: kq=%p kn=%p !(MARKER|QUEUED) %s", 1155 panic("%s,%zu: kq=%p kn=%p !(MARKER|QUEUED) %s",
1149 func, line, kq, kn, KN_FMT(buf, kn)); 1156 func, line, kq, kn, KN_FMT(buf, kn));
1150 } 1157 }
1151 if ((kn->kn_status & KN_MARKER) == 0) { 1158 if ((kn->kn_status & KN_MARKER) == 0) {
1152 if (kn->kn_kq != kq) { 1159 if (kn->kn_kq != kq) {
1153 panic("%s,%zu: kq=%p kn(%p) != kn->kq(%p): %s", 1160 panic("%s,%zu: kq=%p kn(%p) != kn->kq(%p): %s",
1154 func, line, kq, kn, kn->kn_kq, 1161 func, line, kq, kn, kn->kn_kq,
1155 KN_FMT(buf, kn)); 1162 KN_FMT(buf, kn));
1156 } 1163 }
1157 if ((kn->kn_status & KN_ACTIVE) == 0) { 1164 if ((kn->kn_status & KN_ACTIVE) == 0) {
1158 panic("%s,%zu: kq=%p kn=%p: !ACTIVE %s", 1165 panic("%s,%zu: kq=%p kn=%p: !ACTIVE %s",
1159 func, line, kq, kn, KN_FMT(buf, kn)); 1166 func, line, kq, kn, KN_FMT(buf, kn));
1160 } 1167 }
1161 count++; 1168 count++;
1162 if (count > kq->kq_count) { 1169 if (count > kq->kq_count) {
1163 goto bad; 1170 goto bad;
1164 } 1171 }
1165 } else { 1172 } else {
1166 nmarker++; 1173 nmarker++;
1167#if 0 1174#if 0
1168 if (nmarker > 10000) { 1175 if (nmarker > 10000) {
1169 panic("%s,%zu: kq=%p too many markers: " 1176 panic("%s,%zu: kq=%p too many markers: "
1170 "%d != %d, nmarker=%d", 1177 "%d != %d, nmarker=%d",
1171 func, line, kq, kq->kq_count, count, 1178 func, line, kq, kq->kq_count, count,
1172 nmarker); 1179 nmarker);
1173 } 1180 }
1174#endif 1181#endif
1175 } 1182 }
1176 } 1183 }
1177 if (kq->kq_count != count) { 1184 if (kq->kq_count != count) {
1178bad: 1185bad:
1179 panic("%s,%zu: kq=%p kq->kq_count(%d) != count(%d), nmarker=%d", 1186 panic("%s,%zu: kq=%p kq->kq_count(%d) != count(%d), nmarker=%d",
1180 func, line, kq, kq->kq_count, count, nmarker); 1187 func, line, kq, kq->kq_count, count, nmarker);
1181 } 1188 }
1182} 1189}
1183#define kq_check(a) kqueue_check(__func__, __LINE__, (a)) 1190#define kq_check(a) kqueue_check(__func__, __LINE__, (a))
1184#else /* defined(DEBUG) */ 1191#else /* defined(DEBUG) */
1185#define kq_check(a) /* nothing */ 1192#define kq_check(a) /* nothing */
1186#endif /* defined(DEBUG) */ 1193#endif /* defined(DEBUG) */
1187 1194
1188/* 1195/*
1189 * Scan through the list of events on fp (for a maximum of maxevents), 1196 * Scan through the list of events on fp (for a maximum of maxevents),
1190 * returning the results into ulistp. Timeout is determined by tsp; if 1197 * returning the results into ulistp. Timeout is determined by tsp; if
1191 * NULL, wait indefinitely; if zero-valued, perform a poll; otherwise wait 1198 * NULL, wait indefinitely; if zero-valued, perform a poll; otherwise wait
1192 * as appropriate. 1199 * as appropriate.
1193 */ 1200 */
1194static int 1201static int
1195kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp, 1202kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
1196 const struct timespec *tsp, register_t *retval, 1203 const struct timespec *tsp, register_t *retval,
1197 const struct kevent_ops *keops, struct kevent *kevbuf, 1204 const struct kevent_ops *keops, struct kevent *kevbuf,
1198 size_t kevcnt) 1205 size_t kevcnt)
1199{ 1206{
1200 struct kqueue *kq; 1207 struct kqueue *kq;
1201 struct kevent *kevp; 1208 struct kevent *kevp;
1202 struct timespec ats, sleepts; 1209 struct timespec ats, sleepts;
1203 struct knote *kn, *marker, morker; 1210 struct knote *kn, *marker, morker;
1204 size_t count, nkev, nevents; 1211 size_t count, nkev, nevents;
1205 int timeout, error, rv; 1212 int timeout, error, rv;
1206 filedesc_t *fdp; 1213 filedesc_t *fdp;
1207 1214
1208 fdp = curlwp->l_fd; 1215 fdp = curlwp->l_fd;
1209 kq = fp->f_kqueue; 1216 kq = fp->f_kqueue;
1210 count = maxevents; 1217 count = maxevents;
1211 nkev = nevents = error = 0; 1218 nkev = nevents = error = 0;
1212 if (count == 0) { 1219 if (count == 0) {
1213 *retval = 0; 1220 *retval = 0;
1214 return 0; 1221 return 0;
1215 } 1222 }
1216 1223
1217 if (tsp) { /* timeout supplied */ 1224 if (tsp) { /* timeout supplied */
1218 ats = *tsp; 1225 ats = *tsp;
1219 if (inittimeleft(&ats, &sleepts) == -1) { 1226 if (inittimeleft(&ats, &sleepts) == -1) {
1220 *retval = maxevents; 1227 *retval = maxevents;
1221 return EINVAL; 1228 return EINVAL;
1222 } 1229 }
1223 timeout = tstohz(&ats); 1230 timeout = tstohz(&ats);
1224 if (timeout <= 0) 1231 if (timeout <= 0)
1225 timeout = -1; /* do poll */ 1232 timeout = -1; /* do poll */
1226 } else { 1233 } else {
1227 /* no timeout, wait forever */ 1234 /* no timeout, wait forever */
1228 timeout = 0; 1235 timeout = 0;
1229 } 1236 }
1230 1237
1231 memset(&morker, 0, sizeof(morker)); 1238 memset(&morker, 0, sizeof(morker));
1232 marker = &morker; 1239 marker = &morker;
1233 marker->kn_status = KN_MARKER; 1240 marker->kn_status = KN_MARKER;
1234 mutex_spin_enter(&kq->kq_lock); 1241 mutex_spin_enter(&kq->kq_lock);
1235 retry: 1242 retry:
1236 kevp = kevbuf; 1243 kevp = kevbuf;
1237 if (kq->kq_count == 0) { 1244 if (kq->kq_count == 0) {
1238 if (timeout >= 0) { 1245 if (timeout >= 0) {
1239 error = cv_timedwait_sig(&kq->kq_cv, 1246 error = cv_timedwait_sig(&kq->kq_cv,
1240 &kq->kq_lock, timeout); 1247 &kq->kq_lock, timeout);
1241 if (error == 0) { 1248 if (error == 0) {
1242 if (tsp == NULL || (timeout = 1249 if (tsp == NULL || (timeout =
1243 gettimeleft(&ats, &sleepts)) > 0) 1250 gettimeleft(&ats, &sleepts)) > 0)
1244 goto retry; 1251 goto retry;
1245 } else { 1252 } else {
1246 /* don't restart after signals... */ 1253 /* don't restart after signals... */
1247 if (error == ERESTART) 1254 if (error == ERESTART)
1248 error = EINTR; 1255 error = EINTR;
1249 if (error == EWOULDBLOCK) 1256 if (error == EWOULDBLOCK)
1250 error = 0; 1257 error = 0;
1251 } 1258 }
1252 } 1259 }
1253 mutex_spin_exit(&kq->kq_lock); 1260 mutex_spin_exit(&kq->kq_lock);
1254 } else { 1261 } else {
1255 /* mark end of knote list */ 1262 /* mark end of knote list */
1256 TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); 1263 TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1257 1264
1258 /* 1265 /*
1259 * Acquire the fdp->fd_lock interlock to avoid races with 1266 * Acquire the fdp->fd_lock interlock to avoid races with
1260 * file creation/destruction from other threads. 1267 * file creation/destruction from other threads.
1261 */ 1268 */
1262 mutex_spin_exit(&kq->kq_lock); 1269 mutex_spin_exit(&kq->kq_lock);
1263 mutex_enter(&fdp->fd_lock); 1270 mutex_enter(&fdp->fd_lock);
1264 mutex_spin_enter(&kq->kq_lock); 1271 mutex_spin_enter(&kq->kq_lock);
1265 1272
1266 while (count != 0) { 1273 while (count != 0) {
1267 kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */ 1274 kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
1268 while ((kn->kn_status & KN_MARKER) != 0) { 1275 while ((kn->kn_status & KN_MARKER) != 0) {
1269 if (kn == marker) { 1276 if (kn == marker) {
1270 /* it's our marker, stop */ 1277 /* it's our marker, stop */
1271 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1278 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1272 if (count < maxevents || (tsp != NULL && 1279 if (count < maxevents || (tsp != NULL &&
1273 (timeout = gettimeleft(&ats, 1280 (timeout = gettimeleft(&ats,
1274 &sleepts)) <= 0)) 1281 &sleepts)) <= 0))
1275 goto done; 1282 goto done;
1276 mutex_exit(&fdp->fd_lock); 1283 mutex_exit(&fdp->fd_lock);
1277 goto retry; 1284 goto retry;
1278 } 1285 }
1279 /* someone else's marker. */ 1286 /* someone else's marker. */
1280 kn = TAILQ_NEXT(kn, kn_tqe); 1287 kn = TAILQ_NEXT(kn, kn_tqe);
1281 } 1288 }
1282 kq_check(kq); 1289 kq_check(kq);
1283 kq->kq_count--; 1290 kq->kq_count--;
1284 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1291 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1285 kn->kn_status &= ~KN_QUEUED; 1292 kn->kn_status &= ~KN_QUEUED;
1286 kn->kn_status |= KN_BUSY; 1293 kn->kn_status |= KN_BUSY;
1287 kq_check(kq); 1294 kq_check(kq);
1288 if (kn->kn_status & KN_DISABLED) { 1295 if (kn->kn_status & KN_DISABLED) {
1289 kn->kn_status &= ~KN_BUSY; 1296 kn->kn_status &= ~KN_BUSY;
1290 /* don't want disabled events */ 1297 /* don't want disabled events */
1291 continue; 1298 continue;
1292 } 1299 }
1293 if ((kn->kn_flags & EV_ONESHOT) == 0) { 1300 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1294 mutex_spin_exit(&kq->kq_lock); 1301 mutex_spin_exit(&kq->kq_lock);
1295 KASSERT(kn->kn_fop != NULL); 1302 KASSERT(kn->kn_fop != NULL);
1296 KASSERT(kn->kn_fop->f_event != NULL); 1303 KASSERT(kn->kn_fop->f_event != NULL);
1297 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1304 KERNEL_LOCK(1, NULL); /* XXXSMP */
1298 KASSERT(mutex_owned(&fdp->fd_lock)); 1305 KASSERT(mutex_owned(&fdp->fd_lock));
1299 rv = (*kn->kn_fop->f_event)(kn, 0); 1306 rv = (*kn->kn_fop->f_event)(kn, 0);
1300 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1307 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1301 mutex_spin_enter(&kq->kq_lock); 1308 mutex_spin_enter(&kq->kq_lock);
1302 /* Re-poll if note was re-enqueued. */ 1309 /* Re-poll if note was re-enqueued. */
1303 if ((kn->kn_status & KN_QUEUED) != 0) { 1310 if ((kn->kn_status & KN_QUEUED) != 0) {
1304 kn->kn_status &= ~KN_BUSY; 1311 kn->kn_status &= ~KN_BUSY;
1305 continue; 1312 continue;
1306 } 1313 }
1307 if (rv == 0) { 1314 if (rv == 0) {
1308 /* 1315 /*
1309 * non-ONESHOT event that hasn't 1316 * non-ONESHOT event that hasn't
1310 * triggered again, so de-queue. 1317 * triggered again, so de-queue.
1311 */ 1318 */
1312 kn->kn_status &= ~(KN_ACTIVE|KN_BUSY); 1319 kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
1313 continue; 1320 continue;
1314 } 1321 }
1315 } 1322 }
1316 /* XXXAD should be obtained from f_event if !oneshot. */ 1323 /* XXXAD should be obtained from f_event if !oneshot. */
1317 *kevp++ = kn->kn_kevent; 1324 *kevp++ = kn->kn_kevent;
1318 nkev++; 1325 nkev++;
1319 if (kn->kn_flags & EV_ONESHOT) { 1326 if (kn->kn_flags & EV_ONESHOT) {
1320 /* delete ONESHOT events after retrieval */ 1327 /* delete ONESHOT events after retrieval */
1321 kn->kn_status &= ~KN_BUSY; 1328 kn->kn_status &= ~KN_BUSY;
1322 mutex_spin_exit(&kq->kq_lock); 1329 mutex_spin_exit(&kq->kq_lock);
1323 knote_detach(kn, fdp, true); 1330 knote_detach(kn, fdp, true);
1324 mutex_enter(&fdp->fd_lock); 1331 mutex_enter(&fdp->fd_lock);
1325 mutex_spin_enter(&kq->kq_lock); 1332 mutex_spin_enter(&kq->kq_lock);
1326 } else if (kn->kn_flags & EV_CLEAR) { 1333 } else if (kn->kn_flags & EV_CLEAR) {
1327 /* clear state after retrieval */ 1334 /* clear state after retrieval */
1328 kn->kn_data = 0; 1335 kn->kn_data = 0;
1329 kn->kn_fflags = 0; 1336 kn->kn_fflags = 0;
1330 kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY); 1337 kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
1331 } else if (kn->kn_flags & EV_DISPATCH) { 1338 } else if (kn->kn_flags & EV_DISPATCH) {
1332 kn->kn_status |= KN_DISABLED; 1339 kn->kn_status |= KN_DISABLED;
1333 kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY); 1340 kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
1334 } else { 1341 } else {
1335 /* add event back on list */ 1342 /* add event back on list */
1336 kq_check(kq); 1343 kq_check(kq);
1337 kn->kn_status |= KN_QUEUED; 1344 kn->kn_status |= KN_QUEUED;
1338 kn->kn_status &= ~KN_BUSY; 1345 kn->kn_status &= ~KN_BUSY;
1339 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1346 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1340 kq->kq_count++; 1347 kq->kq_count++;
1341 kq_check(kq); 1348 kq_check(kq);
1342 } 1349 }
1343 if (nkev == kevcnt) { 1350 if (nkev == kevcnt) {
1344 /* do copyouts in kevcnt chunks */ 1351 /* do copyouts in kevcnt chunks */
1345 mutex_spin_exit(&kq->kq_lock); 1352 mutex_spin_exit(&kq->kq_lock);
1346 mutex_exit(&fdp->fd_lock); 1353 mutex_exit(&fdp->fd_lock);
1347 error = (*keops->keo_put_events) 1354 error = (*keops->keo_put_events)
1348 (keops->keo_private, 1355 (keops->keo_private,
1349 kevbuf, ulistp, nevents, nkev); 1356 kevbuf, ulistp, nevents, nkev);
1350 mutex_enter(&fdp->fd_lock); 1357 mutex_enter(&fdp->fd_lock);
1351 mutex_spin_enter(&kq->kq_lock); 1358 mutex_spin_enter(&kq->kq_lock);
1352 nevents += nkev; 1359 nevents += nkev;
1353 nkev = 0; 1360 nkev = 0;
1354 kevp = kevbuf; 1361 kevp = kevbuf;
1355 } 1362 }
1356 count--; 1363 count--;
1357 if (error != 0 || count == 0) { 1364 if (error != 0 || count == 0) {
1358 /* remove marker */ 1365 /* remove marker */
1359 TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); 1366 TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
1360 break; 1367 break;
1361 } 1368 }
1362 } 1369 }
1363 done: 1370 done:
1364 mutex_spin_exit(&kq->kq_lock); 1371 mutex_spin_exit(&kq->kq_lock);
1365 mutex_exit(&fdp->fd_lock); 1372 mutex_exit(&fdp->fd_lock);
1366 } 1373 }
1367 if (nkev != 0) { 1374 if (nkev != 0) {
1368 /* copyout remaining events */ 1375 /* copyout remaining events */
1369 error = (*keops->keo_put_events)(keops->keo_private, 1376 error = (*keops->keo_put_events)(keops->keo_private,
1370 kevbuf, ulistp, nevents, nkev); 1377 kevbuf, ulistp, nevents, nkev);
1371 } 1378 }
1372 *retval = maxevents - count; 1379 *retval = maxevents - count;
1373 1380
1374 return error; 1381 return error;
1375} 1382}
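
/*
 * Illustrative sketch, not part of the revision above: the three
 * timeout modes kqueue_scan() implements, as seen from userland --
 * tsp == NULL blocks, a zero-valued timespec polls, anything else
 * bounds the wait.
 */
#include <sys/event.h>
#include <sys/time.h>

static int
kq_poll_once(int kq, struct kevent *ev)
{
	static const struct timespec zero;	/* 0s/0ns: poll, no sleep */

	return kevent(kq, NULL, 0, ev, 1, &zero);
}
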
1376 1383
1377/* 1384/*
1378 * fileops ioctl method for a kqueue descriptor. 1385 * fileops ioctl method for a kqueue descriptor.
1379 * 1386 *
1380 * Two ioctls are currently supported. They both use struct kfilter_mapping: 1387 * Two ioctls are currently supported. They both use struct kfilter_mapping:
1381 * KFILTER_BYFILTER find name for filter, and return result in 1388 * KFILTER_BYFILTER find name for filter, and return result in
1382 * name, which is of size len. 1389 * name, which is of size len.
1383 * KFILTER_BYNAME find filter for name. len is ignored. 1390 * KFILTER_BYNAME find filter for name. len is ignored.
1384 */ 1391 */
1385/*ARGSUSED*/ 1392/*ARGSUSED*/
1386static int 1393static int
1387kqueue_ioctl(file_t *fp, u_long com, void *data) 1394kqueue_ioctl(file_t *fp, u_long com, void *data)
1388{ 1395{
1389 struct kfilter_mapping *km; 1396 struct kfilter_mapping *km;
1390 const struct kfilter *kfilter; 1397 const struct kfilter *kfilter;
1391 char *name; 1398 char *name;
1392 int error; 1399 int error;
1393 1400
1394 km = data; 1401 km = data;
1395 error = 0; 1402 error = 0;
1396 name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP); 1403 name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);
1397 1404
1398 switch (com) { 1405 switch (com) {
1399 case KFILTER_BYFILTER: /* convert filter -> name */ 1406 case KFILTER_BYFILTER: /* convert filter -> name */
1400 rw_enter(&kqueue_filter_lock, RW_READER); 1407 rw_enter(&kqueue_filter_lock, RW_READER);
1401 kfilter = kfilter_byfilter(km->filter); 1408 kfilter = kfilter_byfilter(km->filter);
1402 if (kfilter != NULL) { 1409 if (kfilter != NULL) {
1403 strlcpy(name, kfilter->name, KFILTER_MAXNAME); 1410 strlcpy(name, kfilter->name, KFILTER_MAXNAME);
1404 rw_exit(&kqueue_filter_lock); 1411 rw_exit(&kqueue_filter_lock);
1405 error = copyoutstr(name, km->name, km->len, NULL); 1412 error = copyoutstr(name, km->name, km->len, NULL);
1406 } else { 1413 } else {
1407 rw_exit(&kqueue_filter_lock); 1414 rw_exit(&kqueue_filter_lock);
1408 error = ENOENT; 1415 error = ENOENT;
1409 } 1416 }
1410 break; 1417 break;
1411 1418
1412 case KFILTER_BYNAME: /* convert name -> filter */ 1419 case KFILTER_BYNAME: /* convert name -> filter */
1413 error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL); 1420 error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
1414 if (error) { 1421 if (error) {
1415 break; 1422 break;
1416 } 1423 }
1417 rw_enter(&kqueue_filter_lock, RW_READER); 1424 rw_enter(&kqueue_filter_lock, RW_READER);
1418 kfilter = kfilter_byname(name); 1425 kfilter = kfilter_byname(name);
1419 if (kfilter != NULL) 1426 if (kfilter != NULL)
1420 km->filter = kfilter->filter; 1427 km->filter = kfilter->filter;
1421 else 1428 else
1422 error = ENOENT; 1429 error = ENOENT;
1423 rw_exit(&kqueue_filter_lock); 1430 rw_exit(&kqueue_filter_lock);
1424 break; 1431 break;
1425 1432
1426 default: 1433 default:
1427 error = ENOTTY; 1434 error = ENOTTY;
1428 break; 1435 break;
1429 1436
1430 } 1437 }
1431 kmem_free(name, KFILTER_MAXNAME); 1438 kmem_free(name, KFILTER_MAXNAME);
1432 return (error); 1439 return (error);
1433} 1440}
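
/*
 * Illustrative sketch, not part of the revision above: resolving a
 * filter name with the KFILTER_BYNAME ioctl handled above.  The
 * struct kfilter_mapping field names (name, len, filter) follow
 * <sys/event.h>; __UNCONST is the NetBSD const-stripping macro.
 */
#include <sys/cdefs.h>
#include <sys/event.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <err.h>

static uint32_t
filter_by_name(int kq, const char *fname)
{
	struct kfilter_mapping km;

	km.name = __UNCONST(fname);
	km.len = 0;		/* ignored for KFILTER_BYNAME */
	if (ioctl(kq, KFILTER_BYNAME, &km) == -1)
		err(1, "KFILTER_BYNAME");
	return km.filter;
}
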
1434 1441
1435/* 1442/*
1436 * fileops fcntl method for a kqueue descriptor. 1443 * fileops fcntl method for a kqueue descriptor.
1437 */ 1444 */
1438static int 1445static int
1439kqueue_fcntl(file_t *fp, u_int com, void *data) 1446kqueue_fcntl(file_t *fp, u_int com, void *data)
1440{ 1447{
1441 1448
1442 return (ENOTTY); 1449 return (ENOTTY);
1443} 1450}
1444 1451
1445/* 1452/*
1446 * fileops poll method for a kqueue descriptor. 1453 * fileops poll method for a kqueue descriptor.
1447 * Determine if kqueue has events pending. 1454 * Determine if kqueue has events pending.
1448 */ 1455 */
1449static int 1456static int
1450kqueue_poll(file_t *fp, int events) 1457kqueue_poll(file_t *fp, int events)
1451{ 1458{
1452 struct kqueue *kq; 1459 struct kqueue *kq;
1453 int revents; 1460 int revents;
1454 1461
1455 kq = fp->f_kqueue; 1462 kq = fp->f_kqueue;
1456 1463
1457 revents = 0; 1464 revents = 0;
1458 if (events & (POLLIN | POLLRDNORM)) { 1465 if (events & (POLLIN | POLLRDNORM)) {
1459 mutex_spin_enter(&kq->kq_lock); 1466 mutex_spin_enter(&kq->kq_lock);
1460 if (kq->kq_count != 0) { 1467 if (kq->kq_count != 0) {
1461 revents |= events & (POLLIN | POLLRDNORM); 1468 revents |= events & (POLLIN | POLLRDNORM);
1462 } else { 1469 } else {
1463 selrecord(curlwp, &kq->kq_sel); 1470 selrecord(curlwp, &kq->kq_sel);
1464 } 1471 }
1465 kq_check(kq); 1472 kq_check(kq);
1466 mutex_spin_exit(&kq->kq_lock); 1473 mutex_spin_exit(&kq->kq_lock);
1467 } 1474 }
1468 1475
1469 return revents; 1476 return revents;
1470} 1477}
1471 1478
1472/* 1479/*
1473 * fileops stat method for a kqueue descriptor. 1480 * fileops stat method for a kqueue descriptor.
1474 * Returns dummy info, with st_size being the number of events pending. 1481 * Returns dummy info, with st_size being the number of events pending.
1475 */ 1482 */
1476static int 1483static int
1477kqueue_stat(file_t *fp, struct stat *st) 1484kqueue_stat(file_t *fp, struct stat *st)
1478{ 1485{
1479 struct kqueue *kq; 1486 struct kqueue *kq;
1480 1487
1481 kq = fp->f_kqueue; 1488 kq = fp->f_kqueue;
1482 1489
1483 memset(st, 0, sizeof(*st)); 1490 memset(st, 0, sizeof(*st));
1484 st->st_size = kq->kq_count; 1491 st->st_size = kq->kq_count;
1485 st->st_blksize = sizeof(struct kevent); 1492 st->st_blksize = sizeof(struct kevent);
1486 st->st_mode = S_IFIFO; 1493 st->st_mode = S_IFIFO;
1487 1494
1488 return 0; 1495 return 0;
1489} 1496}
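
/*
 * Illustrative sketch, not part of the revision above: reading the
 * pending-event count that kqueue_stat() exposes via st_size.
 */
#include <sys/stat.h>
#include <err.h>
#include <stdio.h>

static void
show_pending(int kq)
{
	struct stat st;

	if (fstat(kq, &st) == -1)
		err(1, "fstat");
	printf("%lld event(s) pending\n", (long long)st.st_size);
}
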
1490 1497
1491static void 1498static void
1492kqueue_doclose(struct kqueue *kq, struct klist *list, int fd) 1499kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
1493{ 1500{
1494 struct knote *kn; 1501 struct knote *kn;
1495 filedesc_t *fdp; 1502 filedesc_t *fdp;
1496 1503
1497 fdp = kq->kq_fdp; 1504 fdp = kq->kq_fdp;
1498 1505
1499 KASSERT(mutex_owned(&fdp->fd_lock)); 1506 KASSERT(mutex_owned(&fdp->fd_lock));
1500 1507
1501 for (kn = SLIST_FIRST(list); kn != NULL;) { 1508 for (kn = SLIST_FIRST(list); kn != NULL;) {
1502 if (kq != kn->kn_kq) { 1509 if (kq != kn->kn_kq) {
1503 kn = SLIST_NEXT(kn, kn_link); 1510 kn = SLIST_NEXT(kn, kn_link);
1504 continue; 1511 continue;
1505 } 1512 }
1506 knote_detach(kn, fdp, true); 1513 knote_detach(kn, fdp, true);
1507 mutex_enter(&fdp->fd_lock); 1514 mutex_enter(&fdp->fd_lock);
1508 kn = SLIST_FIRST(list); 1515 kn = SLIST_FIRST(list);
1509 } 1516 }
1510} 1517}
1511 1518
1512 1519
1513/* 1520/*
1514 * fileops close method for a kqueue descriptor. 1521 * fileops close method for a kqueue descriptor.
1515 */ 1522 */
1516static int 1523static int
1517kqueue_close(file_t *fp) 1524kqueue_close(file_t *fp)
1518{ 1525{
1519 struct kqueue *kq; 1526 struct kqueue *kq;
1520 filedesc_t *fdp; 1527 filedesc_t *fdp;
1521 fdfile_t *ff; 1528 fdfile_t *ff;
1522 int i; 1529 int i;
1523 1530
1524 kq = fp->f_kqueue; 1531 kq = fp->f_kqueue;
1525 fp->f_kqueue = NULL; 1532 fp->f_kqueue = NULL;
1526 fp->f_type = 0; 1533 fp->f_type = 0;
1527 fdp = curlwp->l_fd; 1534 fdp = curlwp->l_fd;
1528 1535
1529 mutex_enter(&fdp->fd_lock); 1536 mutex_enter(&fdp->fd_lock);
1530 for (i = 0; i <= fdp->fd_lastkqfile; i++) { 1537 for (i = 0; i <= fdp->fd_lastkqfile; i++) {
1531 if ((ff = fdp->fd_dt->dt_ff[i]) == NULL) 1538 if ((ff = fdp->fd_dt->dt_ff[i]) == NULL)
1532 continue; 1539 continue;
1533 kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i); 1540 kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
1534 } 1541 }
1535 if (fdp->fd_knhashmask != 0) { 1542 if (fdp->fd_knhashmask != 0) {
1536 for (i = 0; i < fdp->fd_knhashmask + 1; i++) { 1543 for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
1537 kqueue_doclose(kq, &fdp->fd_knhash[i], -1); 1544 kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
1538 } 1545 }
1539 } 1546 }
1540 mutex_exit(&fdp->fd_lock); 1547 mutex_exit(&fdp->fd_lock);
1541 1548
1542 KASSERT(kq->kq_count == 0); 1549 KASSERT(kq->kq_count == 0);
1543 mutex_destroy(&kq->kq_lock); 1550 mutex_destroy(&kq->kq_lock);
1544 cv_destroy(&kq->kq_cv); 1551 cv_destroy(&kq->kq_cv);
1545 seldestroy(&kq->kq_sel); 1552 seldestroy(&kq->kq_sel);
1546 kmem_free(kq, sizeof(*kq)); 1553 kmem_free(kq, sizeof(*kq));
1547 1554
1548 return (0); 1555 return (0);
1549} 1556}
1550 1557
1551/* 1558/*
1552 * struct fileops kqfilter method for a kqueue descriptor. 1559 * struct fileops kqfilter method for a kqueue descriptor.
1553 * Event triggered when monitored kqueue changes. 1560 * Event triggered when monitored kqueue changes.
1554 */ 1561 */
1555static int 1562static int
1556kqueue_kqfilter(file_t *fp, struct knote *kn) 1563kqueue_kqfilter(file_t *fp, struct knote *kn)
1557{ 1564{
1558 struct kqueue *kq; 1565 struct kqueue *kq;
1559 1566
1560 kq = ((file_t *)kn->kn_obj)->f_kqueue; 1567 kq = ((file_t *)kn->kn_obj)->f_kqueue;
1561 1568
1562 KASSERT(fp == kn->kn_obj); 1569 KASSERT(fp == kn->kn_obj);
1563 1570
1564 if (kn->kn_filter != EVFILT_READ) 1571 if (kn->kn_filter != EVFILT_READ)
1565 return 1; 1572 return 1;
1566 1573
1567 kn->kn_fop = &kqread_filtops; 1574 kn->kn_fop = &kqread_filtops;
1568 mutex_enter(&kq->kq_lock); 1575 mutex_enter(&kq->kq_lock);
1569 SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext); 1576 SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
1570 mutex_exit(&kq->kq_lock); 1577 mutex_exit(&kq->kq_lock);
1571 1578
1572 return 0; 1579 return 0;
1573} 1580}
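
/*
 * Illustrative sketch, not part of the revision above: the nesting
 * that kqueue_kqfilter() enables -- one kqueue watching another.
 * Only EVFILT_READ is accepted; other filters are rejected above.
 */
#include <sys/event.h>
#include <err.h>

static void
watch_kqueue(int outer, int inner)
{
	struct kevent kev;

	EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, 0);
	if (kevent(outer, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent");
}
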
1574 1581
1575 1582
1576/* 1583/*
1577 * Walk down a list of knotes, activating them if their event has 1584 * Walk down a list of knotes, activating them if their event has
1578 * triggered. The caller's object lock (e.g. device driver lock) 1585 * triggered. The caller's object lock (e.g. device driver lock)
1579 * must be held. 1586 * must be held.
1580 */ 1587 */
1581void 1588void
1582knote(struct klist *list, long hint) 1589knote(struct klist *list, long hint)
1583{ 1590{
1584 struct knote *kn, *tmpkn; 1591 struct knote *kn, *tmpkn;
1585 1592
1586 SLIST_FOREACH_SAFE(kn, list, kn_selnext, tmpkn) { 1593 SLIST_FOREACH_SAFE(kn, list, kn_selnext, tmpkn) {
1587 KASSERT(kn->kn_fop != NULL); 1594 KASSERT(kn->kn_fop != NULL);
1588 KASSERT(kn->kn_fop->f_event != NULL); 1595 KASSERT(kn->kn_fop->f_event != NULL);
1589 if ((*kn->kn_fop->f_event)(kn, hint)) 1596 if ((*kn->kn_fop->f_event)(kn, hint))
1590 knote_activate(kn); 1597 knote_activate(kn);
1591 } 1598 }
1592} 1599}
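
/*
 * Illustrative kernel-side sketch, not part of the revision above:
 * the usual caller of knote() is a driver firing its klist from the
 * notification path with the object lock held, as the comment above
 * requires.  The foo_softc layout here is hypothetical.
 */
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/event.h>

struct foo_softc {
	kmutex_t	sc_lock;	/* the "object lock" */
	struct klist	sc_note;	/* knotes added at attach time */
};

static void
foo_data_ready(struct foo_softc *sc)
{
	mutex_enter(&sc->sc_lock);
	knote(&sc->sc_note, 0);	/* runs f_event on each knote */
	mutex_exit(&sc->sc_lock);
}
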
1593 1600
1594/* 1601/*
1595 * Remove all knotes referencing a specified fd 1602 * Remove all knotes referencing a specified fd
1596 */ 1603 */
1597void 1604void
1598knote_fdclose(int fd) 1605knote_fdclose(int fd)
1599{ 1606{
1600 struct klist *list; 1607 struct klist *list;
1601 struct knote *kn; 1608 struct knote *kn;
1602 filedesc_t *fdp; 1609 filedesc_t *fdp;
1603 1610
1604 fdp = curlwp->l_fd; 1611 fdp = curlwp->l_fd;
1605 list = (struct klist *)&fdp->fd_dt->dt_ff[fd]->ff_knlist; 1612 list = (struct klist *)&fdp->fd_dt->dt_ff[fd]->ff_knlist;
1606 mutex_enter(&fdp->fd_lock); 1613 mutex_enter(&fdp->fd_lock);
1607 while ((kn = SLIST_FIRST(list)) != NULL) { 1614 while ((kn = SLIST_FIRST(list)) != NULL) {
1608 knote_detach(kn, fdp, true); 1615 knote_detach(kn, fdp, true);
1609 mutex_enter(&fdp->fd_lock); 1616 mutex_enter(&fdp->fd_lock);
1610 } 1617 }
1611 mutex_exit(&fdp->fd_lock); 1618 mutex_exit(&fdp->fd_lock);
1612} 1619}
1613 1620
1614/* 1621/*
1615 * Drop knote. Called with fdp->fd_lock held, and will drop the lock 1622 * Drop knote. Called with fdp->fd_lock held, and will drop the lock
1616 * before returning. 1623 * before returning.
1617 */ 1624 */
1618static void 1625static void
1619knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop) 1626knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
1620{ 1627{
1621 struct klist *list; 1628 struct klist *list;
1622 struct kqueue *kq; 1629 struct kqueue *kq;
1623 1630
1624 kq = kn->kn_kq; 1631 kq = kn->kn_kq;
1625 1632
1626 KASSERT((kn->kn_status & KN_MARKER) == 0); 1633 KASSERT((kn->kn_status & KN_MARKER) == 0);
1627 KASSERT(mutex_owned(&fdp->fd_lock)); 1634 KASSERT(mutex_owned(&fdp->fd_lock));
1628 1635
1629 KASSERT(kn->kn_fop != NULL); 1636 KASSERT(kn->kn_fop != NULL);
1630 /* Remove from monitored object. */ 1637 /* Remove from monitored object. */
1631 if (dofop) { 1638 if (dofop) {
1632 KASSERT(kn->kn_fop->f_detach != NULL); 1639 KASSERT(kn->kn_fop->f_detach != NULL);
1633 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1640 KERNEL_LOCK(1, NULL); /* XXXSMP */
1634 (*kn->kn_fop->f_detach)(kn); 1641 (*kn->kn_fop->f_detach)(kn);
1635 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1642 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1636 } 1643 }
1637 1644
1638 /* Remove from descriptor table. */ 1645 /* Remove from descriptor table. */
1639 if (kn->kn_fop->f_isfd) 1646 if (kn->kn_fop->f_isfd)
1640 list = (struct klist *)&fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist; 1647 list = (struct klist *)&fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist;
1641 else 1648 else
1642 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; 1649 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1643 1650
1644 SLIST_REMOVE(list, kn, knote, kn_link); 1651 SLIST_REMOVE(list, kn, knote, kn_link);
1645 1652
1646 /* Remove from kqueue. */ 1653 /* Remove from kqueue. */
1647again: 1654again:
1648 mutex_spin_enter(&kq->kq_lock); 1655 mutex_spin_enter(&kq->kq_lock);
1649 if ((kn->kn_status & KN_QUEUED) != 0) { 1656 if ((kn->kn_status & KN_QUEUED) != 0) {
1650 kq_check(kq); 1657 kq_check(kq);
1651 kq->kq_count--; 1658 kq->kq_count--;
1652 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1659 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1653 kn->kn_status &= ~KN_QUEUED; 1660 kn->kn_status &= ~KN_QUEUED;
1654 kq_check(kq); 1661 kq_check(kq);
1655 } else if (kn->kn_status & KN_BUSY) { 1662 } else if (kn->kn_status & KN_BUSY) {
1656 mutex_spin_exit(&kq->kq_lock); 1663 mutex_spin_exit(&kq->kq_lock);
1657 goto again; 1664 goto again;
1658 } 1665 }
1659 mutex_spin_exit(&kq->kq_lock); 1666 mutex_spin_exit(&kq->kq_lock);
1660 1667
1661 mutex_exit(&fdp->fd_lock); 1668 mutex_exit(&fdp->fd_lock);
1662 if (kn->kn_fop->f_isfd) 1669 if (kn->kn_fop->f_isfd)
1663 fd_putfile(kn->kn_id); 1670 fd_putfile(kn->kn_id);
1664 atomic_dec_uint(&kn->kn_kfilter->refcnt); 1671 atomic_dec_uint(&kn->kn_kfilter->refcnt);
1665 kmem_free(kn, sizeof(*kn)); 1672 kmem_free(kn, sizeof(*kn));
1666} 1673}
1667 1674
1668/* 1675/*
1669 * Re-enable the knote and queue it if it is active. 1676 * Re-enable the knote and queue it if it is active.
1670 */ 1677 */
1671static void 1678static void
1672knote_enqueue(struct knote *kn) 1679knote_enqueue(struct knote *kn)
1673{ 1680{
1674 struct kqueue *kq; 1681 struct kqueue *kq;
1675 1682
1676 KASSERT((kn->kn_status & KN_MARKER) == 0); 1683 KASSERT((kn->kn_status & KN_MARKER) == 0);
1677 1684
1678 kq = kn->kn_kq; 1685 kq = kn->kn_kq;
1679 1686
1680 mutex_spin_enter(&kq->kq_lock); 1687 mutex_spin_enter(&kq->kq_lock);
1681 if ((kn->kn_status & KN_DISABLED) != 0) { 1688 if ((kn->kn_status & KN_DISABLED) != 0) {
1682 kn->kn_status &= ~KN_DISABLED; 1689 kn->kn_status &= ~KN_DISABLED;
1683 } 1690 }
1684 if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) { 1691 if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) {
1685 kq_check(kq); 1692 kq_check(kq);
1686 kn->kn_status |= KN_QUEUED; 1693 kn->kn_status |= KN_QUEUED;
1687 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1694 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1688 kq->kq_count++; 1695 kq->kq_count++;
1689 kq_check(kq); 1696 kq_check(kq);
1690 cv_broadcast(&kq->kq_cv); 1697 cv_broadcast(&kq->kq_cv);
1691 selnotify(&kq->kq_sel, 0, NOTE_SUBMIT); 1698 selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
1692 } 1699 }
1693 mutex_spin_exit(&kq->kq_lock); 1700 mutex_spin_exit(&kq->kq_lock);
1694} 1701}
1695/* 1702/*
1696 * Activate the knote, queueing it if not disabled or already queued. 1703 * Activate the knote, queueing it if not disabled or already queued.
1697 */ 1704 */
1698static void 1705static void
1699knote_activate(struct knote *kn) 1706knote_activate(struct knote *kn)
1700{ 1707{
1701 struct kqueue *kq; 1708 struct kqueue *kq;
1702 1709
1703 KASSERT((kn->kn_status & KN_MARKER) == 0); 1710 KASSERT((kn->kn_status & KN_MARKER) == 0);
1704 1711
1705 kq = kn->kn_kq; 1712 kq = kn->kn_kq;
1706 1713
1707 mutex_spin_enter(&kq->kq_lock); 1714 mutex_spin_enter(&kq->kq_lock);
1708 kn->kn_status |= KN_ACTIVE; 1715 kn->kn_status |= KN_ACTIVE;
1709 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) { 1716 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1710 kq_check(kq); 1717 kq_check(kq);
1711 kn->kn_status |= KN_QUEUED; 1718 kn->kn_status |= KN_QUEUED;
1712 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1719 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1713 kq->kq_count++; 1720 kq->kq_count++;
1714 kq_check(kq); 1721 kq_check(kq);
1715 cv_broadcast(&kq->kq_cv); 1722 cv_broadcast(&kq->kq_cv);
1716 selnotify(&kq->kq_sel, 0, NOTE_SUBMIT); 1723 selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
1717 } 1724 }
1718 mutex_spin_exit(&kq->kq_lock); 1725 mutex_spin_exit(&kq->kq_lock);
1719} 1726}
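
/*
 * Illustrative sketch, not part of the revision above: a minimal
 * filterops of the kind knote_enqueue()/knote_activate() ultimately
 * serve -- a nonzero return from f_event is what gets a knote
 * activated and queued.  foo_softc extends the hypothetical softc
 * from the knote() sketch with an assumed sc_bytes count; f_attach
 * is NULL because fd-backed filters attach through the file's
 * fo_kqfilter method.
 */
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/event.h>

struct foo_softc {
	kmutex_t	sc_lock;
	struct klist	sc_note;
	int		sc_bytes;	/* hypothetical ready count */
};

static void
filt_foodetach(struct knote *kn)
{
	struct foo_softc *sc = kn->kn_hook;

	mutex_enter(&sc->sc_lock);
	SLIST_REMOVE(&sc->sc_note, kn, knote, kn_selnext);
	mutex_exit(&sc->sc_lock);
}

static int
filt_fooread(struct knote *kn, long hint)
{
	struct foo_softc *sc = kn->kn_hook;

	kn->kn_data = sc->sc_bytes;
	return kn->kn_data > 0;		/* nonzero -> knote_activate() */
}

static const struct filterops foo_read_filtops = {
	1, NULL, filt_foodetach, filt_fooread
};
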