Tue Apr 14 04:39:58 2015 UTC
Pull up following revision(s) (requested by christos in ticket #677):
	lib/libc/sys/kqueue.2: revision 1.34
	sys/kern/kern_event.c: revision 1.83
put the exit code of the process in data, like FreeBSD does.
--
say that we put the exit code in data.


(snj)
diff -r1.33 -r1.33.8.1 src/lib/libc/sys/kqueue.2
diff -r1.80 -r1.80.2.1 src/sys/kern/kern_event.c
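
The effect of this pull-up is visible from userland: an EVFILT_PROC/NOTE_EXIT event now carries the child's exit status in kev.data. A minimal sketch follows (it is not part of the diffs below, and it assumes the value is a wait(2)-style status word, as the log message's reference to FreeBSD suggests, so the <sys/wait.h> macros apply):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent ev;
	pid_t pid;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	switch (pid = fork()) {
	case -1:
		err(1, "fork");
	case 0:
		/* Child: linger briefly so the parent can register. */
		sleep(1);
		_exit(42);
	default:
		break;
	}

	/* Watch the child for NOTE_EXIT. */
	EV_SET(&ev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE, NOTE_EXIT, 0, 0);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: register");

	/* Block until the child exits. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
		err(1, "kevent: wait");

	if (ev.fflags & NOTE_EXIT) {
		/* Assumption: data is a wait(2)-style status word. */
		int status = (int)ev.data;
		if (WIFEXITED(status))
			printf("pid %ld exited with code %d\n",
			    (long)pid, WEXITSTATUS(status));
	}
	(void)waitpid(pid, NULL, 0);	/* reap the zombie as usual */
	return 0;
}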

cvs diff -r1.33 -r1.33.8.1 src/lib/libc/sys/kqueue.2

--- src/lib/libc/sys/kqueue.2 2012/11/24 15:16:52 1.33
+++ src/lib/libc/sys/kqueue.2 2015/04/14 04:39:58 1.33.8.1
@@ -1,647 +1,649 @@
1.\" $NetBSD: kqueue.2,v 1.33 2012/11/24 15:16:52 christos Exp $ 1.\" $NetBSD: kqueue.2,v 1.33.8.1 2015/04/14 04:39:58 snj Exp $
2.\" 2.\"
3.\" Copyright (c) 2000 Jonathan Lemon 3.\" Copyright (c) 2000 Jonathan Lemon
4.\" All rights reserved. 4.\" All rights reserved.
5.\" 5.\"
6.\" Copyright (c) 2001, 2002, 2003 The NetBSD Foundation, Inc. 6.\" Copyright (c) 2001, 2002, 2003 The NetBSD Foundation, Inc.
7.\" All rights reserved. 7.\" All rights reserved.
8.\" 8.\"
9.\" Portions of this documentation is derived from text contributed by 9.\" Portions of this documentation is derived from text contributed by
10.\" Luke Mewburn. 10.\" Luke Mewburn.
11.\" 11.\"
12.\" Redistribution and use in source and binary forms, with or without 12.\" Redistribution and use in source and binary forms, with or without
13.\" modification, are permitted provided that the following conditions 13.\" modification, are permitted provided that the following conditions
14.\" are met: 14.\" are met:
15.\" 1. Redistributions of source code must retain the above copyright 15.\" 1. Redistributions of source code must retain the above copyright
16.\" notice, this list of conditions and the following disclaimer. 16.\" notice, this list of conditions and the following disclaimer.
17.\" 2. Redistributions in binary form must reproduce the above copyright 17.\" 2. Redistributions in binary form must reproduce the above copyright
18.\" notice, this list of conditions and the following disclaimer in the 18.\" notice, this list of conditions and the following disclaimer in the
19.\" documentation and/or other materials provided with the distribution. 19.\" documentation and/or other materials provided with the distribution.
20.\" 20.\"
21.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND 21.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND
22.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE. 31.\" SUCH DAMAGE.
32.\" 32.\"
33.\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $ 33.\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $
34.\" 34.\"
35.Dd November 24, 2012 35.Dd March 2, 2015
36.Dt KQUEUE 2 36.Dt KQUEUE 2
37.Os 37.Os
38.Sh NAME 38.Sh NAME
39.Nm kqueue , 39.Nm kqueue ,
40.Nm kqueue1 , 40.Nm kqueue1 ,
41.Nm kevent 41.Nm kevent
42.Nd kernel event notification mechanism 42.Nd kernel event notification mechanism
43.Sh LIBRARY 43.Sh LIBRARY
44.Lb libc 44.Lb libc
45.Sh SYNOPSIS 45.Sh SYNOPSIS
46.In sys/event.h 46.In sys/event.h
47.In sys/time.h 47.In sys/time.h
48.Ft int 48.Ft int
49.Fn kqueue "void" 49.Fn kqueue "void"
50.Ft int 50.Ft int
51.Fn kqueue1 "int flags" 51.Fn kqueue1 "int flags"
52.Ft int 52.Ft int
53.Fn kevent "int kq" "const struct kevent *changelist" "size_t nchanges" "struct kevent *eventlist" "size_t nevents" "const struct timespec *timeout" 53.Fn kevent "int kq" "const struct kevent *changelist" "size_t nchanges" "struct kevent *eventlist" "size_t nevents" "const struct timespec *timeout"
54.Fn EV_SET "\*[Am]kev" ident filter flags fflags data udata 54.Fn EV_SET "\*[Am]kev" ident filter flags fflags data udata
55.Sh DESCRIPTION 55.Sh DESCRIPTION
56.Fn kqueue 56.Fn kqueue
57provides a generic method of notifying the user when an event 57provides a generic method of notifying the user when an event
58happens or a condition holds, based on the results of small 58happens or a condition holds, based on the results of small
59pieces of kernel code termed filters. 59pieces of kernel code termed filters.
60A kevent is identified by the (ident, filter) pair; there may only 60A kevent is identified by the (ident, filter) pair; there may only
61be one unique kevent per kqueue. 61be one unique kevent per kqueue.
62.Pp 62.Pp
63The filter is executed upon the initial registration of a kevent 63The filter is executed upon the initial registration of a kevent
64in order to detect whether a preexisting condition is present, and is also 64in order to detect whether a preexisting condition is present, and is also
65executed whenever an event is passed to the filter for evaluation. 65executed whenever an event is passed to the filter for evaluation.
66If the filter determines that the condition should be reported, 66If the filter determines that the condition should be reported,
67then the kevent is placed on the kqueue for the user to retrieve. 67then the kevent is placed on the kqueue for the user to retrieve.
68.Pp 68.Pp
69The filter is also run when the user attempts to retrieve the kevent 69The filter is also run when the user attempts to retrieve the kevent
70from the kqueue. 70from the kqueue.
71If the filter indicates that the condition that triggered 71If the filter indicates that the condition that triggered
72the event no longer holds, the kevent is removed from the kqueue and 72the event no longer holds, the kevent is removed from the kqueue and
73is not returned. 73is not returned.
74.Pp 74.Pp
75Multiple events which trigger the filter do not result in multiple 75Multiple events which trigger the filter do not result in multiple
76kevents being placed on the kqueue; instead, the filter will aggregate 76kevents being placed on the kqueue; instead, the filter will aggregate
77the events into a single struct kevent. 77the events into a single struct kevent.
78Calling 78Calling
79.Fn close 79.Fn close
80on a file descriptor will remove any kevents that reference the descriptor. 80on a file descriptor will remove any kevents that reference the descriptor.
81.Pp 81.Pp
82.Fn kqueue 82.Fn kqueue
83creates a new kernel event queue and returns a descriptor. 83creates a new kernel event queue and returns a descriptor.
84.Pp 84.Pp
85The 85The
86.Fn kqueue1 86.Fn kqueue1
87function also allows setting the following 87function also allows setting the following
88.Fa flags 88.Fa flags
89on the returned file descriptor: 89on the returned file descriptor:
90.Bl -column O_NONBLOCK -offset indent 90.Bl -column O_NONBLOCK -offset indent
91.It Dv O_CLOEXEC 91.It Dv O_CLOEXEC
92Set the close on exec property. 92Set the close on exec property.
93.It Dv O_NONBLOCK 93.It Dv O_NONBLOCK
94Sets non-blocking I/O. 94Sets non-blocking I/O.
95.It Dv O_NOSIGPIPE 95.It Dv O_NOSIGPIPE
96Return 96Return
97.Er EPIPE 97.Er EPIPE
98instead of raising 98instead of raising
99.Dv SIGPIPE . 99.Dv SIGPIPE .
100.El 100.El
101The queue is not inherited by a child created with 101The queue is not inherited by a child created with
102.Xr fork 2 . 102.Xr fork 2 .
103.\" However, if 103.\" However, if
104.\" .Xr rfork 2 104.\" .Xr rfork 2
105.\" is called without the 105.\" is called without the
106.\" .Dv RFFDG 106.\" .Dv RFFDG
107.\" flag, then the descriptor table is shared, 107.\" flag, then the descriptor table is shared,
108.\" which will allow sharing of the kqueue between two processes. 108.\" which will allow sharing of the kqueue between two processes.
109.Pp 109.Pp
110.Fn kevent 110.Fn kevent
111is used to register events with the queue, and return any pending 111is used to register events with the queue, and return any pending
112events to the user. 112events to the user.
113.Fa changelist 113.Fa changelist
114is a pointer to an array of 114is a pointer to an array of
115.Va kevent 115.Va kevent
116structures, as defined in 116structures, as defined in
117.In sys/event.h . 117.In sys/event.h .
118All changes contained in the 118All changes contained in the
119.Fa changelist 119.Fa changelist
120are applied before any pending events are read from the queue. 120are applied before any pending events are read from the queue.
121.Fa nchanges 121.Fa nchanges
122gives the size of 122gives the size of
123.Fa changelist . 123.Fa changelist .
124.Fa eventlist 124.Fa eventlist
125is a pointer to an array of kevent structures. 125is a pointer to an array of kevent structures.
126.Fa nevents 126.Fa nevents
127determines the size of 127determines the size of
128.Fa eventlist . 128.Fa eventlist .
129If 129If
130.Fa timeout 130.Fa timeout
131is a 131is a
132.No non- Ns Dv NULL 132.No non- Ns Dv NULL
133pointer, it specifies a maximum interval to wait 133pointer, it specifies a maximum interval to wait
134for an event, which will be interpreted as a struct timespec. 134for an event, which will be interpreted as a struct timespec.
135If 135If
136.Fa timeout 136.Fa timeout
137is a 137is a
138.Dv NULL 138.Dv NULL
139pointer, 139pointer,
140.Fn kevent 140.Fn kevent
141waits indefinitely. 141waits indefinitely.
142To effect a poll, the 142To effect a poll, the
143.Fa timeout 143.Fa timeout
144argument should be 144argument should be
145.No non- Ns Dv NULL , 145.No non- Ns Dv NULL ,
146pointing to a zero-valued 146pointing to a zero-valued
147.Va timespec 147.Va timespec
148structure. 148structure.
149The same array may be used for the 149The same array may be used for the
150.Fa changelist 150.Fa changelist
151and 151and
152.Fa eventlist . 152.Fa eventlist .
153.Pp 153.Pp
154.Fn EV_SET 154.Fn EV_SET
155is a macro which is provided for ease of initializing a 155is a macro which is provided for ease of initializing a
156kevent structure. 156kevent structure.
157.Pp 157.Pp
158The 158The
159.Va kevent 159.Va kevent
160structure is defined as: 160structure is defined as:
161.Bd -literal 161.Bd -literal
162struct kevent { 162struct kevent {
163 uintptr_t ident; /* identifier for this event */ 163 uintptr_t ident; /* identifier for this event */
164 uint32_t filter; /* filter for event */ 164 uint32_t filter; /* filter for event */
165 uint32_t flags; /* action flags for kqueue */ 165 uint32_t flags; /* action flags for kqueue */
166 uint32_t fflags; /* filter flag value */ 166 uint32_t fflags; /* filter flag value */
167 int64_t data; /* filter data value */ 167 int64_t data; /* filter data value */
168 intptr_t udata; /* opaque user data identifier */ 168 intptr_t udata; /* opaque user data identifier */
169}; 169};
170.Ed 170.Ed
171.Pp 171.Pp
172The fields of 172The fields of
173.Fa struct kevent 173.Fa struct kevent
174are: 174are:
175.Bl -tag -width XXXfilter -offset indent 175.Bl -tag -width XXXfilter -offset indent
176.It ident 176.It ident
177Value used to identify this event. 177Value used to identify this event.
178The exact interpretation is determined by the attached filter, 178The exact interpretation is determined by the attached filter,
179but often is a file descriptor. 179but often is a file descriptor.
180.It filter 180.It filter
181Identifies the kernel filter used to process this event. 181Identifies the kernel filter used to process this event.
182There are pre-defined system filters (which are described below), and 182There are pre-defined system filters (which are described below), and
183other filters may be added by kernel subsystems as necessary. 183other filters may be added by kernel subsystems as necessary.
184.It flags 184.It flags
185Actions to perform on the event. 185Actions to perform on the event.
186.It fflags 186.It fflags
187Filter-specific flags. 187Filter-specific flags.
188.It data 188.It data
189Filter-specific data value. 189Filter-specific data value.
190.It udata 190.It udata
191Opaque user-defined value passed through the kernel unchanged. 191Opaque user-defined value passed through the kernel unchanged.
192.El 192.El
193.Pp 193.Pp
194The 194The
195.Va flags 195.Va flags
196field can contain the following values: 196field can contain the following values:
197.Bl -tag -width XXXEV_ONESHOT -offset indent 197.Bl -tag -width XXXEV_ONESHOT -offset indent
198.It EV_ADD 198.It EV_ADD
199Adds the event to the kqueue. 199Adds the event to the kqueue.
200Re-adding an existing event will modify the parameters of the original 200Re-adding an existing event will modify the parameters of the original
201event, and not result in a duplicate entry. 201event, and not result in a duplicate entry.
202Adding an event automatically enables it, 202Adding an event automatically enables it,
203unless overridden by the EV_DISABLE flag. 203unless overridden by the EV_DISABLE flag.
204.It EV_ENABLE 204.It EV_ENABLE
205Permit 205Permit
206.Fn kevent 206.Fn kevent
207to return the event if it is triggered. 207to return the event if it is triggered.
208.It EV_DISABLE 208.It EV_DISABLE
209Disable the event so 209Disable the event so
210.Fn kevent 210.Fn kevent
211will not return it. 211will not return it.
212The filter itself is not disabled. 212The filter itself is not disabled.
213.It EV_DELETE 213.It EV_DELETE
214Removes the event from the kqueue. 214Removes the event from the kqueue.
215Events which are attached to file descriptors are automatically deleted 215Events which are attached to file descriptors are automatically deleted
216on the last close of the descriptor. 216on the last close of the descriptor.
217.It EV_ONESHOT 217.It EV_ONESHOT
218Causes the event to return only the first occurrence of the filter 218Causes the event to return only the first occurrence of the filter
219being triggered. 219being triggered.
220After the user retrieves the event from the kqueue, it is deleted. 220After the user retrieves the event from the kqueue, it is deleted.
221.It EV_CLEAR 221.It EV_CLEAR
222After the event is retrieved by the user, its state is reset. 222After the event is retrieved by the user, its state is reset.
223This is useful for filters which report state transitions 223This is useful for filters which report state transitions
224instead of the current state. 224instead of the current state.
225Note that some filters may automatically set this flag internally. 225Note that some filters may automatically set this flag internally.
226.It EV_EOF 226.It EV_EOF
227Filters may set this flag to indicate a filter-specific EOF condition. 227Filters may set this flag to indicate a filter-specific EOF condition.
228.It EV_ERROR 228.It EV_ERROR
229See 229See
230.Sx RETURN VALUES 230.Sx RETURN VALUES
231below. 231below.
232.El 232.El
233.Ss Filters 233.Ss Filters
234Filters are identified by a number. 234Filters are identified by a number.
235There are two types of filters: pre-defined filters, which 235There are two types of filters: pre-defined filters, which
236are described below, and third-party filters that may be added with 236are described below, and third-party filters that may be added with
237.Xr kfilter_register 9 237.Xr kfilter_register 9
238by kernel sub-systems, third-party device drivers, or loadable 238by kernel sub-systems, third-party device drivers, or loadable
239kernel modules. 239kernel modules.
240.Pp 240.Pp
241As a third-party filter is referenced by a well-known name instead 241As a third-party filter is referenced by a well-known name instead
242of a statically assigned number, two 242of a statically assigned number, two
243.Xr ioctl 2 Ns s 243.Xr ioctl 2 Ns s
244are supported on the file descriptor returned by 244are supported on the file descriptor returned by
245.Fn kqueue 245.Fn kqueue
246to map a filter name to a filter number, and vice-versa (passing 246to map a filter name to a filter number, and vice-versa (passing
247arguments in a structure described below): 247arguments in a structure described below):
248.Bl -tag -width KFILTER_BYFILTER -offset indent 248.Bl -tag -width KFILTER_BYFILTER -offset indent
249.It KFILTER_BYFILTER 249.It KFILTER_BYFILTER
250Map 250Map
251.Va filter 251.Va filter
252to 252to
253.Va name , 253.Va name ,
254which is of size 254which is of size
255.Va len . 255.Va len .
256.It KFILTER_BYNAME 256.It KFILTER_BYNAME
257Map 257Map
258.Va name 258.Va name
259to 259to
260.Va filter . 260.Va filter .
261.Va len 261.Va len
262is ignored. 262is ignored.
263.El 263.El
264.Pp 264.Pp
265The following structure is used to pass arguments in and out of the 265The following structure is used to pass arguments in and out of the
266.Xr ioctl 2 : 266.Xr ioctl 2 :
267.Bd -literal -offset indent 267.Bd -literal -offset indent
268struct kfilter_mapping { 268struct kfilter_mapping {
269 char *name; /* name to lookup or return */ 269 char *name; /* name to lookup or return */
270 size_t len; /* length of name */ 270 size_t len; /* length of name */
271 uint32_t filter; /* filter to lookup or return */ 271 uint32_t filter; /* filter to lookup or return */
272}; 272};
273.Ed 273.Ed
274.Pp 274.Pp
275Arguments may be passed to and from the filter via the 275Arguments may be passed to and from the filter via the
276.Va fflags 276.Va fflags
277and 277and
278.Va data 278.Va data
279fields in the kevent structure. 279fields in the kevent structure.
280.Pp 280.Pp
281The predefined system filters are: 281The predefined system filters are:
282.Bl -tag -width EVFILT_SIGNAL 282.Bl -tag -width EVFILT_SIGNAL
283.It EVFILT_READ 283.It EVFILT_READ
284Takes a descriptor as the identifier, and returns whenever 284Takes a descriptor as the identifier, and returns whenever
285there is data available to read. 285there is data available to read.
286The behavior of the filter is slightly different depending 286The behavior of the filter is slightly different depending
287on the descriptor type. 287on the descriptor type.
288.Pp 288.Pp
289.Bl -tag -width 2n 289.Bl -tag -width 2n
290.It Sockets 290.It Sockets
291Sockets which have previously been passed to 291Sockets which have previously been passed to
292.Fn listen 292.Fn listen
293return when there is an incoming connection pending. 293return when there is an incoming connection pending.
294.Va data 294.Va data
295contains the size of the listen backlog (i.e., the number of 295contains the size of the listen backlog (i.e., the number of
296connections ready to be accepted with 296connections ready to be accepted with
297.Xr accept 2 . ) 297.Xr accept 2 . )
298.Pp 298.Pp
299Other socket descriptors return when there is data to be read, 299Other socket descriptors return when there is data to be read,
300subject to the 300subject to the
301.Dv SO_RCVLOWAT 301.Dv SO_RCVLOWAT
302value of the socket buffer. 302value of the socket buffer.
303This may be overridden with a per-filter low water mark at the 303This may be overridden with a per-filter low water mark at the
304time the filter is added by setting the 304time the filter is added by setting the
305NOTE_LOWAT 305NOTE_LOWAT
306flag in 306flag in
307.Va fflags , 307.Va fflags ,
308and specifying the new low water mark in 308and specifying the new low water mark in
309.Va data . 309.Va data .
310On return, 310On return,
311.Va data 311.Va data
312contains the number of bytes in the socket buffer. 312contains the number of bytes in the socket buffer.
313.Pp 313.Pp
314If the read direction of the socket has been shut down, then the filter 314If the read direction of the socket has been shut down, then the filter
315also sets EV_EOF in 315also sets EV_EOF in
316.Va flags , 316.Va flags ,
317and returns the socket error (if any) in 317and returns the socket error (if any) in
318.Va fflags . 318.Va fflags .
319It is possible for EOF to be returned (indicating the connection is gone) 319It is possible for EOF to be returned (indicating the connection is gone)
320while there is still data pending in the socket buffer. 320while there is still data pending in the socket buffer.
321.It Vnodes 321.It Vnodes
322Returns when the file pointer is not at the end of file. 322Returns when the file pointer is not at the end of file.
323.Va data 323.Va data
324contains the offset from current position to end of file, 324contains the offset from current position to end of file,
325and may be negative. 325and may be negative.
326.It "Fifos, Pipes" 326.It "Fifos, Pipes"
327Returns when there is data to read; 327Returns when there is data to read;
328.Va data 328.Va data
329contains the number of bytes available. 329contains the number of bytes available.
330.Pp 330.Pp
331When the last writer disconnects, the filter will set EV_EOF in 331When the last writer disconnects, the filter will set EV_EOF in
332.Va flags . 332.Va flags .
333This may be cleared by passing in EV_CLEAR, at which point the 333This may be cleared by passing in EV_CLEAR, at which point the
334filter will resume waiting for data to become available before 334filter will resume waiting for data to become available before
335returning. 335returning.
336.El 336.El
337.It EVFILT_WRITE 337.It EVFILT_WRITE
338Takes a descriptor as the identifier, and returns whenever 338Takes a descriptor as the identifier, and returns whenever
339it is possible to write to the descriptor. 339it is possible to write to the descriptor.
340For sockets, pipes, fifos, and ttys, 340For sockets, pipes, fifos, and ttys,
341.Va data 341.Va data
342will contain the amount of space remaining in the write buffer. 342will contain the amount of space remaining in the write buffer.
343The filter will set EV_EOF when the reader disconnects, and for 343The filter will set EV_EOF when the reader disconnects, and for
344the fifo case, this may be cleared by use of EV_CLEAR. 344the fifo case, this may be cleared by use of EV_CLEAR.
345Note that this filter is not supported for vnodes. 345Note that this filter is not supported for vnodes.
346.Pp 346.Pp
347For sockets, the low water mark and socket error handling is 347For sockets, the low water mark and socket error handling is
348identical to the EVFILT_READ case. 348identical to the EVFILT_READ case.
349.It EVFILT_AIO 349.It EVFILT_AIO
350This is not implemented in 350This is not implemented in
351.Nx . 351.Nx .
352.ig 352.ig
353The sigevent portion of the AIO request is filled in, with 353The sigevent portion of the AIO request is filled in, with
354.Va sigev_notify_kqueue 354.Va sigev_notify_kqueue
355containing the descriptor of the kqueue that the event should 355containing the descriptor of the kqueue that the event should
356be attached to, 356be attached to,
357.Va sigev_value 357.Va sigev_value
358containing the udata value, and 358containing the udata value, and
359.Va sigev_notify 359.Va sigev_notify
360set to SIGEV_EVENT. 360set to SIGEV_EVENT.
361When the aio_* function is called, the event will be registered 361When the aio_* function is called, the event will be registered
362with the specified kqueue, and the 362with the specified kqueue, and the
363.Va ident 363.Va ident
364argument set to the 364argument set to the
365.Fa struct aiocb 365.Fa struct aiocb
366returned by the aio_* function. 366returned by the aio_* function.
367The filter returns under the same conditions as aio_error. 367The filter returns under the same conditions as aio_error.
368.Pp 368.Pp
369Alternatively, a kevent structure may be initialized, with 369Alternatively, a kevent structure may be initialized, with
370.Va ident 370.Va ident
371containing the descriptor of the kqueue, and the 371containing the descriptor of the kqueue, and the
372address of the kevent structure placed in the 372address of the kevent structure placed in the
373.Va aio_lio_opcode 373.Va aio_lio_opcode
374field of the AIO request. 374field of the AIO request.
375However, this approach will not work on 375However, this approach will not work on
376architectures with 64-bit pointers, and should be considered deprecated. 376architectures with 64-bit pointers, and should be considered deprecated.
377.. 377..
378.It EVFILT_VNODE 378.It EVFILT_VNODE
379Takes a file descriptor as the identifier and the events to watch for in 379Takes a file descriptor as the identifier and the events to watch for in
380.Va fflags , 380.Va fflags ,
381and returns when one or more of the requested events occurs on the descriptor. 381and returns when one or more of the requested events occurs on the descriptor.
382The events to monitor are: 382The events to monitor are:
383.Bl -tag -width XXNOTE_RENAME 383.Bl -tag -width XXNOTE_RENAME
384.It NOTE_DELETE 384.It NOTE_DELETE
385.Fn unlink 385.Fn unlink
386was called on the file referenced by the descriptor. 386was called on the file referenced by the descriptor.
387.It NOTE_WRITE 387.It NOTE_WRITE
388A write occurred on the file referenced by the descriptor. 388A write occurred on the file referenced by the descriptor.
389.It NOTE_EXTEND 389.It NOTE_EXTEND
390The file referenced by the descriptor was extended. 390The file referenced by the descriptor was extended.
391.It NOTE_ATTRIB 391.It NOTE_ATTRIB
392The file referenced by the descriptor had its attributes changed. 392The file referenced by the descriptor had its attributes changed.
393.It NOTE_LINK 393.It NOTE_LINK
394The link count on the file changed. 394The link count on the file changed.
395.It NOTE_RENAME 395.It NOTE_RENAME
396The file referenced by the descriptor was renamed. 396The file referenced by the descriptor was renamed.
397.It NOTE_REVOKE 397.It NOTE_REVOKE
398Access to the file was revoked via 398Access to the file was revoked via
399.Xr revoke 2 399.Xr revoke 2
400or the underlying filesystem was unmounted. 400or the underlying filesystem was unmounted.
401.El 401.El
402.Pp 402.Pp
403On return, 403On return,
404.Va fflags 404.Va fflags
405contains the events which triggered the filter. 405contains the events which triggered the filter.
406.It EVFILT_PROC 406.It EVFILT_PROC
407Takes the process ID to monitor as the identifier and the events to watch for 407Takes the process ID to monitor as the identifier and the events to watch for
408in 408in
409.Va fflags , 409.Va fflags ,
410and returns when the process performs one or more of the requested events. 410and returns when the process performs one or more of the requested events.
411If a process can normally see another process, it can attach an event to it. 411If a process can normally see another process, it can attach an event to it.
412The events to monitor are: 412The events to monitor are:
413.Bl -tag -width XXNOTE_TRACKERR 413.Bl -tag -width XXNOTE_TRACKERR
414.It NOTE_EXIT 414.It NOTE_EXIT
415The process has exited. 415The process has exited.
 416The exit code of the process is stored in
 417.Va data .
416.It NOTE_FORK 418.It NOTE_FORK
417The process has called 419The process has called
418.Fn fork . 420.Fn fork .
419.It NOTE_EXEC 421.It NOTE_EXEC
420The process has executed a new process via 422The process has executed a new process via
421.Xr execve 2 423.Xr execve 2
422or similar call. 424or similar call.
423.It NOTE_TRACK 425.It NOTE_TRACK
424Follow a process across 426Follow a process across
425.Fn fork 427.Fn fork
426calls. 428calls.
427The parent process will return with NOTE_TRACK set in the 429The parent process will return with NOTE_TRACK set in the
428.Va fflags 430.Va fflags
429field, while the child process will return with NOTE_CHILD set in 431field, while the child process will return with NOTE_CHILD set in
430.Va fflags 432.Va fflags
431and the parent PID in 433and the parent PID in
432.Va data . 434.Va data .
433.It NOTE_TRACKERR 435.It NOTE_TRACKERR
434This flag is returned if the system was unable to attach an event to 436This flag is returned if the system was unable to attach an event to
435the child process, usually due to resource limitations. 437the child process, usually due to resource limitations.
436.El 438.El
437.Pp 439.Pp
438On return, 440On return,
439.Va fflags 441.Va fflags
440contains the events which triggered the filter. 442contains the events which triggered the filter.
441.It EVFILT_SIGNAL 443.It EVFILT_SIGNAL
442Takes the signal number to monitor as the identifier and returns 444Takes the signal number to monitor as the identifier and returns
443when the given signal is delivered to the current process. 445when the given signal is delivered to the current process.
444This coexists with the 446This coexists with the
445.Fn signal 447.Fn signal
446and 448and
447.Fn sigaction 449.Fn sigaction
448facilities, and has a lower precedence. 450facilities, and has a lower precedence.
449The filter will record 451The filter will record
450all attempts to deliver a signal to a process, even if the signal has 452all attempts to deliver a signal to a process, even if the signal has
451been marked as SIG_IGN. 453been marked as SIG_IGN.
452Event notification happens after normal signal delivery processing. 454Event notification happens after normal signal delivery processing.
453.Va data 455.Va data
454returns the number of times the signal has occurred since the last call to 456returns the number of times the signal has occurred since the last call to
455.Fn kevent . 457.Fn kevent .
456This filter automatically sets the EV_CLEAR flag internally. 458This filter automatically sets the EV_CLEAR flag internally.
457.It EVFILT_TIMER 459.It EVFILT_TIMER
458Establishes an arbitrary timer identified by 460Establishes an arbitrary timer identified by
459.Va ident . 461.Va ident .
460When adding a timer, 462When adding a timer,
461.Va data 463.Va data
462specifies the timeout period in milliseconds. 464specifies the timeout period in milliseconds.
463The timer will be periodic unless EV_ONESHOT is specified. 465The timer will be periodic unless EV_ONESHOT is specified.
464On return, 466On return,
465.Va data 467.Va data
466contains the number of times the timeout has expired since the last call to 468contains the number of times the timeout has expired since the last call to
467.Fn kevent . 469.Fn kevent .
468This filter automatically sets the EV_CLEAR flag internally. 470This filter automatically sets the EV_CLEAR flag internally.
469.El 471.El
470.Sh RETURN VALUES 472.Sh RETURN VALUES
471.Fn kqueue 473.Fn kqueue
472creates a new kernel event queue and returns a file descriptor. 474creates a new kernel event queue and returns a file descriptor.
473If there was an error creating the kernel event queue, a value of \-1 is 475If there was an error creating the kernel event queue, a value of \-1 is
474returned and errno set. 476returned and errno set.
475.Pp 477.Pp
476.Fn kevent 478.Fn kevent
477returns the number of events placed in the 479returns the number of events placed in the
478.Fa eventlist , 480.Fa eventlist ,
479up to the value given by 481up to the value given by
480.Fa nevents . 482.Fa nevents .
481If an error occurs while processing an element of the 483If an error occurs while processing an element of the
482.Fa changelist 484.Fa changelist
483and there is enough room in the 485and there is enough room in the
484.Fa eventlist , 486.Fa eventlist ,
485then the event will be placed in the 487then the event will be placed in the
486.Fa eventlist 488.Fa eventlist
487with 489with
488.Dv EV_ERROR 490.Dv EV_ERROR
489set in 491set in
490.Va flags 492.Va flags
491and the system error in 493and the system error in
492.Va data . 494.Va data .
493Otherwise, 495Otherwise,
494.Dv \-1 496.Dv \-1
495will be returned, and 497will be returned, and
496.Dv errno 498.Dv errno
497will be set to indicate the error condition. 499will be set to indicate the error condition.
498If the time limit expires, then 500If the time limit expires, then
499.Fn kevent 501.Fn kevent
500returns 0. 502returns 0.
501.Sh EXAMPLES 503.Sh EXAMPLES
502The following example program monitors a file (provided to it as the first 504The following example program monitors a file (provided to it as the first
503argument) and prints information about some common events it receives 505argument) and prints information about some common events it receives
504notifications for: 506notifications for:
505.Bd -literal -offset indent 507.Bd -literal -offset indent
506#include \*[Lt]sys/types.h\*[Gt] 508#include \*[Lt]sys/types.h\*[Gt]
507#include \*[Lt]sys/event.h\*[Gt] 509#include \*[Lt]sys/event.h\*[Gt]
508#include \*[Lt]sys/time.h\*[Gt] 510#include \*[Lt]sys/time.h\*[Gt]
509#include \*[Lt]stdio.h\*[Gt] 511#include \*[Lt]stdio.h\*[Gt]
510#include \*[Lt]unistd.h\*[Gt] 512#include \*[Lt]unistd.h\*[Gt]
511#include \*[Lt]stdlib.h\*[Gt] 513#include \*[Lt]stdlib.h\*[Gt]
512#include \*[Lt]fcntl.h\*[Gt] 514#include \*[Lt]fcntl.h\*[Gt]
513#include \*[Lt]err.h\*[Gt] 515#include \*[Lt]err.h\*[Gt]
514 516
515int 517int
516main(int argc, char *argv[]) 518main(int argc, char *argv[])
517{ 519{
518 int fd, kq, nev; 520 int fd, kq, nev;
519 struct kevent ev; 521 struct kevent ev;
520 static const struct timespec tout = { 1, 0 }; 522 static const struct timespec tout = { 1, 0 };
521 523
522 if ((fd = open(argv[1], O_RDONLY)) == -1) 524 if ((fd = open(argv[1], O_RDONLY)) == -1)
523 err(1, "Cannot open `%s'", argv[1]); 525 err(1, "Cannot open `%s'", argv[1]);
524 526
525 if ((kq = kqueue()) == -1) 527 if ((kq = kqueue()) == -1)
526 err(1, "Cannot create kqueue"); 528 err(1, "Cannot create kqueue");
527 529
528 EV_SET(\*[Am]ev, fd, EVFILT_VNODE, EV_ADD | EV_ENABLE | EV_CLEAR, 530 EV_SET(\*[Am]ev, fd, EVFILT_VNODE, EV_ADD | EV_ENABLE | EV_CLEAR,
529 NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_ATTRIB|NOTE_LINK| 531 NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_ATTRIB|NOTE_LINK|
530 NOTE_RENAME|NOTE_REVOKE, 0, 0); 532 NOTE_RENAME|NOTE_REVOKE, 0, 0);
531 if (kevent(kq, \*[Am]ev, 1, NULL, 0, \*[Am]tout) == -1) 533 if (kevent(kq, \*[Am]ev, 1, NULL, 0, \*[Am]tout) == -1)
532 err(1, "kevent"); 534 err(1, "kevent");
533 for (;;) { 535 for (;;) {
534 nev = kevent(kq, NULL, 0, \*[Am]ev, 1, \*[Am]tout); 536 nev = kevent(kq, NULL, 0, \*[Am]ev, 1, \*[Am]tout);
535 if (nev == -1) 537 if (nev == -1)
536 err(1, "kevent"); 538 err(1, "kevent");
537 if (nev == 0) 539 if (nev == 0)
538 continue; 540 continue;
539 if (ev.fflags \*[Am] NOTE_DELETE) { 541 if (ev.fflags \*[Am] NOTE_DELETE) {
540 printf("deleted "); 542 printf("deleted ");
541 ev.fflags \*[Am]= ~NOTE_DELETE; 543 ev.fflags \*[Am]= ~NOTE_DELETE;
542 } 544 }
543 if (ev.fflags \*[Am] NOTE_WRITE) { 545 if (ev.fflags \*[Am] NOTE_WRITE) {
544 printf("written "); 546 printf("written ");
545 ev.fflags \*[Am]= ~NOTE_WRITE; 547 ev.fflags \*[Am]= ~NOTE_WRITE;
546 } 548 }
547 if (ev.fflags \*[Am] NOTE_EXTEND) { 549 if (ev.fflags \*[Am] NOTE_EXTEND) {
548 printf("extended "); 550 printf("extended ");
549 ev.fflags \*[Am]= ~NOTE_EXTEND; 551 ev.fflags \*[Am]= ~NOTE_EXTEND;
550 } 552 }
551 if (ev.fflags \*[Am] NOTE_ATTRIB) { 553 if (ev.fflags \*[Am] NOTE_ATTRIB) {
552 printf("chmod/chown/utimes "); 554 printf("chmod/chown/utimes ");
553 ev.fflags \*[Am]= ~NOTE_ATTRIB; 555 ev.fflags \*[Am]= ~NOTE_ATTRIB;
554 } 556 }
555 if (ev.fflags \*[Am] NOTE_LINK) { 557 if (ev.fflags \*[Am] NOTE_LINK) {
556 printf("hardlinked "); 558 printf("hardlinked ");
557 ev.fflags \*[Am]= ~NOTE_LINK; 559 ev.fflags \*[Am]= ~NOTE_LINK;
558 } 560 }
559 if (ev.fflags \*[Am] NOTE_RENAME) { 561 if (ev.fflags \*[Am] NOTE_RENAME) {
560 printf("renamed "); 562 printf("renamed ");
561 ev.fflags \*[Am]= ~NOTE_RENAME; 563 ev.fflags \*[Am]= ~NOTE_RENAME;
562 } 564 }
563 if (ev.fflags \*[Am] NOTE_REVOKE) { 565 if (ev.fflags \*[Am] NOTE_REVOKE) {
564 printf("revoked "); 566 printf("revoked ");
565 ev.fflags \*[Am]= ~NOTE_REVOKE; 567 ev.fflags \*[Am]= ~NOTE_REVOKE;
566 } 568 }
567 printf("\\n"); 569 printf("\\n");
568 if (ev.fflags) 570 if (ev.fflags)
569 warnx("unknown event 0x%x\\n", ev.fflags); 571 warnx("unknown event 0x%x\\n", ev.fflags);
570 } 572 }
571} 573}
572.Ed 574.Ed
573.Sh ERRORS 575.Sh ERRORS
574The 576The
575.Fn kqueue 577.Fn kqueue
576function fails if: 578function fails if:
577.Bl -tag -width Er 579.Bl -tag -width Er
578.It Bq Er EMFILE 580.It Bq Er EMFILE
579The per-process descriptor table is full. 581The per-process descriptor table is full.
580.It Bq Er ENFILE 582.It Bq Er ENFILE
581The system file table is full. 583The system file table is full.
582.It Bq Er ENOMEM 584.It Bq Er ENOMEM
583The kernel failed to allocate enough memory for the kernel queue. 585The kernel failed to allocate enough memory for the kernel queue.
584.El 586.El
585.Pp 587.Pp
586The 588The
587.Fn kevent 589.Fn kevent
588function fails if: 590function fails if:
589.Bl -tag -width Er 591.Bl -tag -width Er
590.It Bq Er EACCES 592.It Bq Er EACCES
591The process does not have permission to register a filter. 593The process does not have permission to register a filter.
592.It Bq Er EBADF 594.It Bq Er EBADF
593The specified descriptor is invalid. 595The specified descriptor is invalid.
594.It Bq Er EFAULT 596.It Bq Er EFAULT
595There was an error reading or writing the 597There was an error reading or writing the
596.Va kevent 598.Va kevent
597structure. 599structure.
598.It Bq Er EINTR 600.It Bq Er EINTR
599A signal was delivered before the timeout expired and before any 601A signal was delivered before the timeout expired and before any
600events were placed on the kqueue for return. 602events were placed on the kqueue for return.
601.It Bq Er EINVAL 603.It Bq Er EINVAL
602The specified time limit or filter is invalid. 604The specified time limit or filter is invalid.
603.It Bq Er ENOENT 605.It Bq Er ENOENT
604The event could not be found to be modified or deleted. 606The event could not be found to be modified or deleted.
605.It Bq Er ENOMEM 607.It Bq Er ENOMEM
606No memory was available to register the event. 608No memory was available to register the event.
607.It Bq Er EOPNOTSUPP 609.It Bq Er EOPNOTSUPP
608This type of file descriptor is not supported for 610This type of file descriptor is not supported for
609.Fn kevent 611.Fn kevent
610operations. 612operations.
611.It Bq Er ESRCH 613.It Bq Er ESRCH
612The specified process to attach to does not exist. 614The specified process to attach to does not exist.
613.El 615.El
614.Sh SEE ALSO 616.Sh SEE ALSO
615.\" .Xr aio_error 2 , 617.\" .Xr aio_error 2 ,
616.\" .Xr aio_read 2 , 618.\" .Xr aio_read 2 ,
617.\" .Xr aio_return 2 , 619.\" .Xr aio_return 2 ,
618.Xr ioctl 2 , 620.Xr ioctl 2 ,
619.Xr poll 2 , 621.Xr poll 2 ,
620.Xr read 2 , 622.Xr read 2 ,
621.Xr select 2 , 623.Xr select 2 ,
622.Xr sigaction 2 , 624.Xr sigaction 2 ,
623.Xr write 2 , 625.Xr write 2 ,
624.Xr signal 3 , 626.Xr signal 3 ,
625.Xr kfilter_register 9 , 627.Xr kfilter_register 9 ,
626.Xr knote 9 628.Xr knote 9
627.Rs 629.Rs
628.%A Jonathan Lemon 630.%A Jonathan Lemon
629.%T "Kqueue: A Generic and Scalable Event Notification Facility" 631.%T "Kqueue: A Generic and Scalable Event Notification Facility"
630.%I USENIX Association 632.%I USENIX Association
631.%B Proceedings of the FREENIX Track: 2001 USENIX Annual Technical Conference 633.%B Proceedings of the FREENIX Track: 2001 USENIX Annual Technical Conference
632.%D June 25-30, 2001 634.%D June 25-30, 2001
633.%U http://www.usenix.org/event/usenix01/freenix01/full_papers/lemon/lemon.pdf 635.%U http://www.usenix.org/event/usenix01/freenix01/full_papers/lemon/lemon.pdf
634.Re 636.Re
635.Sh HISTORY 637.Sh HISTORY
636The 638The
637.Fn kqueue 639.Fn kqueue
638and 640and
639.Fn kevent 641.Fn kevent
640functions first appeared in 642functions first appeared in
641.Fx 4.1 , 643.Fx 4.1 ,
642and then in 644and then in
643.Nx 2.0 . 645.Nx 2.0 .
644The 646The
645.Fn kqueue1 647.Fn kqueue1
646function first appeared in 648function first appeared in
647.Nx 6.0 . 649.Nx 6.0 .
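
As a side note to the page above, here is a minimal sketch of the EVFILT_TIMER semantics it documents (data is the period in milliseconds; the timer is periodic unless EV_ONESHOT is set). The example is illustrative and not part of the pulled-up diffs:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent ev;
	int kq, i;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Arm a periodic 500 ms timer; ident 1 is an arbitrary identifier. */
	EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, 0);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: register");

	for (i = 0; i < 4; i++) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
			err(1, "kevent: wait");
		/* data counts expirations since the last kevent() call. */
		printf("tick: %lld expiration(s)\n", (long long)ev.data);
	}
	return 0;
}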

cvs diff -r1.80 -r1.80.2.1 src/sys/kern/kern_event.c

--- src/sys/kern/kern_event.c 2014/06/24 14:42:43 1.80
+++ src/sys/kern/kern_event.c 2015/04/14 04:39:58 1.80.2.1
@@ -1,1549 +1,1553 @@
1/* $NetBSD: kern_event.c,v 1.80 2014/06/24 14:42:43 maxv Exp $ */ 1/* $NetBSD: kern_event.c,v 1.80.2.1 2015/04/14 04:39:58 snj Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/*- 32/*-
33 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 33 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
34 * All rights reserved. 34 * All rights reserved.
35 * 35 *
36 * Redistribution and use in source and binary forms, with or without 36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions 37 * modification, are permitted provided that the following conditions
38 * are met: 38 * are met:
39 * 1. Redistributions of source code must retain the above copyright 39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer. 40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright 41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the 42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution. 43 * documentation and/or other materials provided with the distribution.
44 * 44 *
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE. 55 * SUCH DAMAGE.
56 * 56 *
57 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp 57 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
58 */ 58 */
59 59
60#include <sys/cdefs.h> 60#include <sys/cdefs.h>
61__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.80 2014/06/24 14:42:43 maxv Exp $"); 61__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.80.2.1 2015/04/14 04:39:58 snj Exp $");
62 62
63#include <sys/param.h> 63#include <sys/param.h>
64#include <sys/systm.h> 64#include <sys/systm.h>
65#include <sys/kernel.h> 65#include <sys/kernel.h>
66#include <sys/proc.h> 66#include <sys/proc.h>
67#include <sys/file.h> 67#include <sys/file.h>
68#include <sys/select.h> 68#include <sys/select.h>
69#include <sys/queue.h> 69#include <sys/queue.h>
70#include <sys/event.h> 70#include <sys/event.h>
71#include <sys/eventvar.h> 71#include <sys/eventvar.h>
72#include <sys/poll.h> 72#include <sys/poll.h>
73#include <sys/kmem.h> 73#include <sys/kmem.h>
74#include <sys/stat.h> 74#include <sys/stat.h>
75#include <sys/filedesc.h> 75#include <sys/filedesc.h>
76#include <sys/syscallargs.h> 76#include <sys/syscallargs.h>
77#include <sys/kauth.h> 77#include <sys/kauth.h>
78#include <sys/conf.h> 78#include <sys/conf.h>
79#include <sys/atomic.h> 79#include <sys/atomic.h>
80 80
81static int kqueue_scan(file_t *, size_t, struct kevent *, 81static int kqueue_scan(file_t *, size_t, struct kevent *,
82 const struct timespec *, register_t *, 82 const struct timespec *, register_t *,
83 const struct kevent_ops *, struct kevent *, 83 const struct kevent_ops *, struct kevent *,
84 size_t); 84 size_t);
85static int kqueue_ioctl(file_t *, u_long, void *); 85static int kqueue_ioctl(file_t *, u_long, void *);
86static int kqueue_fcntl(file_t *, u_int, void *); 86static int kqueue_fcntl(file_t *, u_int, void *);
87static int kqueue_poll(file_t *, int); 87static int kqueue_poll(file_t *, int);
88static int kqueue_kqfilter(file_t *, struct knote *); 88static int kqueue_kqfilter(file_t *, struct knote *);
89static int kqueue_stat(file_t *, struct stat *); 89static int kqueue_stat(file_t *, struct stat *);
90static int kqueue_close(file_t *); 90static int kqueue_close(file_t *);
91static int kqueue_register(struct kqueue *, struct kevent *); 91static int kqueue_register(struct kqueue *, struct kevent *);
92static void kqueue_doclose(struct kqueue *, struct klist *, int); 92static void kqueue_doclose(struct kqueue *, struct klist *, int);
93 93
94static void knote_detach(struct knote *, filedesc_t *fdp, bool); 94static void knote_detach(struct knote *, filedesc_t *fdp, bool);
95static void knote_enqueue(struct knote *); 95static void knote_enqueue(struct knote *);
96static void knote_activate(struct knote *); 96static void knote_activate(struct knote *);
97 97
98static void filt_kqdetach(struct knote *); 98static void filt_kqdetach(struct knote *);
99static int filt_kqueue(struct knote *, long hint); 99static int filt_kqueue(struct knote *, long hint);
100static int filt_procattach(struct knote *); 100static int filt_procattach(struct knote *);
101static void filt_procdetach(struct knote *); 101static void filt_procdetach(struct knote *);
102static int filt_proc(struct knote *, long hint); 102static int filt_proc(struct knote *, long hint);
103static int filt_fileattach(struct knote *); 103static int filt_fileattach(struct knote *);
104static void filt_timerexpire(void *x); 104static void filt_timerexpire(void *x);
105static int filt_timerattach(struct knote *); 105static int filt_timerattach(struct knote *);
106static void filt_timerdetach(struct knote *); 106static void filt_timerdetach(struct knote *);
107static int filt_timer(struct knote *, long hint); 107static int filt_timer(struct knote *, long hint);
108 108
109static const struct fileops kqueueops = { 109static const struct fileops kqueueops = {
110 .fo_read = (void *)enxio, 110 .fo_read = (void *)enxio,
111 .fo_write = (void *)enxio, 111 .fo_write = (void *)enxio,
112 .fo_ioctl = kqueue_ioctl, 112 .fo_ioctl = kqueue_ioctl,
113 .fo_fcntl = kqueue_fcntl, 113 .fo_fcntl = kqueue_fcntl,
114 .fo_poll = kqueue_poll, 114 .fo_poll = kqueue_poll,
115 .fo_stat = kqueue_stat, 115 .fo_stat = kqueue_stat,
116 .fo_close = kqueue_close, 116 .fo_close = kqueue_close,
117 .fo_kqfilter = kqueue_kqfilter, 117 .fo_kqfilter = kqueue_kqfilter,
118 .fo_restart = fnullop_restart, 118 .fo_restart = fnullop_restart,
119}; 119};
120 120
121static const struct filterops kqread_filtops = 121static const struct filterops kqread_filtops =
122 { 1, NULL, filt_kqdetach, filt_kqueue }; 122 { 1, NULL, filt_kqdetach, filt_kqueue };
123static const struct filterops proc_filtops = 123static const struct filterops proc_filtops =
124 { 0, filt_procattach, filt_procdetach, filt_proc }; 124 { 0, filt_procattach, filt_procdetach, filt_proc };
125static const struct filterops file_filtops = 125static const struct filterops file_filtops =
126 { 1, filt_fileattach, NULL, NULL }; 126 { 1, filt_fileattach, NULL, NULL };
127static const struct filterops timer_filtops = 127static const struct filterops timer_filtops =
128 { 0, filt_timerattach, filt_timerdetach, filt_timer }; 128 { 0, filt_timerattach, filt_timerdetach, filt_timer };
129 129
130static u_int kq_ncallouts = 0; 130static u_int kq_ncallouts = 0;
131static int kq_calloutmax = (4 * 1024); 131static int kq_calloutmax = (4 * 1024);
132 132
133#define KN_HASHSIZE 64 /* XXX should be tunable */ 133#define KN_HASHSIZE 64 /* XXX should be tunable */
134#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 134#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
135 135
136extern const struct filterops sig_filtops; 136extern const struct filterops sig_filtops;
137 137
138/* 138/*
139 * Table for all system-defined filters. 139 * Table for all system-defined filters.
140 * These should be listed in the numeric order of the EVFILT_* defines. 140 * These should be listed in the numeric order of the EVFILT_* defines.
141 * If filtops is NULL, the filter isn't implemented in NetBSD. 141 * If filtops is NULL, the filter isn't implemented in NetBSD.
142 * End of list is when name is NULL. 142 * End of list is when name is NULL.
143 *  143 *
144 * Note that 'refcnt' is meaningless for built-in filters. 144 * Note that 'refcnt' is meaningless for built-in filters.
145 */ 145 */
146struct kfilter { 146struct kfilter {
147 const char *name; /* name of filter */ 147 const char *name; /* name of filter */
148 uint32_t filter; /* id of filter */ 148 uint32_t filter; /* id of filter */
149 unsigned refcnt; /* reference count */ 149 unsigned refcnt; /* reference count */
150 const struct filterops *filtops;/* operations for filter */ 150 const struct filterops *filtops;/* operations for filter */
151 size_t namelen; /* length of name string */ 151 size_t namelen; /* length of name string */
152}; 152};
153 153
154/* System defined filters */ 154/* System defined filters */
155static struct kfilter sys_kfilters[] = { 155static struct kfilter sys_kfilters[] = {
156 { "EVFILT_READ", EVFILT_READ, 0, &file_filtops, 0 }, 156 { "EVFILT_READ", EVFILT_READ, 0, &file_filtops, 0 },
157 { "EVFILT_WRITE", EVFILT_WRITE, 0, &file_filtops, 0, }, 157 { "EVFILT_WRITE", EVFILT_WRITE, 0, &file_filtops, 0, },
158 { "EVFILT_AIO", EVFILT_AIO, 0, NULL, 0 }, 158 { "EVFILT_AIO", EVFILT_AIO, 0, NULL, 0 },
159 { "EVFILT_VNODE", EVFILT_VNODE, 0, &file_filtops, 0 }, 159 { "EVFILT_VNODE", EVFILT_VNODE, 0, &file_filtops, 0 },
160 { "EVFILT_PROC", EVFILT_PROC, 0, &proc_filtops, 0 }, 160 { "EVFILT_PROC", EVFILT_PROC, 0, &proc_filtops, 0 },
161 { "EVFILT_SIGNAL", EVFILT_SIGNAL, 0, &sig_filtops, 0 }, 161 { "EVFILT_SIGNAL", EVFILT_SIGNAL, 0, &sig_filtops, 0 },
162 { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 }, 162 { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 },
163 { NULL, 0, 0, NULL, 0 }, 163 { NULL, 0, 0, NULL, 0 },
164}; 164};
165 165
166/* User defined kfilters */ 166/* User defined kfilters */
167static struct kfilter *user_kfilters; /* array */ 167static struct kfilter *user_kfilters; /* array */
168static int user_kfilterc; /* current offset */ 168static int user_kfilterc; /* current offset */
169static int user_kfiltermaxc; /* max size so far */ 169static int user_kfiltermaxc; /* max size so far */
170static size_t user_kfiltersz; /* size of allocated memory */ 170static size_t user_kfiltersz; /* size of allocated memory */
171 171
172/* Locks */ 172/* Locks */
173static krwlock_t kqueue_filter_lock; /* lock on filter lists */ 173static krwlock_t kqueue_filter_lock; /* lock on filter lists */
174static kmutex_t kqueue_misc_lock; /* miscellaneous */ 174static kmutex_t kqueue_misc_lock; /* miscellaneous */
175 175
176static kauth_listener_t kqueue_listener; 176static kauth_listener_t kqueue_listener;
177 177
178static int 178static int
179kqueue_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 179kqueue_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
180 void *arg0, void *arg1, void *arg2, void *arg3) 180 void *arg0, void *arg1, void *arg2, void *arg3)
181{ 181{
182 struct proc *p; 182 struct proc *p;
183 int result; 183 int result;
184 184
185 result = KAUTH_RESULT_DEFER; 185 result = KAUTH_RESULT_DEFER;
186 p = arg0; 186 p = arg0;
187 187
188 if (action != KAUTH_PROCESS_KEVENT_FILTER) 188 if (action != KAUTH_PROCESS_KEVENT_FILTER)
189 return result; 189 return result;
190 190
191 if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(cred) || 191 if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(cred) ||
192 ISSET(p->p_flag, PK_SUGID))) 192 ISSET(p->p_flag, PK_SUGID)))
193 return result; 193 return result;
194 194
195 result = KAUTH_RESULT_ALLOW; 195 result = KAUTH_RESULT_ALLOW;
196 196
197 return result; 197 return result;
198} 198}
199 199
200/* 200/*
201 * Initialize the kqueue subsystem. 201 * Initialize the kqueue subsystem.
202 */ 202 */
203void 203void
204kqueue_init(void) 204kqueue_init(void)
205{ 205{
206 206
207 rw_init(&kqueue_filter_lock); 207 rw_init(&kqueue_filter_lock);
208 mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE); 208 mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
209 209
210 kqueue_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 210 kqueue_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
211 kqueue_listener_cb, NULL); 211 kqueue_listener_cb, NULL);
212} 212}
213 213
214/* 214/*
215 * Find kfilter entry by name, or NULL if not found. 215 * Find kfilter entry by name, or NULL if not found.
216 */ 216 */
217static struct kfilter * 217static struct kfilter *
218kfilter_byname_sys(const char *name) 218kfilter_byname_sys(const char *name)
219{ 219{
220 int i; 220 int i;
221 221
222 KASSERT(rw_lock_held(&kqueue_filter_lock)); 222 KASSERT(rw_lock_held(&kqueue_filter_lock));
223 223
224 for (i = 0; sys_kfilters[i].name != NULL; i++) { 224 for (i = 0; sys_kfilters[i].name != NULL; i++) {
225 if (strcmp(name, sys_kfilters[i].name) == 0) 225 if (strcmp(name, sys_kfilters[i].name) == 0)
226 return &sys_kfilters[i]; 226 return &sys_kfilters[i];
227 } 227 }
228 return NULL; 228 return NULL;
229} 229}
230 230
231static struct kfilter * 231static struct kfilter *
232kfilter_byname_user(const char *name) 232kfilter_byname_user(const char *name)
233{ 233{
234 int i; 234 int i;
235 235
236 KASSERT(rw_lock_held(&kqueue_filter_lock)); 236 KASSERT(rw_lock_held(&kqueue_filter_lock));
237 237
238 /* user filter slots have a NULL name if previously deregistered */ 238 /* user filter slots have a NULL name if previously deregistered */
239 for (i = 0; i < user_kfilterc ; i++) { 239 for (i = 0; i < user_kfilterc ; i++) {
240 if (user_kfilters[i].name != NULL && 240 if (user_kfilters[i].name != NULL &&
241 strcmp(name, user_kfilters[i].name) == 0) 241 strcmp(name, user_kfilters[i].name) == 0)
242 return &user_kfilters[i]; 242 return &user_kfilters[i];
243 } 243 }
244 return NULL; 244 return NULL;
245} 245}
246 246
247static struct kfilter * 247static struct kfilter *
248kfilter_byname(const char *name) 248kfilter_byname(const char *name)
249{ 249{
250 struct kfilter *kfilter; 250 struct kfilter *kfilter;
251 251
252 KASSERT(rw_lock_held(&kqueue_filter_lock)); 252 KASSERT(rw_lock_held(&kqueue_filter_lock));
253 253
254 if ((kfilter = kfilter_byname_sys(name)) != NULL) 254 if ((kfilter = kfilter_byname_sys(name)) != NULL)
255 return kfilter; 255 return kfilter;
256 256
257 return kfilter_byname_user(name); 257 return kfilter_byname_user(name);
258} 258}
259 259
260/* 260/*
261 * Find kfilter entry by filter id, or NULL if not found. 261 * Find kfilter entry by filter id, or NULL if not found.
262 * Assumes entries are indexed in filter id order, for speed. 262 * Assumes entries are indexed in filter id order, for speed.
263 */ 263 */
264static struct kfilter * 264static struct kfilter *
265kfilter_byfilter(uint32_t filter) 265kfilter_byfilter(uint32_t filter)
266{ 266{
267 struct kfilter *kfilter; 267 struct kfilter *kfilter;
268 268
269 KASSERT(rw_lock_held(&kqueue_filter_lock)); 269 KASSERT(rw_lock_held(&kqueue_filter_lock));
270 270
271 if (filter < EVFILT_SYSCOUNT) /* it's a system filter */ 271 if (filter < EVFILT_SYSCOUNT) /* it's a system filter */
272 kfilter = &sys_kfilters[filter]; 272 kfilter = &sys_kfilters[filter];
273 else if (user_kfilters != NULL && 273 else if (user_kfilters != NULL &&
274 filter < EVFILT_SYSCOUNT + user_kfilterc) 274 filter < EVFILT_SYSCOUNT + user_kfilterc)
275 /* it's a user filter */ 275 /* it's a user filter */
276 kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT]; 276 kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
277 else 277 else
278 return (NULL); /* out of range */ 278 return (NULL); /* out of range */
279 KASSERT(kfilter->filter == filter); /* sanity check! */ 279 KASSERT(kfilter->filter == filter); /* sanity check! */
280 return (kfilter); 280 return (kfilter);
281} 281}
282 282
283/* 283/*
284 * Register a new kfilter. Stores the entry in user_kfilters. 284 * Register a new kfilter. Stores the entry in user_kfilters.
285 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 285 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
286 * If retfilter != NULL, the new filterid is returned in it. 286 * If retfilter != NULL, the new filterid is returned in it.
287 */ 287 */
288int 288int
289kfilter_register(const char *name, const struct filterops *filtops, 289kfilter_register(const char *name, const struct filterops *filtops,
290 int *retfilter) 290 int *retfilter)
291{ 291{
292 struct kfilter *kfilter; 292 struct kfilter *kfilter;
293 size_t len; 293 size_t len;
294 int i; 294 int i;
295 295
296 if (name == NULL || name[0] == '\0' || filtops == NULL) 296 if (name == NULL || name[0] == '\0' || filtops == NULL)
297 return (EINVAL); /* invalid args */ 297 return (EINVAL); /* invalid args */
298 298
299 rw_enter(&kqueue_filter_lock, RW_WRITER); 299 rw_enter(&kqueue_filter_lock, RW_WRITER);
300 if (kfilter_byname(name) != NULL) { 300 if (kfilter_byname(name) != NULL) {
301 rw_exit(&kqueue_filter_lock); 301 rw_exit(&kqueue_filter_lock);
302 return (EEXIST); /* already exists */ 302 return (EEXIST); /* already exists */
303 } 303 }
304 if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) { 304 if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
305 rw_exit(&kqueue_filter_lock); 305 rw_exit(&kqueue_filter_lock);
306 return (EINVAL); /* too many */ 306 return (EINVAL); /* too many */
307 } 307 }
308 308
309 for (i = 0; i < user_kfilterc; i++) { 309 for (i = 0; i < user_kfilterc; i++) {
310 kfilter = &user_kfilters[i]; 310 kfilter = &user_kfilters[i];
311 if (kfilter->name == NULL) { 311 if (kfilter->name == NULL) {
312 /* Previously deregistered slot. Reuse. */ 312 /* Previously deregistered slot. Reuse. */
313 goto reuse; 313 goto reuse;
314 } 314 }
315 } 315 }
316 316
317	/* check if we need to grow user_kfilters */		317	/* check if we need to grow user_kfilters */
318 if (user_kfilterc + 1 > user_kfiltermaxc) { 318 if (user_kfilterc + 1 > user_kfiltermaxc) {
319 /* Grow in KFILTER_EXTENT chunks. */ 319 /* Grow in KFILTER_EXTENT chunks. */
320 user_kfiltermaxc += KFILTER_EXTENT; 320 user_kfiltermaxc += KFILTER_EXTENT;
321 len = user_kfiltermaxc * sizeof(*kfilter); 321 len = user_kfiltermaxc * sizeof(*kfilter);
322 kfilter = kmem_alloc(len, KM_SLEEP); 322 kfilter = kmem_alloc(len, KM_SLEEP);
323 memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz); 323 memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz);
324 if (user_kfilters != NULL) { 324 if (user_kfilters != NULL) {
325 memcpy(kfilter, user_kfilters, user_kfiltersz); 325 memcpy(kfilter, user_kfilters, user_kfiltersz);
326 kmem_free(user_kfilters, user_kfiltersz); 326 kmem_free(user_kfilters, user_kfiltersz);
327 } 327 }
328 user_kfiltersz = len; 328 user_kfiltersz = len;
329 user_kfilters = kfilter; 329 user_kfilters = kfilter;
330 } 330 }
331 /* Adding new slot */ 331 /* Adding new slot */
332 kfilter = &user_kfilters[user_kfilterc++]; 332 kfilter = &user_kfilters[user_kfilterc++];
333reuse: 333reuse:
334 kfilter->namelen = strlen(name) + 1; 334 kfilter->namelen = strlen(name) + 1;
335 kfilter->name = kmem_alloc(kfilter->namelen, KM_SLEEP); 335 kfilter->name = kmem_alloc(kfilter->namelen, KM_SLEEP);
336 memcpy(__UNCONST(kfilter->name), name, kfilter->namelen); 336 memcpy(__UNCONST(kfilter->name), name, kfilter->namelen);
337 337
338 kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT; 338 kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;
339 339
340 kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP); 340 kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
341 memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops)); 341 memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));
342 342
343 if (retfilter != NULL) 343 if (retfilter != NULL)
344 *retfilter = kfilter->filter; 344 *retfilter = kfilter->filter;
345 rw_exit(&kqueue_filter_lock); 345 rw_exit(&kqueue_filter_lock);
346 346
347 return (0); 347 return (0);
348} 348}
349 349
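Because kfilter_register() deep-copies both the name string and the filterops (see the kmem_alloc()/memcpy() calls above), a registering driver need not keep its own copies resident. As a rough, hypothetical sketch of how a loadable driver might use this interface (the "mydev" names and callbacks are invented for illustration, using the same four-member filterops layout as seltrue_filtops further down):

	/* Hypothetical callbacks for an imaginary "mydev" driver. */
	static int
	filt_mydevattach(struct knote *kn)
	{
		kn->kn_flags |= EV_CLEAR;	/* report-and-clear semantics */
		return 0;
	}

	static void
	filt_mydevdetach(struct knote *kn)
	{
		/* nothing to tear down in this sketch */
	}

	static int
	filt_mydevevent(struct knote *kn, long hint)
	{
		return kn->kn_data != 0;	/* active while data is pending */
	}

	/* Layout: { f_isfd, f_attach, f_detach, f_event } */
	static const struct filterops mydev_filtops =
		{ 0, filt_mydevattach, filt_mydevdetach, filt_mydevevent };

	static int mydev_filterid;

	static int
	mydev_kfilter_init(void)
	{
		/* Name and filtops are copied; ours need not persist. */
		return kfilter_register("EVFILT_MYDEV", &mydev_filtops,
		    &mydev_filterid);
	}

	static int
	mydev_kfilter_fini(void)
	{
		/* Fails with EBUSY while any knote still uses the filter. */
		return kfilter_unregister("EVFILT_MYDEV");
	}
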
350/* 350/*
351 * Unregister a kfilter previously registered with kfilter_register. 351 * Unregister a kfilter previously registered with kfilter_register.
352 * This retains the filter id, but clears the name and frees filtops (filter 352 * This retains the filter id, but clears the name and frees filtops (filter
353 * operations), so that the number isn't reused during a boot. 353 * operations), so that the number isn't reused during a boot.
354 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 354 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
355 */ 355 */
356int 356int
357kfilter_unregister(const char *name) 357kfilter_unregister(const char *name)
358{ 358{
359 struct kfilter *kfilter; 359 struct kfilter *kfilter;
360 360
361 if (name == NULL || name[0] == '\0') 361 if (name == NULL || name[0] == '\0')
362 return (EINVAL); /* invalid name */ 362 return (EINVAL); /* invalid name */
363 363
364 rw_enter(&kqueue_filter_lock, RW_WRITER); 364 rw_enter(&kqueue_filter_lock, RW_WRITER);
365 if (kfilter_byname_sys(name) != NULL) { 365 if (kfilter_byname_sys(name) != NULL) {
366 rw_exit(&kqueue_filter_lock); 366 rw_exit(&kqueue_filter_lock);
367 return (EINVAL); /* can't detach system filters */ 367 return (EINVAL); /* can't detach system filters */
368 } 368 }
369 369
370 kfilter = kfilter_byname_user(name); 370 kfilter = kfilter_byname_user(name);
371 if (kfilter == NULL) { 371 if (kfilter == NULL) {
372 rw_exit(&kqueue_filter_lock); 372 rw_exit(&kqueue_filter_lock);
373 return (ENOENT); 373 return (ENOENT);
374 } 374 }
375 if (kfilter->refcnt != 0) { 375 if (kfilter->refcnt != 0) {
376 rw_exit(&kqueue_filter_lock); 376 rw_exit(&kqueue_filter_lock);
377 return (EBUSY); 377 return (EBUSY);
378 } 378 }
379 379
380	/* Cast away const (but we know it's safe). */		381	/* Cast away const (but we know it's safe). */
381 kmem_free(__UNCONST(kfilter->name), kfilter->namelen); 381 kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
382 kfilter->name = NULL; /* mark as `not implemented' */ 382 kfilter->name = NULL; /* mark as `not implemented' */
383 383
384 if (kfilter->filtops != NULL) { 384 if (kfilter->filtops != NULL) {
385		/* Cast away const (but we know it's safe). */		386		/* Cast away const (but we know it's safe). */
386 kmem_free(__UNCONST(kfilter->filtops), 386 kmem_free(__UNCONST(kfilter->filtops),
387 sizeof(*kfilter->filtops)); 387 sizeof(*kfilter->filtops));
388 kfilter->filtops = NULL; /* mark as `not implemented' */ 388 kfilter->filtops = NULL; /* mark as `not implemented' */
389 } 389 }
390 rw_exit(&kqueue_filter_lock); 390 rw_exit(&kqueue_filter_lock);
391 391
392 return (0); 392 return (0);
393} 393}
394 394
395 395
396/* 396/*
397 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file 397 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
398 * descriptors. Calls fileops kqfilter method for given file descriptor. 398 * descriptors. Calls fileops kqfilter method for given file descriptor.
399 */ 399 */
400static int 400static int
401filt_fileattach(struct knote *kn) 401filt_fileattach(struct knote *kn)
402{ 402{
403 file_t *fp; 403 file_t *fp;
404 404
405 fp = kn->kn_obj; 405 fp = kn->kn_obj;
406 406
407 return (*fp->f_ops->fo_kqfilter)(fp, kn); 407 return (*fp->f_ops->fo_kqfilter)(fp, kn);
408} 408}
409 409
410/* 410/*
411 * Filter detach method for EVFILT_READ on kqueue descriptor. 411 * Filter detach method for EVFILT_READ on kqueue descriptor.
412 */ 412 */
413static void 413static void
414filt_kqdetach(struct knote *kn) 414filt_kqdetach(struct knote *kn)
415{ 415{
416 struct kqueue *kq; 416 struct kqueue *kq;
417 417
418 kq = ((file_t *)kn->kn_obj)->f_data; 418 kq = ((file_t *)kn->kn_obj)->f_data;
419 419
420 mutex_spin_enter(&kq->kq_lock); 420 mutex_spin_enter(&kq->kq_lock);
421 SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext); 421 SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
422 mutex_spin_exit(&kq->kq_lock); 422 mutex_spin_exit(&kq->kq_lock);
423} 423}
424 424
425/* 425/*
426 * Filter event method for EVFILT_READ on kqueue descriptor. 426 * Filter event method for EVFILT_READ on kqueue descriptor.
427 */ 427 */
428/*ARGSUSED*/ 428/*ARGSUSED*/
429static int 429static int
430filt_kqueue(struct knote *kn, long hint) 430filt_kqueue(struct knote *kn, long hint)
431{ 431{
432 struct kqueue *kq; 432 struct kqueue *kq;
433 int rv; 433 int rv;
434 434
435 kq = ((file_t *)kn->kn_obj)->f_data; 435 kq = ((file_t *)kn->kn_obj)->f_data;
436 436
437 if (hint != NOTE_SUBMIT) 437 if (hint != NOTE_SUBMIT)
438 mutex_spin_enter(&kq->kq_lock); 438 mutex_spin_enter(&kq->kq_lock);
439 kn->kn_data = kq->kq_count; 439 kn->kn_data = kq->kq_count;
440 rv = (kn->kn_data > 0); 440 rv = (kn->kn_data > 0);
441 if (hint != NOTE_SUBMIT) 441 if (hint != NOTE_SUBMIT)
442 mutex_spin_exit(&kq->kq_lock); 442 mutex_spin_exit(&kq->kq_lock);
443 443
444 return rv; 444 return rv;
445} 445}
446 446
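Since filt_kqueue() reports the number of pending events (kq_count) in kn_data, a kqueue descriptor is itself watchable with EVFILT_READ, so kqueues can be nested. A minimal userland sketch of that, assuming the standard kqueue(2)/kevent(2) interface:

	#include <sys/types.h>
	#include <sys/event.h>
	#include <sys/time.h>
	#include <err.h>

	int
	main(void)
	{
		struct kevent ev;
		int inner, outer;

		if ((inner = kqueue()) == -1 || (outer = kqueue()) == -1)
			err(1, "kqueue");

		/* Watch the inner kqueue for readability from the outer one. */
		EV_SET(&ev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
		if (kevent(outer, &ev, 1, NULL, 0, NULL) == -1)
			err(1, "kevent");

		/*
		 * A wait on 'outer' now returns once events are pending on
		 * 'inner'; ev.data reports how many (kq_count above).
		 */
		return 0;
	}
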
447/* 447/*
448 * Filter attach method for EVFILT_PROC. 448 * Filter attach method for EVFILT_PROC.
449 */ 449 */
450static int 450static int
451filt_procattach(struct knote *kn) 451filt_procattach(struct knote *kn)
452{ 452{
453 struct proc *p; 453 struct proc *p;
454 struct lwp *curl; 454 struct lwp *curl;
455 455
456 curl = curlwp; 456 curl = curlwp;
457 457
458 mutex_enter(proc_lock); 458 mutex_enter(proc_lock);
459 if (kn->kn_flags & EV_FLAG1) { 459 if (kn->kn_flags & EV_FLAG1) {
460 /* 460 /*
461 * NOTE_TRACK attaches to the child process too early 461 * NOTE_TRACK attaches to the child process too early
462 * for proc_find, so do a raw look up and check the state 462 * for proc_find, so do a raw look up and check the state
463 * explicitly. 463 * explicitly.
464 */ 464 */
465 p = proc_find_raw(kn->kn_id); 465 p = proc_find_raw(kn->kn_id);
466 if (p != NULL && p->p_stat != SIDL) 466 if (p != NULL && p->p_stat != SIDL)
467 p = NULL; 467 p = NULL;
468 } else { 468 } else {
469 p = proc_find(kn->kn_id); 469 p = proc_find(kn->kn_id);
470 } 470 }
471 471
472 if (p == NULL) { 472 if (p == NULL) {
473 mutex_exit(proc_lock); 473 mutex_exit(proc_lock);
474 return ESRCH; 474 return ESRCH;
475 } 475 }
476 476
477 /* 477 /*
478 * Fail if it's not owned by you, or the last exec gave us 478 * Fail if it's not owned by you, or the last exec gave us
479 * setuid/setgid privs (unless you're root). 479 * setuid/setgid privs (unless you're root).
480 */ 480 */
481 mutex_enter(p->p_lock); 481 mutex_enter(p->p_lock);
482 mutex_exit(proc_lock); 482 mutex_exit(proc_lock);
483 if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER, 483 if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER,
484 p, NULL, NULL, NULL) != 0) { 484 p, NULL, NULL, NULL) != 0) {
485 mutex_exit(p->p_lock); 485 mutex_exit(p->p_lock);
486 return EACCES; 486 return EACCES;
487 } 487 }
488 488
489 kn->kn_obj = p; 489 kn->kn_obj = p;
490 kn->kn_flags |= EV_CLEAR; /* automatically set */ 490 kn->kn_flags |= EV_CLEAR; /* automatically set */
491 491
492 /* 492 /*
493 * internal flag indicating registration done by kernel 493 * internal flag indicating registration done by kernel
494 */ 494 */
495 if (kn->kn_flags & EV_FLAG1) { 495 if (kn->kn_flags & EV_FLAG1) {
496 kn->kn_data = kn->kn_sdata; /* ppid */ 496 kn->kn_data = kn->kn_sdata; /* ppid */
497 kn->kn_fflags = NOTE_CHILD; 497 kn->kn_fflags = NOTE_CHILD;
498 kn->kn_flags &= ~EV_FLAG1; 498 kn->kn_flags &= ~EV_FLAG1;
499 } 499 }
500 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext); 500 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
501 mutex_exit(p->p_lock); 501 mutex_exit(p->p_lock);
502 502
503 return 0; 503 return 0;
504} 504}
505 505
506/* 506/*
507 * Filter detach method for EVFILT_PROC. 507 * Filter detach method for EVFILT_PROC.
508 * 508 *
509 * The knote may be attached to a different process, which may exit, 509 * The knote may be attached to a different process, which may exit,
510 * leaving nothing for the knote to be attached to. So when the process 510 * leaving nothing for the knote to be attached to. So when the process
511 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 511 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
512 * it will be deleted when read out. However, as part of the knote deletion, 512 * it will be deleted when read out. However, as part of the knote deletion,
513 * this routine is called, so a check is needed to avoid actually performing 513 * this routine is called, so a check is needed to avoid actually performing
514 * a detach, because the original process might not exist any more. 514 * a detach, because the original process might not exist any more.
515 */ 515 */
516static void 516static void
517filt_procdetach(struct knote *kn) 517filt_procdetach(struct knote *kn)
518{ 518{
519 struct proc *p; 519 struct proc *p;
520 520
521 if (kn->kn_status & KN_DETACHED) 521 if (kn->kn_status & KN_DETACHED)
522 return; 522 return;
523 523
524 p = kn->kn_obj; 524 p = kn->kn_obj;
525 525
526 mutex_enter(p->p_lock); 526 mutex_enter(p->p_lock);
527 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext); 527 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
528 mutex_exit(p->p_lock); 528 mutex_exit(p->p_lock);
529} 529}
530 530
531/* 531/*
532 * Filter event method for EVFILT_PROC. 532 * Filter event method for EVFILT_PROC.
533 */ 533 */
534static int 534static int
535filt_proc(struct knote *kn, long hint) 535filt_proc(struct knote *kn, long hint)
536{ 536{
537 u_int event, fflag; 537 u_int event, fflag;
538 struct kevent kev; 538 struct kevent kev;
539 struct kqueue *kq; 539 struct kqueue *kq;
540 int error; 540 int error;
541 541
542 event = (u_int)hint & NOTE_PCTRLMASK; 542 event = (u_int)hint & NOTE_PCTRLMASK;
543 kq = kn->kn_kq; 543 kq = kn->kn_kq;
544 fflag = 0; 544 fflag = 0;
545 545
546 /* If the user is interested in this event, record it. */ 546 /* If the user is interested in this event, record it. */
547 if (kn->kn_sfflags & event) 547 if (kn->kn_sfflags & event)
548 fflag |= event; 548 fflag |= event;
549 549
550 if (event == NOTE_EXIT) { 550 if (event == NOTE_EXIT) {
 551 struct proc *p = kn->kn_obj;
 552
 553 if (p != NULL)
 554 kn->kn_data = p->p_xstat;
551 /* 555 /*
552 * Process is gone, so flag the event as finished. 556 * Process is gone, so flag the event as finished.
553 * 557 *
554 * Detach the knote from watched process and mark 558 * Detach the knote from watched process and mark
555 * it as such. We can't leave this to kqueue_scan(), 559 * it as such. We can't leave this to kqueue_scan(),
556 * since the process might not exist by then. And we 560 * since the process might not exist by then. And we
557 * have to do this now, since psignal KNOTE() is called 561 * have to do this now, since psignal KNOTE() is called
558 * also for zombies and we might end up reading freed 562 * also for zombies and we might end up reading freed
559 * memory if the kevent would already be picked up 563 * memory if the kevent would already be picked up
560 * and knote g/c'ed. 564 * and knote g/c'ed.
561 */ 565 */
562 filt_procdetach(kn); 566 filt_procdetach(kn);
563 567
564 mutex_spin_enter(&kq->kq_lock); 568 mutex_spin_enter(&kq->kq_lock);
565 kn->kn_status |= KN_DETACHED; 569 kn->kn_status |= KN_DETACHED;
566		/* Mark as ONESHOT, so that the knote is g/c'ed when read */	570		/* Mark as ONESHOT, so that the knote is g/c'ed when read */
567 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 571 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
568 kn->kn_fflags |= fflag; 572 kn->kn_fflags |= fflag;
569 mutex_spin_exit(&kq->kq_lock); 573 mutex_spin_exit(&kq->kq_lock);
570 574
571 return 1; 575 return 1;
572 } 576 }
573 577
574 mutex_spin_enter(&kq->kq_lock); 578 mutex_spin_enter(&kq->kq_lock);
575 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 579 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
576 /* 580 /*
577 * Process forked, and user wants to track the new process, 581 * Process forked, and user wants to track the new process,
578 * so attach a new knote to it, and immediately report an 582 * so attach a new knote to it, and immediately report an
579 * event with the parent's pid. Register knote with new 583 * event with the parent's pid. Register knote with new
580 * process. 584 * process.
581 */ 585 */
582 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 586 kev.ident = hint & NOTE_PDATAMASK; /* pid */
583 kev.filter = kn->kn_filter; 587 kev.filter = kn->kn_filter;
584 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 588 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
585 kev.fflags = kn->kn_sfflags; 589 kev.fflags = kn->kn_sfflags;
586 kev.data = kn->kn_id; /* parent */ 590 kev.data = kn->kn_id; /* parent */
587 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 591 kev.udata = kn->kn_kevent.udata; /* preserve udata */
588 mutex_spin_exit(&kq->kq_lock); 592 mutex_spin_exit(&kq->kq_lock);
589 error = kqueue_register(kq, &kev); 593 error = kqueue_register(kq, &kev);
590 mutex_spin_enter(&kq->kq_lock); 594 mutex_spin_enter(&kq->kq_lock);
591 if (error != 0) 595 if (error != 0)
592 kn->kn_fflags |= NOTE_TRACKERR; 596 kn->kn_fflags |= NOTE_TRACKERR;
593 } 597 }
594 kn->kn_fflags |= fflag; 598 kn->kn_fflags |= fflag;
595 fflag = kn->kn_fflags; 599 fflag = kn->kn_fflags;
596 mutex_spin_exit(&kq->kq_lock); 600 mutex_spin_exit(&kq->kq_lock);
597 601
598 return fflag != 0; 602 return fflag != 0;
599} 603}
600 604
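The four lines added above in revision 1.80.2.1 copy the exiting process's p_xstat into kn_data, so a NOTE_EXIT event now reports the exit status in the kevent's data field, matching FreeBSD (this is the change the ticket pulls up). A small userland sketch of the new behaviour; it assumes p_xstat carries the wait(2)-encoded status, so WEXITSTATUS() applies:

	#include <sys/types.h>
	#include <sys/event.h>
	#include <sys/wait.h>
	#include <err.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct kevent ev;
		pid_t pid;
		int kq;

		if ((kq = kqueue()) == -1)
			err(1, "kqueue");

		if ((pid = fork()) == -1)
			err(1, "fork");
		if (pid == 0)
			_exit(7);		/* child exits with code 7 */

		EV_SET(&ev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE, NOTE_EXIT,
		    0, NULL);
		if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
			err(1, "kevent: register");
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
			err(1, "kevent: wait");

		/* With the change above, data holds the exit status. */
		printf("pid %ld exited with code %d\n", (long)ev.ident,
		    WEXITSTATUS((int)ev.data));
		return 0;
	}
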
601static void 605static void
602filt_timerexpire(void *knx) 606filt_timerexpire(void *knx)
603{ 607{
604 struct knote *kn = knx; 608 struct knote *kn = knx;
605 int tticks; 609 int tticks;
606 610
607 mutex_enter(&kqueue_misc_lock); 611 mutex_enter(&kqueue_misc_lock);
608 kn->kn_data++; 612 kn->kn_data++;
609 knote_activate(kn); 613 knote_activate(kn);
610 if ((kn->kn_flags & EV_ONESHOT) == 0) { 614 if ((kn->kn_flags & EV_ONESHOT) == 0) {
611 tticks = mstohz(kn->kn_sdata); 615 tticks = mstohz(kn->kn_sdata);
612 if (tticks <= 0) 616 if (tticks <= 0)
613 tticks = 1; 617 tticks = 1;
614 callout_schedule((callout_t *)kn->kn_hook, tticks); 618 callout_schedule((callout_t *)kn->kn_hook, tticks);
615 } 619 }
616 mutex_exit(&kqueue_misc_lock); 620 mutex_exit(&kqueue_misc_lock);
617} 621}
618 622
619/* 623/*
620 * data contains amount of time to sleep, in milliseconds 624 * data contains amount of time to sleep, in milliseconds
621 */ 625 */
622static int 626static int
623filt_timerattach(struct knote *kn) 627filt_timerattach(struct knote *kn)
624{ 628{
625 callout_t *calloutp; 629 callout_t *calloutp;
626 struct kqueue *kq; 630 struct kqueue *kq;
627 int tticks; 631 int tticks;
628 632
629 tticks = mstohz(kn->kn_sdata); 633 tticks = mstohz(kn->kn_sdata);
630 634
631 /* if the supplied value is under our resolution, use 1 tick */ 635 /* if the supplied value is under our resolution, use 1 tick */
632 if (tticks == 0) { 636 if (tticks == 0) {
633 if (kn->kn_sdata == 0) 637 if (kn->kn_sdata == 0)
634 return EINVAL; 638 return EINVAL;
635 tticks = 1; 639 tticks = 1;
636 } 640 }
637 641
638 if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax || 642 if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
639 (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) { 643 (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
640 atomic_dec_uint(&kq_ncallouts); 644 atomic_dec_uint(&kq_ncallouts);
641 return ENOMEM; 645 return ENOMEM;
642 } 646 }
643 callout_init(calloutp, CALLOUT_MPSAFE); 647 callout_init(calloutp, CALLOUT_MPSAFE);
644 648
645 kq = kn->kn_kq; 649 kq = kn->kn_kq;
646 mutex_spin_enter(&kq->kq_lock); 650 mutex_spin_enter(&kq->kq_lock);
647 kn->kn_flags |= EV_CLEAR; /* automatically set */ 651 kn->kn_flags |= EV_CLEAR; /* automatically set */
648 kn->kn_hook = calloutp; 652 kn->kn_hook = calloutp;
649 mutex_spin_exit(&kq->kq_lock); 653 mutex_spin_exit(&kq->kq_lock);
650 654
651 callout_reset(calloutp, tticks, filt_timerexpire, kn); 655 callout_reset(calloutp, tticks, filt_timerexpire, kn);
652 656
653 return (0); 657 return (0);
654} 658}
655 659
656static void 660static void
657filt_timerdetach(struct knote *kn) 661filt_timerdetach(struct knote *kn)
658{ 662{
659 callout_t *calloutp; 663 callout_t *calloutp;
660 664
661 calloutp = (callout_t *)kn->kn_hook; 665 calloutp = (callout_t *)kn->kn_hook;
662 callout_halt(calloutp, NULL); 666 callout_halt(calloutp, NULL);
663 callout_destroy(calloutp); 667 callout_destroy(calloutp);
664 kmem_free(calloutp, sizeof(*calloutp)); 668 kmem_free(calloutp, sizeof(*calloutp));
665 atomic_dec_uint(&kq_ncallouts); 669 atomic_dec_uint(&kq_ncallouts);
666} 670}
667 671
668static int 672static int
669filt_timer(struct knote *kn, long hint) 673filt_timer(struct knote *kn, long hint)
670{ 674{
671 int rv; 675 int rv;
672 676
673 mutex_enter(&kqueue_misc_lock); 677 mutex_enter(&kqueue_misc_lock);
674 rv = (kn->kn_data != 0); 678 rv = (kn->kn_data != 0);
675 mutex_exit(&kqueue_misc_lock); 679 mutex_exit(&kqueue_misc_lock);
676 680
677 return rv; 681 return rv;
678} 682}
679 683
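Seen from userland, the timer filter above takes its period from data in milliseconds (values below the clock resolution are rounded up to one tick), forces EV_CLEAR, and returns the number of expirations since the last retrieval in data. A minimal sketch under that reading:

	#include <sys/types.h>
	#include <sys/event.h>
	#include <sys/time.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct kevent ev;
		int kq;

		if ((kq = kqueue()) == -1)
			err(1, "kqueue");

		/* Periodic timer, ident 1, period 500 ms. */
		EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
		if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
			err(1, "kevent: register");

		for (;;) {
			if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
				/* data: expirations since last retrieval */
				printf("timer fired %lld time(s)\n",
				    (long long)ev.data);
		}
	}
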
680/* 684/*
681 * filt_seltrue: 685 * filt_seltrue:
682 * 686 *
683 * This filter "event" routine simulates seltrue(). 687 * This filter "event" routine simulates seltrue().
684 */ 688 */
685int 689int
686filt_seltrue(struct knote *kn, long hint) 690filt_seltrue(struct knote *kn, long hint)
687{ 691{
688 692
689 /* 693 /*
690 * We don't know how much data can be read/written, 694 * We don't know how much data can be read/written,
691 * but we know that it *can* be. This is about as 695 * but we know that it *can* be. This is about as
692	 * good as select/poll can do.				696	 * good as select/poll can do.
693 */ 697 */
694 kn->kn_data = 0; 698 kn->kn_data = 0;
695 return (1); 699 return (1);
696} 700}
697 701
698/* 702/*
699	 * This provides a full kqfilter entry for device switch tables, which	703	 * This provides a full kqfilter entry for device switch tables, which
700	 * has the same effect as a filter using filt_seltrue() as its filter method.	704	 * has the same effect as a filter using filt_seltrue() as its filter method.
701 */ 705 */
702static void 706static void
703filt_seltruedetach(struct knote *kn) 707filt_seltruedetach(struct knote *kn)
704{ 708{
705 /* Nothing to do */ 709 /* Nothing to do */
706} 710}
707 711
708const struct filterops seltrue_filtops = 712const struct filterops seltrue_filtops =
709 { 1, NULL, filt_seltruedetach, filt_seltrue }; 713 { 1, NULL, filt_seltruedetach, filt_seltrue };
710 714
711int 715int
712seltrue_kqfilter(dev_t dev, struct knote *kn) 716seltrue_kqfilter(dev_t dev, struct knote *kn)
713{ 717{
714 switch (kn->kn_filter) { 718 switch (kn->kn_filter) {
715 case EVFILT_READ: 719 case EVFILT_READ:
716 case EVFILT_WRITE: 720 case EVFILT_WRITE:
717 kn->kn_fop = &seltrue_filtops; 721 kn->kn_fop = &seltrue_filtops;
718 break; 722 break;
719 default: 723 default:
720 return (EINVAL); 724 return (EINVAL);
721 } 725 }
722 726
723 /* Nothing more to do */ 727 /* Nothing more to do */
724 return (0); 728 return (0);
725} 729}
726 730
727/* 731/*
728 * kqueue(2) system call. 732 * kqueue(2) system call.
729 */ 733 */
730static int 734static int
731kqueue1(struct lwp *l, int flags, register_t *retval) 735kqueue1(struct lwp *l, int flags, register_t *retval)
732{ 736{
733 struct kqueue *kq; 737 struct kqueue *kq;
734 file_t *fp; 738 file_t *fp;
735 int fd, error; 739 int fd, error;
736 740
737 if ((error = fd_allocfile(&fp, &fd)) != 0) 741 if ((error = fd_allocfile(&fp, &fd)) != 0)
738 return error; 742 return error;
739 fp->f_flag = FREAD | FWRITE | (flags & (FNONBLOCK|FNOSIGPIPE)); 743 fp->f_flag = FREAD | FWRITE | (flags & (FNONBLOCK|FNOSIGPIPE));
740 fp->f_type = DTYPE_KQUEUE; 744 fp->f_type = DTYPE_KQUEUE;
741 fp->f_ops = &kqueueops; 745 fp->f_ops = &kqueueops;
742 kq = kmem_zalloc(sizeof(*kq), KM_SLEEP); 746 kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
743 mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED); 747 mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
744 cv_init(&kq->kq_cv, "kqueue"); 748 cv_init(&kq->kq_cv, "kqueue");
745 selinit(&kq->kq_sel); 749 selinit(&kq->kq_sel);
746 TAILQ_INIT(&kq->kq_head); 750 TAILQ_INIT(&kq->kq_head);
747 fp->f_data = kq; 751 fp->f_data = kq;
748 *retval = fd; 752 *retval = fd;
749 kq->kq_fdp = curlwp->l_fd; 753 kq->kq_fdp = curlwp->l_fd;
750 fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0); 754 fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
751 fd_affix(curproc, fp, fd); 755 fd_affix(curproc, fp, fd);
752 return error; 756 return error;
753} 757}
754 758
755/* 759/*
756 * kqueue(2) system call. 760 * kqueue(2) system call.
757 */ 761 */
758int 762int
759sys_kqueue(struct lwp *l, const void *v, register_t *retval) 763sys_kqueue(struct lwp *l, const void *v, register_t *retval)
760{ 764{
761 return kqueue1(l, 0, retval); 765 return kqueue1(l, 0, retval);
762} 766}
763 767
764int 768int
765sys_kqueue1(struct lwp *l, const struct sys_kqueue1_args *uap, 769sys_kqueue1(struct lwp *l, const struct sys_kqueue1_args *uap,
766 register_t *retval) 770 register_t *retval)
767{ 771{
768 /* { 772 /* {
769 syscallarg(int) flags; 773 syscallarg(int) flags;
770 } */ 774 } */
771 return kqueue1(l, SCARG(uap, flags), retval); 775 return kqueue1(l, SCARG(uap, flags), retval);
772} 776}
773 777
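The flags accepted by kqueue1() map directly onto the new descriptor: FNONBLOCK and FNOSIGPIPE into f_flag, and O_CLOEXEC into the close-on-exec bit. From userland this NetBSD-specific variant would presumably be used like so:

	#include <sys/types.h>
	#include <sys/event.h>
	#include <fcntl.h>
	#include <err.h>

	int
	main(void)
	{
		/* O_NOSIGPIPE (NetBSD) is accepted here as well. */
		int kq = kqueue1(O_CLOEXEC | O_NONBLOCK);

		if (kq == -1)
			err(1, "kqueue1");
		return 0;
	}
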
774/* 778/*
775 * kevent(2) system call. 779 * kevent(2) system call.
776 */ 780 */
777int 781int
778kevent_fetch_changes(void *private, const struct kevent *changelist, 782kevent_fetch_changes(void *private, const struct kevent *changelist,
779 struct kevent *changes, size_t index, int n) 783 struct kevent *changes, size_t index, int n)
780{ 784{
781 785
782 return copyin(changelist + index, changes, n * sizeof(*changes)); 786 return copyin(changelist + index, changes, n * sizeof(*changes));
783} 787}
784 788
785int 789int
786kevent_put_events(void *private, struct kevent *events, 790kevent_put_events(void *private, struct kevent *events,
787 struct kevent *eventlist, size_t index, int n) 791 struct kevent *eventlist, size_t index, int n)
788{ 792{
789 793
790 return copyout(events, eventlist + index, n * sizeof(*events)); 794 return copyout(events, eventlist + index, n * sizeof(*events));
791} 795}
792 796
793static const struct kevent_ops kevent_native_ops = { 797static const struct kevent_ops kevent_native_ops = {
794 .keo_private = NULL, 798 .keo_private = NULL,
795 .keo_fetch_timeout = copyin, 799 .keo_fetch_timeout = copyin,
796 .keo_fetch_changes = kevent_fetch_changes, 800 .keo_fetch_changes = kevent_fetch_changes,
797 .keo_put_events = kevent_put_events, 801 .keo_put_events = kevent_put_events,
798}; 802};
799 803
800int 804int
801sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap, 805sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap,
802 register_t *retval) 806 register_t *retval)
803{ 807{
804 /* { 808 /* {
805 syscallarg(int) fd; 809 syscallarg(int) fd;
806 syscallarg(const struct kevent *) changelist; 810 syscallarg(const struct kevent *) changelist;
807 syscallarg(size_t) nchanges; 811 syscallarg(size_t) nchanges;
808 syscallarg(struct kevent *) eventlist; 812 syscallarg(struct kevent *) eventlist;
809 syscallarg(size_t) nevents; 813 syscallarg(size_t) nevents;
810 syscallarg(const struct timespec *) timeout; 814 syscallarg(const struct timespec *) timeout;
811 } */ 815 } */
812 816
813 return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist), 817 return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
814 SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents), 818 SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
815 SCARG(uap, timeout), &kevent_native_ops); 819 SCARG(uap, timeout), &kevent_native_ops);
816} 820}
817 821
818int 822int
819kevent1(register_t *retval, int fd, 823kevent1(register_t *retval, int fd,
820 const struct kevent *changelist, size_t nchanges, 824 const struct kevent *changelist, size_t nchanges,
821 struct kevent *eventlist, size_t nevents, 825 struct kevent *eventlist, size_t nevents,
822 const struct timespec *timeout, 826 const struct timespec *timeout,
823 const struct kevent_ops *keops) 827 const struct kevent_ops *keops)
824{ 828{
825 struct kevent *kevp; 829 struct kevent *kevp;
826 struct kqueue *kq; 830 struct kqueue *kq;
827 struct timespec ts; 831 struct timespec ts;
828 size_t i, n, ichange; 832 size_t i, n, ichange;
829 int nerrors, error; 833 int nerrors, error;
830 struct kevent kevbuf[KQ_NEVENTS]; /* approx 300 bytes on 64-bit */ 834 struct kevent kevbuf[KQ_NEVENTS]; /* approx 300 bytes on 64-bit */
831 file_t *fp; 835 file_t *fp;
832 836
833 /* check that we're dealing with a kq */ 837 /* check that we're dealing with a kq */
834 fp = fd_getfile(fd); 838 fp = fd_getfile(fd);
835 if (fp == NULL) 839 if (fp == NULL)
836 return (EBADF); 840 return (EBADF);
837 841
838 if (fp->f_type != DTYPE_KQUEUE) { 842 if (fp->f_type != DTYPE_KQUEUE) {
839 fd_putfile(fd); 843 fd_putfile(fd);
840 return (EBADF); 844 return (EBADF);
841 } 845 }
842 846
843 if (timeout != NULL) { 847 if (timeout != NULL) {
844 error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts)); 848 error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
845 if (error) 849 if (error)
846 goto done; 850 goto done;
847 timeout = &ts; 851 timeout = &ts;
848 } 852 }
849 853
850 kq = (struct kqueue *)fp->f_data; 854 kq = (struct kqueue *)fp->f_data;
851 nerrors = 0; 855 nerrors = 0;
852 ichange = 0; 856 ichange = 0;
853 857
854 /* traverse list of events to register */ 858 /* traverse list of events to register */
855 while (nchanges > 0) { 859 while (nchanges > 0) {
856 n = MIN(nchanges, __arraycount(kevbuf)); 860 n = MIN(nchanges, __arraycount(kevbuf));
857 error = (*keops->keo_fetch_changes)(keops->keo_private, 861 error = (*keops->keo_fetch_changes)(keops->keo_private,
858 changelist, kevbuf, ichange, n); 862 changelist, kevbuf, ichange, n);
859 if (error) 863 if (error)
860 goto done; 864 goto done;
861 for (i = 0; i < n; i++) { 865 for (i = 0; i < n; i++) {
862 kevp = &kevbuf[i]; 866 kevp = &kevbuf[i];
863 kevp->flags &= ~EV_SYSFLAGS; 867 kevp->flags &= ~EV_SYSFLAGS;
864 /* register each knote */ 868 /* register each knote */
865 error = kqueue_register(kq, kevp); 869 error = kqueue_register(kq, kevp);
866 if (error) { 870 if (error) {
867 if (nevents != 0) { 871 if (nevents != 0) {
868 kevp->flags = EV_ERROR; 872 kevp->flags = EV_ERROR;
869 kevp->data = error; 873 kevp->data = error;
870 error = (*keops->keo_put_events) 874 error = (*keops->keo_put_events)
871 (keops->keo_private, kevp, 875 (keops->keo_private, kevp,
872 eventlist, nerrors, 1); 876 eventlist, nerrors, 1);
873 if (error) 877 if (error)
874 goto done; 878 goto done;
875 nevents--; 879 nevents--;
876 nerrors++; 880 nerrors++;
877 } else { 881 } else {
878 goto done; 882 goto done;
879 } 883 }
880 } 884 }
881 } 885 }
882 nchanges -= n; /* update the results */ 886 nchanges -= n; /* update the results */
883 ichange += n; 887 ichange += n;
884 } 888 }
885 if (nerrors) { 889 if (nerrors) {
886 *retval = nerrors; 890 *retval = nerrors;
887 error = 0; 891 error = 0;
888 goto done; 892 goto done;
889 } 893 }
890 894
891 /* actually scan through the events */ 895 /* actually scan through the events */
892 error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops, 896 error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
893 kevbuf, __arraycount(kevbuf)); 897 kevbuf, __arraycount(kevbuf));
894 done: 898 done:
895 fd_putfile(fd); 899 fd_putfile(fd);
896 return (error); 900 return (error);
897} 901}
898 902
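Note the error handling in the registration loop above: when the caller supplied eventlist space, a failed change is reported in-band as a kevent with EV_ERROR set and the errno in data, and the call itself succeeds with the error count in *retval. A sketch of how a caller should check for this (deliberately provoking EBADF):

	#include <sys/types.h>
	#include <sys/event.h>
	#include <sys/time.h>
	#include <err.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		struct kevent chg, res;
		struct timespec zero = { 0, 0 };
		int kq, n;

		if ((kq = kqueue()) == -1)
			err(1, "kqueue");

		/* Register a bad descriptor to provoke EV_ERROR. */
		EV_SET(&chg, -1, EVFILT_READ, EV_ADD, 0, 0, NULL);
		n = kevent(kq, &chg, 1, &res, 1, &zero);
		if (n > 0 && (res.flags & EV_ERROR))
			fprintf(stderr, "register failed: %s\n",
			    strerror((int)res.data));	/* EBADF here */
		return 0;
	}
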
899/* 903/*
900 * Register a given kevent kev onto the kqueue 904 * Register a given kevent kev onto the kqueue
901 */ 905 */
902static int 906static int
903kqueue_register(struct kqueue *kq, struct kevent *kev) 907kqueue_register(struct kqueue *kq, struct kevent *kev)
904{ 908{
905 struct kfilter *kfilter; 909 struct kfilter *kfilter;
906 filedesc_t *fdp; 910 filedesc_t *fdp;
907 file_t *fp; 911 file_t *fp;
908 fdfile_t *ff; 912 fdfile_t *ff;
909 struct knote *kn, *newkn; 913 struct knote *kn, *newkn;
910 struct klist *list; 914 struct klist *list;
911 int error, fd, rv; 915 int error, fd, rv;
912 916
913 fdp = kq->kq_fdp; 917 fdp = kq->kq_fdp;
914 fp = NULL; 918 fp = NULL;
915 kn = NULL; 919 kn = NULL;
916 error = 0; 920 error = 0;
917 fd = 0; 921 fd = 0;
918 922
919 newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP); 923 newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);
920 924
921 rw_enter(&kqueue_filter_lock, RW_READER); 925 rw_enter(&kqueue_filter_lock, RW_READER);
922 kfilter = kfilter_byfilter(kev->filter); 926 kfilter = kfilter_byfilter(kev->filter);
923 if (kfilter == NULL || kfilter->filtops == NULL) { 927 if (kfilter == NULL || kfilter->filtops == NULL) {
924		/* filter not found or not implemented */	928		/* filter not found or not implemented */
925 rw_exit(&kqueue_filter_lock); 929 rw_exit(&kqueue_filter_lock);
926 kmem_free(newkn, sizeof(*newkn)); 930 kmem_free(newkn, sizeof(*newkn));
927 return (EINVAL); 931 return (EINVAL);
928 } 932 }
929 933
930 /* search if knote already exists */ 934 /* search if knote already exists */
931 if (kfilter->filtops->f_isfd) { 935 if (kfilter->filtops->f_isfd) {
932 /* monitoring a file descriptor */ 936 /* monitoring a file descriptor */
933 fd = kev->ident; 937 fd = kev->ident;
934 if ((fp = fd_getfile(fd)) == NULL) { 938 if ((fp = fd_getfile(fd)) == NULL) {
935 rw_exit(&kqueue_filter_lock); 939 rw_exit(&kqueue_filter_lock);
936 kmem_free(newkn, sizeof(*newkn)); 940 kmem_free(newkn, sizeof(*newkn));
937 return EBADF; 941 return EBADF;
938 } 942 }
939 mutex_enter(&fdp->fd_lock); 943 mutex_enter(&fdp->fd_lock);
940 ff = fdp->fd_dt->dt_ff[fd]; 944 ff = fdp->fd_dt->dt_ff[fd];
941 if (fd <= fdp->fd_lastkqfile) { 945 if (fd <= fdp->fd_lastkqfile) {
942 SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) { 946 SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
943 if (kq == kn->kn_kq && 947 if (kq == kn->kn_kq &&
944 kev->filter == kn->kn_filter) 948 kev->filter == kn->kn_filter)
945 break; 949 break;
946 } 950 }
947 } 951 }
948 } else { 952 } else {
949 /* 953 /*
950 * not monitoring a file descriptor, so 954 * not monitoring a file descriptor, so
951 * lookup knotes in internal hash table 955 * lookup knotes in internal hash table
952 */ 956 */
953 mutex_enter(&fdp->fd_lock); 957 mutex_enter(&fdp->fd_lock);
954 if (fdp->fd_knhashmask != 0) { 958 if (fdp->fd_knhashmask != 0) {
955 list = &fdp->fd_knhash[ 959 list = &fdp->fd_knhash[
956 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; 960 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
957 SLIST_FOREACH(kn, list, kn_link) { 961 SLIST_FOREACH(kn, list, kn_link) {
958 if (kev->ident == kn->kn_id && 962 if (kev->ident == kn->kn_id &&
959 kq == kn->kn_kq && 963 kq == kn->kn_kq &&
960 kev->filter == kn->kn_filter) 964 kev->filter == kn->kn_filter)
961 break; 965 break;
962 } 966 }
963 } 967 }
964 } 968 }
965 969
966 /* 970 /*
967 * kn now contains the matching knote, or NULL if no match 971 * kn now contains the matching knote, or NULL if no match
968 */ 972 */
969 if (kev->flags & EV_ADD) { 973 if (kev->flags & EV_ADD) {
970 if (kn == NULL) { 974 if (kn == NULL) {
971 /* create new knote */ 975 /* create new knote */
972 kn = newkn; 976 kn = newkn;
973 newkn = NULL; 977 newkn = NULL;
974 kn->kn_obj = fp; 978 kn->kn_obj = fp;
975 kn->kn_id = kev->ident; 979 kn->kn_id = kev->ident;
976 kn->kn_kq = kq; 980 kn->kn_kq = kq;
977 kn->kn_fop = kfilter->filtops; 981 kn->kn_fop = kfilter->filtops;
978 kn->kn_kfilter = kfilter; 982 kn->kn_kfilter = kfilter;
979 kn->kn_sfflags = kev->fflags; 983 kn->kn_sfflags = kev->fflags;
980 kn->kn_sdata = kev->data; 984 kn->kn_sdata = kev->data;
981 kev->fflags = 0; 985 kev->fflags = 0;
982 kev->data = 0; 986 kev->data = 0;
983 kn->kn_kevent = *kev; 987 kn->kn_kevent = *kev;
984 988
985 /* 989 /*
986 * apply reference count to knote structure, and 990 * apply reference count to knote structure, and
987 * do not release it at the end of this routine. 991 * do not release it at the end of this routine.
988 */ 992 */
989 fp = NULL; 993 fp = NULL;
990 994
991 if (!kn->kn_fop->f_isfd) { 995 if (!kn->kn_fop->f_isfd) {
992 /* 996 /*
993 * If knote is not on an fd, store on 997 * If knote is not on an fd, store on
994 * internal hash table. 998 * internal hash table.
995 */ 999 */
996 if (fdp->fd_knhashmask == 0) { 1000 if (fdp->fd_knhashmask == 0) {
997 /* XXXAD can block with fd_lock held */ 1001 /* XXXAD can block with fd_lock held */
998 fdp->fd_knhash = hashinit(KN_HASHSIZE, 1002 fdp->fd_knhash = hashinit(KN_HASHSIZE,
999 HASH_LIST, true, 1003 HASH_LIST, true,
1000 &fdp->fd_knhashmask); 1004 &fdp->fd_knhashmask);
1001 } 1005 }
1002 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, 1006 list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
1003 fdp->fd_knhashmask)]; 1007 fdp->fd_knhashmask)];
1004 } else { 1008 } else {
1005 /* Otherwise, knote is on an fd. */ 1009 /* Otherwise, knote is on an fd. */
1006 list = (struct klist *) 1010 list = (struct klist *)
1007 &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist; 1011 &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist;
1008 if ((int)kn->kn_id > fdp->fd_lastkqfile) 1012 if ((int)kn->kn_id > fdp->fd_lastkqfile)
1009 fdp->fd_lastkqfile = kn->kn_id; 1013 fdp->fd_lastkqfile = kn->kn_id;
1010 } 1014 }
1011 SLIST_INSERT_HEAD(list, kn, kn_link); 1015 SLIST_INSERT_HEAD(list, kn, kn_link);
1012 1016
1013 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1017 KERNEL_LOCK(1, NULL); /* XXXSMP */
1014 error = (*kfilter->filtops->f_attach)(kn); 1018 error = (*kfilter->filtops->f_attach)(kn);
1015 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1019 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1016 if (error != 0) { 1020 if (error != 0) {
1017#ifdef DIAGNOSTIC 1021#ifdef DIAGNOSTIC
1018 printf("%s: event not supported for file type" 1022 printf("%s: event not supported for file type"
1019 " %d\n", __func__, fp ? fp->f_type : -1); 1023 " %d\n", __func__, fp ? fp->f_type : -1);
1020#endif 1024#endif
1021 /* knote_detach() drops fdp->fd_lock */ 1025 /* knote_detach() drops fdp->fd_lock */
1022 knote_detach(kn, fdp, false); 1026 knote_detach(kn, fdp, false);
1023 goto done; 1027 goto done;
1024 } 1028 }
1025 atomic_inc_uint(&kfilter->refcnt); 1029 atomic_inc_uint(&kfilter->refcnt);
1026 } else { 1030 } else {
1027 /* 1031 /*
1028 * The user may change some filter values after the 1032 * The user may change some filter values after the
1029 * initial EV_ADD, but doing so will not reset any 1033 * initial EV_ADD, but doing so will not reset any
1030			 * filters which have already been triggered.	1034			 * filters which have already been triggered.
1031 */ 1035 */
1032 kn->kn_sfflags = kev->fflags; 1036 kn->kn_sfflags = kev->fflags;
1033 kn->kn_sdata = kev->data; 1037 kn->kn_sdata = kev->data;
1034 kn->kn_kevent.udata = kev->udata; 1038 kn->kn_kevent.udata = kev->udata;
1035 } 1039 }
1036 /* 1040 /*
1037 * We can get here if we are trying to attach 1041 * We can get here if we are trying to attach
1038 * an event to a file descriptor that does not 1042 * an event to a file descriptor that does not
1039 * support events, and the attach routine is 1043 * support events, and the attach routine is
1040 * broken and does not return an error. 1044 * broken and does not return an error.
1041 */ 1045 */
1042 KASSERT(kn->kn_fop->f_event != NULL); 1046 KASSERT(kn->kn_fop->f_event != NULL);
1043 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1047 KERNEL_LOCK(1, NULL); /* XXXSMP */
1044 rv = (*kn->kn_fop->f_event)(kn, 0); 1048 rv = (*kn->kn_fop->f_event)(kn, 0);
1045 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1049 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1046 if (rv) 1050 if (rv)
1047 knote_activate(kn); 1051 knote_activate(kn);
1048 } else { 1052 } else {
1049 if (kn == NULL) { 1053 if (kn == NULL) {
1050 error = ENOENT; 1054 error = ENOENT;
1051 mutex_exit(&fdp->fd_lock); 1055 mutex_exit(&fdp->fd_lock);
1052 goto done; 1056 goto done;
1053 } 1057 }
1054 if (kev->flags & EV_DELETE) { 1058 if (kev->flags & EV_DELETE) {
1055 /* knote_detach() drops fdp->fd_lock */ 1059 /* knote_detach() drops fdp->fd_lock */
1056 knote_detach(kn, fdp, true); 1060 knote_detach(kn, fdp, true);
1057 goto done; 1061 goto done;
1058 } 1062 }
1059 } 1063 }
1060 1064
1061 /* disable knote */ 1065 /* disable knote */
1062 if ((kev->flags & EV_DISABLE)) { 1066 if ((kev->flags & EV_DISABLE)) {
1063 mutex_spin_enter(&kq->kq_lock); 1067 mutex_spin_enter(&kq->kq_lock);
1064 if ((kn->kn_status & KN_DISABLED) == 0) 1068 if ((kn->kn_status & KN_DISABLED) == 0)
1065 kn->kn_status |= KN_DISABLED; 1069 kn->kn_status |= KN_DISABLED;
1066 mutex_spin_exit(&kq->kq_lock); 1070 mutex_spin_exit(&kq->kq_lock);
1067 } 1071 }
1068 1072
1069 /* enable knote */ 1073 /* enable knote */
1070 if ((kev->flags & EV_ENABLE)) { 1074 if ((kev->flags & EV_ENABLE)) {
1071 knote_enqueue(kn); 1075 knote_enqueue(kn);
1072 } 1076 }
1073 mutex_exit(&fdp->fd_lock); 1077 mutex_exit(&fdp->fd_lock);
1074 done: 1078 done:
1075 rw_exit(&kqueue_filter_lock); 1079 rw_exit(&kqueue_filter_lock);
1076 if (newkn != NULL) 1080 if (newkn != NULL)
1077 kmem_free(newkn, sizeof(*newkn)); 1081 kmem_free(newkn, sizeof(*newkn));
1078 if (fp != NULL) 1082 if (fp != NULL)
1079 fd_putfile(fd); 1083 fd_putfile(fd);
1080 return (error); 1084 return (error);
1081} 1085}
1082 1086
1083#if defined(DEBUG) 1087#if defined(DEBUG)
1084static void 1088static void
1085kq_check(struct kqueue *kq) 1089kq_check(struct kqueue *kq)
1086{ 1090{
1087 const struct knote *kn; 1091 const struct knote *kn;
1088 int count; 1092 int count;
1089 int nmarker; 1093 int nmarker;
1090 1094
1091 KASSERT(mutex_owned(&kq->kq_lock)); 1095 KASSERT(mutex_owned(&kq->kq_lock));
1092 KASSERT(kq->kq_count >= 0); 1096 KASSERT(kq->kq_count >= 0);
1093 1097
1094 count = 0; 1098 count = 0;
1095 nmarker = 0; 1099 nmarker = 0;
1096 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1100 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
1097 if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) { 1101 if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
1098 panic("%s: kq=%p kn=%p inconsist 1", __func__, kq, kn); 1102 panic("%s: kq=%p kn=%p inconsist 1", __func__, kq, kn);
1099 } 1103 }
1100 if ((kn->kn_status & KN_MARKER) == 0) { 1104 if ((kn->kn_status & KN_MARKER) == 0) {
1101 if (kn->kn_kq != kq) { 1105 if (kn->kn_kq != kq) {
1102 panic("%s: kq=%p kn=%p inconsist 2", 1106 panic("%s: kq=%p kn=%p inconsist 2",
1103 __func__, kq, kn); 1107 __func__, kq, kn);
1104 } 1108 }
1105 if ((kn->kn_status & KN_ACTIVE) == 0) { 1109 if ((kn->kn_status & KN_ACTIVE) == 0) {
1106 panic("%s: kq=%p kn=%p: not active", 1110 panic("%s: kq=%p kn=%p: not active",
1107 __func__, kq, kn); 1111 __func__, kq, kn);
1108 } 1112 }
1109 count++; 1113 count++;
1110 if (count > kq->kq_count) { 1114 if (count > kq->kq_count) {
1111 goto bad; 1115 goto bad;
1112 } 1116 }
1113 } else { 1117 } else {
1114 nmarker++; 1118 nmarker++;
1115#if 0 1119#if 0
1116 if (nmarker > 10000) { 1120 if (nmarker > 10000) {
1117 panic("%s: kq=%p too many markers: %d != %d, " 1121 panic("%s: kq=%p too many markers: %d != %d, "
1118 "nmarker=%d", 1122 "nmarker=%d",
1119 __func__, kq, kq->kq_count, count, nmarker); 1123 __func__, kq, kq->kq_count, count, nmarker);
1120 } 1124 }
1121#endif 1125#endif
1122 } 1126 }
1123 } 1127 }
1124 if (kq->kq_count != count) { 1128 if (kq->kq_count != count) {
1125bad: 1129bad:
1126 panic("%s: kq=%p inconsist 3: %d != %d, nmarker=%d", 1130 panic("%s: kq=%p inconsist 3: %d != %d, nmarker=%d",
1127 __func__, kq, kq->kq_count, count, nmarker); 1131 __func__, kq, kq->kq_count, count, nmarker);
1128 } 1132 }
1129} 1133}
1130#else /* defined(DEBUG) */ 1134#else /* defined(DEBUG) */
1131#define kq_check(a) /* nothing */ 1135#define kq_check(a) /* nothing */
1132#endif /* defined(DEBUG) */ 1136#endif /* defined(DEBUG) */
1133 1137
1134/* 1138/*
1135 * Scan through the list of events on fp (for a maximum of maxevents), 1139 * Scan through the list of events on fp (for a maximum of maxevents),
1136 * returning the results in to ulistp. Timeout is determined by tsp; if 1140 * returning the results in to ulistp. Timeout is determined by tsp; if
1137 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait 1141 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
1138 * as appropriate. 1142 * as appropriate.
1139 */ 1143 */
1140static int 1144static int
1141kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp, 1145kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
1142 const struct timespec *tsp, register_t *retval, 1146 const struct timespec *tsp, register_t *retval,
1143 const struct kevent_ops *keops, struct kevent *kevbuf, 1147 const struct kevent_ops *keops, struct kevent *kevbuf,
1144 size_t kevcnt) 1148 size_t kevcnt)
1145{ 1149{
1146 struct kqueue *kq; 1150 struct kqueue *kq;
1147 struct kevent *kevp; 1151 struct kevent *kevp;
1148 struct timespec ats, sleepts; 1152 struct timespec ats, sleepts;
1149 struct knote *kn, *marker; 1153 struct knote *kn, *marker;
1150 size_t count, nkev, nevents; 1154 size_t count, nkev, nevents;
1151 int timeout, error, rv; 1155 int timeout, error, rv;
1152 filedesc_t *fdp; 1156 filedesc_t *fdp;
1153 1157
1154 fdp = curlwp->l_fd; 1158 fdp = curlwp->l_fd;
1155 kq = fp->f_data; 1159 kq = fp->f_data;
1156 count = maxevents; 1160 count = maxevents;
1157 nkev = nevents = error = 0; 1161 nkev = nevents = error = 0;
1158 if (count == 0) { 1162 if (count == 0) {
1159 *retval = 0; 1163 *retval = 0;
1160 return 0; 1164 return 0;
1161 } 1165 }
1162 1166
1163 if (tsp) { /* timeout supplied */ 1167 if (tsp) { /* timeout supplied */
1164 ats = *tsp; 1168 ats = *tsp;
1165 if (inittimeleft(&ats, &sleepts) == -1) { 1169 if (inittimeleft(&ats, &sleepts) == -1) {
1166 *retval = maxevents; 1170 *retval = maxevents;
1167 return EINVAL; 1171 return EINVAL;
1168 } 1172 }
1169 timeout = tstohz(&ats); 1173 timeout = tstohz(&ats);
1170 if (timeout <= 0) 1174 if (timeout <= 0)
1171 timeout = -1; /* do poll */ 1175 timeout = -1; /* do poll */
1172 } else { 1176 } else {
1173 /* no timeout, wait forever */ 1177 /* no timeout, wait forever */
1174 timeout = 0; 1178 timeout = 0;
1175 }  1179 }
1176 1180
1177 marker = kmem_zalloc(sizeof(*marker), KM_SLEEP); 1181 marker = kmem_zalloc(sizeof(*marker), KM_SLEEP);
1178 marker->kn_status = KN_MARKER; 1182 marker->kn_status = KN_MARKER;
1179 mutex_spin_enter(&kq->kq_lock); 1183 mutex_spin_enter(&kq->kq_lock);
1180 retry: 1184 retry:
1181 kevp = kevbuf; 1185 kevp = kevbuf;
1182 if (kq->kq_count == 0) { 1186 if (kq->kq_count == 0) {
1183 if (timeout >= 0) { 1187 if (timeout >= 0) {
1184 error = cv_timedwait_sig(&kq->kq_cv, 1188 error = cv_timedwait_sig(&kq->kq_cv,
1185 &kq->kq_lock, timeout); 1189 &kq->kq_lock, timeout);
1186 if (error == 0) { 1190 if (error == 0) {
1187 if (tsp == NULL || (timeout = 1191 if (tsp == NULL || (timeout =
1188 gettimeleft(&ats, &sleepts)) > 0) 1192 gettimeleft(&ats, &sleepts)) > 0)
1189 goto retry; 1193 goto retry;
1190 } else { 1194 } else {
1191 /* don't restart after signals... */ 1195 /* don't restart after signals... */
1192 if (error == ERESTART) 1196 if (error == ERESTART)
1193 error = EINTR; 1197 error = EINTR;
1194 if (error == EWOULDBLOCK) 1198 if (error == EWOULDBLOCK)
1195 error = 0; 1199 error = 0;
1196 } 1200 }
1197 } 1201 }
1198 } else { 1202 } else {
1199 /* mark end of knote list */ 1203 /* mark end of knote list */
1200 TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); 1204 TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1201 1205
1202 while (count != 0) { 1206 while (count != 0) {
1203 kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */ 1207 kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
1204 while ((kn->kn_status & KN_MARKER) != 0) { 1208 while ((kn->kn_status & KN_MARKER) != 0) {
1205 if (kn == marker) { 1209 if (kn == marker) {
1206 /* it's our marker, stop */ 1210 /* it's our marker, stop */
1207 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1211 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1208 if (count < maxevents || (tsp != NULL && 1212 if (count < maxevents || (tsp != NULL &&
1209 (timeout = gettimeleft(&ats, 1213 (timeout = gettimeleft(&ats,
1210 &sleepts)) <= 0)) 1214 &sleepts)) <= 0))
1211 goto done; 1215 goto done;
1212 goto retry; 1216 goto retry;
1213 } 1217 }
1214 /* someone else's marker. */ 1218 /* someone else's marker. */
1215 kn = TAILQ_NEXT(kn, kn_tqe); 1219 kn = TAILQ_NEXT(kn, kn_tqe);
1216 } 1220 }
1217 kq_check(kq); 1221 kq_check(kq);
1218 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1222 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1219 kq->kq_count--; 1223 kq->kq_count--;
1220 kn->kn_status &= ~KN_QUEUED; 1224 kn->kn_status &= ~KN_QUEUED;
1221 kq_check(kq); 1225 kq_check(kq);
1222 if (kn->kn_status & KN_DISABLED) { 1226 if (kn->kn_status & KN_DISABLED) {
1223 /* don't want disabled events */ 1227 /* don't want disabled events */
1224 continue; 1228 continue;
1225 } 1229 }
1226 if ((kn->kn_flags & EV_ONESHOT) == 0) { 1230 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1227 mutex_spin_exit(&kq->kq_lock); 1231 mutex_spin_exit(&kq->kq_lock);
1228 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1232 KERNEL_LOCK(1, NULL); /* XXXSMP */
1229 rv = (*kn->kn_fop->f_event)(kn, 0); 1233 rv = (*kn->kn_fop->f_event)(kn, 0);
				KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
				mutex_spin_enter(&kq->kq_lock);
				/* Re-poll if note was re-enqueued. */
				if ((kn->kn_status & KN_QUEUED) != 0)
					continue;
				if (rv == 0) {
					/*
					 * non-ONESHOT event that hasn't
					 * triggered again, so de-queue.
					 */
					kn->kn_status &= ~KN_ACTIVE;
					continue;
				}
			}
			/* XXXAD should be got from f_event if !oneshot. */
			*kevp++ = kn->kn_kevent;
			nkev++;
			if (kn->kn_flags & EV_ONESHOT) {
				/* delete ONESHOT events after retrieval */
				mutex_spin_exit(&kq->kq_lock);
				mutex_enter(&fdp->fd_lock);
				knote_detach(kn, fdp, true);
				mutex_spin_enter(&kq->kq_lock);
			} else if (kn->kn_flags & EV_CLEAR) {
				/* clear state after retrieval */
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~KN_ACTIVE;
			} else {
				/* add event back on list */
				kq_check(kq);
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				kq_check(kq);
			}
			if (nkev == kevcnt) {
				/* do copyouts in kevcnt chunks */
				mutex_spin_exit(&kq->kq_lock);
				error = (*keops->keo_put_events)
				    (keops->keo_private,
				    kevbuf, ulistp, nevents, nkev);
				mutex_spin_enter(&kq->kq_lock);
				nevents += nkev;
				nkev = 0;
				kevp = kevbuf;
			}
			count--;
			if (error != 0 || count == 0) {
				/* remove marker */
				TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
				break;
			}
		}
	}
 done:
	mutex_spin_exit(&kq->kq_lock);
	if (marker != NULL)
		kmem_free(marker, sizeof(*marker));
	if (nkev != 0) {
		/* copyout remaining events */
		error = (*keops->keo_put_events)(keops->keo_private,
		    kevbuf, ulistp, nevents, nkev);
	}
	*retval = maxevents - count;

	return error;
}
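
The chunked copyout above means a caller never has to size its buffer to the whole queue, and the EV_ONESHOT/EV_CLEAR branches decide whether a knote survives retrieval. A minimal userland sketch of those delivery semantics, using only the standard kqueue(2)/kevent(2) API; it assumes EVFILT_TIMER (period in milliseconds in data), and the 500 ms periods are arbitrary:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct kevent ev[2], out[2];
	int kq, i, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	/* One-shot 500 ms timer: detached by the kernel after delivery. */
	EV_SET(&ev[0], 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500, 0);
	/* EV_CLEAR 500 ms timer: kn_data is reset after each retrieval. */
	EV_SET(&ev[1], 2, EVFILT_TIMER, EV_ADD | EV_CLEAR, 0, 500, 0);
	if (kevent(kq, ev, 2, NULL, 0, NULL) == -1)
		err(1, "kevent: register");
	/* Block until at least one timer fires, then drain up to two. */
	if ((n = kevent(kq, NULL, 0, out, 2, NULL)) == -1)
		err(1, "kevent: wait");
	for (i = 0; i < n; i++)
		printf("ident %ju fired %jd time(s)\n",
		    (uintmax_t)out[i].ident, (intmax_t)out[i].data);
	return 0;
}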

/*
 * fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported. They both use struct kfilter_mapping:
 *	KFILTER_BYFILTER	find name for filter, and return result in
 *				name, which is of size len.
 *	KFILTER_BYNAME		find filter for name. len is ignored.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(file_t *fp, u_long com, void *data)
{
	struct kfilter_mapping *km;
	const struct kfilter *kfilter;
	char *name;
	int error;

	km = data;
	error = 0;
	name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);

	switch (com) {
	case KFILTER_BYFILTER:	/* convert filter -> name */
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byfilter(km->filter);
		if (kfilter != NULL) {
			strlcpy(name, kfilter->name, KFILTER_MAXNAME);
			rw_exit(&kqueue_filter_lock);
			error = copyoutstr(name, km->name, km->len, NULL);
		} else {
			rw_exit(&kqueue_filter_lock);
			error = ENOENT;
		}
		break;

	case KFILTER_BYNAME:	/* convert name -> filter */
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			break;
		}
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byname(name);
		if (kfilter != NULL)
			km->filter = kfilter->filter;
		else
			error = ENOENT;
		rw_exit(&kqueue_filter_lock);
		break;

	default:
		error = ENOTTY;
		break;

	}
	kmem_free(name, KFILTER_MAXNAME);
	return (error);
}
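
Both ioctls are reachable from userland on the kqueue descriptor itself. A sketch assuming, as on NetBSD, that <sys/event.h> exposes struct kfilter_mapping and the KFILTER_* ioctl numbers, and that "EVFILT_READ" is one of the built-in filter names this file registers:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/ioctl.h>
#include <err.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct kfilter_mapping km;
	char found[32], want[] = "EVFILT_READ";
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Name -> filter number (len is ignored in this direction). */
	memset(&km, 0, sizeof(km));
	km.name = want;
	if (ioctl(kq, KFILTER_BYNAME, &km) == -1)
		err(1, "KFILTER_BYNAME");
	printf("%s -> %u\n", want, km.filter);

	/* Filter number -> name, copied out into a buffer of size len. */
	km.name = found;
	km.len = sizeof(found);
	if (ioctl(kq, KFILTER_BYFILTER, &km) == -1)
		err(1, "KFILTER_BYFILTER");
	printf("%u -> %s\n", km.filter, found);
	return 0;
}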

/*
 * fileops fcntl method for a kqueue descriptor.
 */
static int
kqueue_fcntl(file_t *fp, u_int com, void *data)
{

	return (ENOTTY);
}

/*
 * fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
 */
static int
kqueue_poll(file_t *fp, int events)
{
	struct kqueue *kq;
	int revents;

	kq = fp->f_data;

	revents = 0;
	if (events & (POLLIN | POLLRDNORM)) {
		mutex_spin_enter(&kq->kq_lock);
		if (kq->kq_count != 0) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &kq->kq_sel);
		}
		kq_check(kq);
		mutex_spin_exit(&kq->kq_lock);
	}

	return revents;
}
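
Because kqueue_poll() reports POLLIN whenever kq_count is non-zero, a kqueue descriptor can itself be passed to poll(2). A small userland sketch using only standard APIs (the 100 ms timeout is arbitrary):

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <poll.h>
#include <stdio.h>

int
main(void)
{
	struct pollfd pfd;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	pfd.fd = kq;
	pfd.events = POLLIN;
	/* No knotes registered yet, so this should time out. */
	switch (poll(&pfd, 1, 100)) {
	case -1:
		err(1, "poll");
	case 0:
		printf("no events pending\n");
		break;
	default:
		printf("kqueue is readable\n");
	}
	return 0;
}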

/*
 * fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
static int
kqueue_stat(file_t *fp, struct stat *st)
{
	struct kqueue *kq;

	kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;

	return 0;
}
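
What this buys userland, as a sketch: fstat(2) on a kqueue descriptor yields st_size equal to the pending event count and a FIFO mode bit, exactly the dummy record built above (standard APIs only):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/stat.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct stat st;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	if (fstat(kq, &st) == -1)
		err(1, "fstat");
	printf("pending events: %jd, S_ISFIFO: %d\n",
	    (intmax_t)st.st_size, S_ISFIFO(st.st_mode) ? 1 : 0);
	return 0;
}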

static void
kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
{
	struct knote *kn;
	filedesc_t *fdp;

	fdp = kq->kq_fdp;

	KASSERT(mutex_owned(&fdp->fd_lock));

	for (kn = SLIST_FIRST(list); kn != NULL;) {
		if (kq != kn->kn_kq) {
			kn = SLIST_NEXT(kn, kn_link);
			continue;
		}
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
		kn = SLIST_FIRST(list);
	}
}


/*
 * fileops close method for a kqueue descriptor.
 */
static int
kqueue_close(file_t *fp)
{
	struct kqueue *kq;
	filedesc_t *fdp;
	fdfile_t *ff;
	int i;

	kq = fp->f_data;
	fp->f_data = NULL;
	fp->f_type = 0;
	fdp = curlwp->l_fd;

	mutex_enter(&fdp->fd_lock);
	for (i = 0; i <= fdp->fd_lastkqfile; i++) {
		if ((ff = fdp->fd_dt->dt_ff[i]) == NULL)
			continue;
		kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
		}
	}
	mutex_exit(&fdp->fd_lock);

	KASSERT(kq->kq_count == 0);
	mutex_destroy(&kq->kq_lock);
	cv_destroy(&kq->kq_cv);
	seldestroy(&kq->kq_sel);
	kmem_free(kq, sizeof(*kq));

	return (0);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
static int
kqueue_kqfilter(file_t *fp, struct knote *kn)
{
	struct kqueue *kq;

	kq = ((file_t *)kn->kn_obj)->f_data;

	KASSERT(fp == kn->kn_obj);

	if (kn->kn_filter != EVFILT_READ)
		return 1;

	kn->kn_fop = &kqread_filtops;
	mutex_enter(&kq->kq_lock);
	SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
	mutex_exit(&kq->kq_lock);

	return 0;
}
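
Since kqueue_kqfilter() accepts only EVFILT_READ, one kqueue can watch another for readability, which allows nesting. A minimal sketch using only the documented kqueue(2)/kevent(2) API:

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent ev;
	int inner, outer;

	if ((inner = kqueue()) == -1 || (outer = kqueue()) == -1)
		err(1, "kqueue");
	/* Watch the inner kqueue: fires when it has pending events. */
	EV_SET(&ev, inner, EVFILT_READ, EV_ADD, 0, 0, 0);
	if (kevent(outer, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent");
	printf("outer kqueue now watches fd %d\n", inner);
	return 0;
}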


/*
 * Walk down a list of knotes, activating them if their event has
 * triggered.  The caller's object lock (e.g. device driver lock)
 * must be held.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *tmpkn;

	SLIST_FOREACH_SAFE(kn, list, kn_selnext, tmpkn) {
		if ((*kn->kn_fop->f_event)(kn, hint))
			knote_activate(kn);
	}
}
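
For driver authors, a sketch of the calling convention the comment above describes. The softc layout, lock, and function name below are hypothetical; only knote()/KNOTE() and the selinfo klist come from this file and <sys/event.h>/<sys/selinfo.h>:

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/selinfo.h>
#include <sys/event.h>

struct mydrv_softc {			/* hypothetical device softc */
	kmutex_t	sc_lock;	/* the "object lock" noted above */
	struct selinfo	sc_rsel;	/* readers' select/kqueue state */
	size_t		sc_bytes_ready;
};

/*
 * On receive interrupt, record the new data and activate any knotes
 * attached to the device's klist, holding the driver lock as required.
 */
static void
mydrv_rxintr(struct mydrv_softc *sc, size_t nbytes)	/* hypothetical */
{

	mutex_enter(&sc->sc_lock);
	sc->sc_bytes_ready += nbytes;
	/* KNOTE() expands to knote(); each f_event decides if it fires. */
	KNOTE(&sc->sc_rsel.sel_klist, 0);
	mutex_exit(&sc->sc_lock);
}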

/*
 * Remove all knotes referencing a specified fd.
 */
void
knote_fdclose(int fd)
{
	struct klist *list;
	struct knote *kn;
	filedesc_t *fdp;

	fdp = curlwp->l_fd;
	list = (struct klist *)&fdp->fd_dt->dt_ff[fd]->ff_knlist;
	mutex_enter(&fdp->fd_lock);
	while ((kn = SLIST_FIRST(list)) != NULL) {
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
	}
	mutex_exit(&fdp->fd_lock);
}
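
knote_fdclose() is what makes an explicit EV_DELETE unnecessary when a monitored descriptor is closed. A userland sketch of that behavior, with a pipe standing in for any monitored fd (standard APIs only):

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	struct kevent ev;
	int fds[2], kq;

	if (pipe(fds) == -1 || (kq = kqueue()) == -1)
		err(1, "setup");
	EV_SET(&ev, fds[0], EVFILT_READ, EV_ADD, 0, 0, 0);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent");
	close(fds[0]);	/* knote_fdclose() detaches the knote for us */
	return 0;
}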

/*
 * Drop knote.  Called with fdp->fd_lock held, and will drop before
 * returning.
 */
static void
knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
{
	struct klist *list;
	struct kqueue *kq;

	kq = kn->kn_kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);
	KASSERT(mutex_owned(&fdp->fd_lock));

	/* Remove from monitored object. */