| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: if_tap.c,v 1.101 2017/10/30 16:01:19 ozaki-r Exp $ */ | | 1 | /* $NetBSD: if_tap.c,v 1.102 2017/11/29 19:21:44 jmcneill Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation. | | 4 | * Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
| @@ -23,47 +23,48 @@ | | | @@ -23,47 +23,48 @@ |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. | | 26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ | | 27 | */ |
28 | | | 28 | |
29 | /* | | 29 | /* |
30 | * tap(4) is a virtual Ethernet interface. It appears as a real Ethernet | | 30 | * tap(4) is a virtual Ethernet interface. It appears as a real Ethernet |
31 | * device to the system, but can also be accessed by userland through a | | 31 | * device to the system, but can also be accessed by userland through a |
32 | * character device interface, which allows reading and injecting frames. | | 32 | * character device interface, which allows reading and injecting frames. |
33 | */ | | 33 | */ |
34 | | | 34 | |
35 | #include <sys/cdefs.h> | | 35 | #include <sys/cdefs.h> |
36 | __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.101 2017/10/30 16:01:19 ozaki-r Exp $"); | | 36 | __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.102 2017/11/29 19:21:44 jmcneill Exp $"); |
37 | | | 37 | |
38 | #if defined(_KERNEL_OPT) | | 38 | #if defined(_KERNEL_OPT) |
39 | | | 39 | |
40 | #include "opt_modular.h" | | 40 | #include "opt_modular.h" |
41 | #include "opt_compat_netbsd.h" | | 41 | #include "opt_compat_netbsd.h" |
42 | #endif | | 42 | #endif |
43 | | | 43 | |
44 | #include <sys/param.h> | | 44 | #include <sys/param.h> |
45 | #include <sys/atomic.h> | | 45 | #include <sys/atomic.h> |
46 | #include <sys/conf.h> | | 46 | #include <sys/conf.h> |
47 | #include <sys/cprng.h> | | 47 | #include <sys/cprng.h> |
48 | #include <sys/device.h> | | 48 | #include <sys/device.h> |
49 | #include <sys/file.h> | | 49 | #include <sys/file.h> |
50 | #include <sys/filedesc.h> | | 50 | #include <sys/filedesc.h> |
51 | #include <sys/intr.h> | | 51 | #include <sys/intr.h> |
52 | #include <sys/kauth.h> | | 52 | #include <sys/kauth.h> |
53 | #include <sys/kernel.h> | | 53 | #include <sys/kernel.h> |
54 | #include <sys/kmem.h> | | 54 | #include <sys/kmem.h> |
55 | #include <sys/module.h> | | 55 | #include <sys/module.h> |
56 | #include <sys/mutex.h> | | 56 | #include <sys/mutex.h> |
| | | 57 | #include <sys/condvar.h> |
57 | #include <sys/poll.h> | | 58 | #include <sys/poll.h> |
58 | #include <sys/proc.h> | | 59 | #include <sys/proc.h> |
59 | #include <sys/select.h> | | 60 | #include <sys/select.h> |
60 | #include <sys/sockio.h> | | 61 | #include <sys/sockio.h> |
61 | #include <sys/stat.h> | | 62 | #include <sys/stat.h> |
62 | #include <sys/sysctl.h> | | 63 | #include <sys/sysctl.h> |
63 | #include <sys/systm.h> | | 64 | #include <sys/systm.h> |
64 | | | 65 | |
65 | #include <net/if.h> | | 66 | #include <net/if.h> |
66 | #include <net/if_dl.h> | | 67 | #include <net/if_dl.h> |
67 | #include <net/if_ether.h> | | 68 | #include <net/if_ether.h> |
68 | #include <net/if_media.h> | | 69 | #include <net/if_media.h> |
69 | #include <net/if_tap.h> | | 70 | #include <net/if_tap.h> |
| @@ -99,28 +100,28 @@ static void sysctl_tap_setup(struct sysc | | | @@ -99,28 +100,28 @@ static void sysctl_tap_setup(struct sysc |
99 | */ | | 100 | */ |
100 | | | 101 | |
101 | struct tap_softc { | | 102 | struct tap_softc { |
102 | device_t sc_dev; | | 103 | device_t sc_dev; |
103 | struct ifmedia sc_im; | | 104 | struct ifmedia sc_im; |
104 | struct ethercom sc_ec; | | 105 | struct ethercom sc_ec; |
105 | int sc_flags; | | 106 | int sc_flags; |
106 | #define TAP_INUSE 0x00000001 /* tap device can only be opened once */ | | 107 | #define TAP_INUSE 0x00000001 /* tap device can only be opened once */ |
107 | #define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */ | | 108 | #define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */ |
108 | #define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */ | | 109 | #define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */ |
109 | #define TAP_GOING 0x00000008 /* interface is being destroyed */ | | 110 | #define TAP_GOING 0x00000008 /* interface is being destroyed */ |
110 | struct selinfo sc_rsel; | | 111 | struct selinfo sc_rsel; |
111 | pid_t sc_pgid; /* For async. IO */ | | 112 | pid_t sc_pgid; /* For async. IO */ |
112 | kmutex_t sc_rdlock; | | 113 | kmutex_t sc_lock; |
113 | kmutex_t sc_kqlock; | | 114 | kcondvar_t sc_cv; |
114 | void *sc_sih; | | 115 | void *sc_sih; |
115 | struct timespec sc_atime; | | 116 | struct timespec sc_atime; |
116 | struct timespec sc_mtime; | | 117 | struct timespec sc_mtime; |
117 | struct timespec sc_btime; | | 118 | struct timespec sc_btime; |
118 | }; | | 119 | }; |
119 | | | 120 | |
120 | /* autoconf(9) glue */ | | 121 | /* autoconf(9) glue */ |
121 | | | 122 | |
122 | static int tap_match(device_t, cfdata_t, void *); | | 123 | static int tap_match(device_t, cfdata_t, void *); |
123 | static void tap_attach(device_t, device_t, void *); | | 124 | static void tap_attach(device_t, device_t, void *); |
124 | static int tap_detach(device_t, int); | | 125 | static int tap_detach(device_t, int); |
125 | | | 126 | |
126 | CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc), | | 127 | CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc), |
| @@ -172,27 +173,27 @@ static int tap_cdev_kqfilter(dev_t, stru | | | @@ -172,27 +173,27 @@ static int tap_cdev_kqfilter(dev_t, stru |
172 | | | 173 | |
173 | const struct cdevsw tap_cdevsw = { | | 174 | const struct cdevsw tap_cdevsw = { |
174 | .d_open = tap_cdev_open, | | 175 | .d_open = tap_cdev_open, |
175 | .d_close = tap_cdev_close, | | 176 | .d_close = tap_cdev_close, |
176 | .d_read = tap_cdev_read, | | 177 | .d_read = tap_cdev_read, |
177 | .d_write = tap_cdev_write, | | 178 | .d_write = tap_cdev_write, |
178 | .d_ioctl = tap_cdev_ioctl, | | 179 | .d_ioctl = tap_cdev_ioctl, |
179 | .d_stop = nostop, | | 180 | .d_stop = nostop, |
180 | .d_tty = notty, | | 181 | .d_tty = notty, |
181 | .d_poll = tap_cdev_poll, | | 182 | .d_poll = tap_cdev_poll, |
182 | .d_mmap = nommap, | | 183 | .d_mmap = nommap, |
183 | .d_kqfilter = tap_cdev_kqfilter, | | 184 | .d_kqfilter = tap_cdev_kqfilter, |
184 | .d_discard = nodiscard, | | 185 | .d_discard = nodiscard, |
185 | .d_flag = D_OTHER | | 186 | .d_flag = D_OTHER | D_MPSAFE |
186 | }; | | 187 | }; |
187 | | | 188 | |
188 | #define TAP_CLONER 0xfffff /* Maximal minor value */ | | 189 | #define TAP_CLONER 0xfffff /* Maximal minor value */ |
189 | | | 190 | |
190 | /* kqueue-related routines */ | | 191 | /* kqueue-related routines */ |
191 | static void tap_kqdetach(struct knote *); | | 192 | static void tap_kqdetach(struct knote *); |
192 | static int tap_kqread(struct knote *, long); | | 193 | static int tap_kqread(struct knote *, long); |
193 | | | 194 | |
194 | /* | | 195 | /* |
195 | * Those are needed by the if_media interface. | | 196 | * Those are needed by the if_media interface. |
196 | */ | | 197 | */ |
197 | | | 198 | |
198 | static int tap_mediachange(struct ifnet *); | | 199 | static int tap_mediachange(struct ifnet *); |
| @@ -305,42 +306,28 @@ tap_attach(device_t parent, device_t sel | | | @@ -305,42 +306,28 @@ tap_attach(device_t parent, device_t sel |
305 | const struct sysctlnode *node; | | 306 | const struct sysctlnode *node; |
306 | int error; | | 307 | int error; |
307 | uint8_t enaddr[ETHER_ADDR_LEN] = | | 308 | uint8_t enaddr[ETHER_ADDR_LEN] = |
308 | { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff }; | | 309 | { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff }; |
309 | char enaddrstr[3 * ETHER_ADDR_LEN]; | | 310 | char enaddrstr[3 * ETHER_ADDR_LEN]; |
310 | | | 311 | |
311 | sc->sc_dev = self; | | 312 | sc->sc_dev = self; |
312 | sc->sc_sih = NULL; | | 313 | sc->sc_sih = NULL; |
313 | getnanotime(&sc->sc_btime); | | 314 | getnanotime(&sc->sc_btime); |
314 | sc->sc_atime = sc->sc_mtime = sc->sc_btime; | | 315 | sc->sc_atime = sc->sc_mtime = sc->sc_btime; |
315 | sc->sc_flags = 0; | | 316 | sc->sc_flags = 0; |
316 | selinit(&sc->sc_rsel); | | 317 | selinit(&sc->sc_rsel); |
317 | | | 318 | |
318 | /* | | 319 | cv_init(&sc->sc_cv, "tapread"); |
319 | * Initialize the two locks for the device. | | 320 | mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NET); |
320 | * | | | |
321 | * We need a lock here because even though the tap device can be | | | |
322 | * opened only once, the file descriptor might be passed to another | | | |
323 | * process, say a fork(2)ed child. | | | |
324 | * | | | |
325 | * The Giant saves us from most of the hassle, but since the read | | | |
326 | * operation can sleep, we don't want two processes to wake up at | | | |
327 | * the same moment and both try and dequeue a single packet. | | | |
328 | * | | | |
329 | * The queue for event listeners (used by kqueue(9), see below) has | | | |
330 | * to be protected too, so use a spin lock. | | | |
331 | */ | | | |
332 | mutex_init(&sc->sc_rdlock, MUTEX_DEFAULT, IPL_NONE); | | | |
333 | mutex_init(&sc->sc_kqlock, MUTEX_DEFAULT, IPL_VM); | | | |
334 | | | 321 | |
335 | if (!pmf_device_register(self, NULL, NULL)) | | 322 | if (!pmf_device_register(self, NULL, NULL)) |
336 | aprint_error_dev(self, "couldn't establish power handler\n"); | | 323 | aprint_error_dev(self, "couldn't establish power handler\n"); |
337 | | | 324 | |
338 | /* | | 325 | /* |
339 | * In order to obtain unique initial Ethernet address on a host, | | 326 | * In order to obtain unique initial Ethernet address on a host, |
340 | * do some randomisation. It's not meant for anything but avoiding | | 327 | * do some randomisation. It's not meant for anything but avoiding |
341 | * hard-coding an address. | | 328 | * hard-coding an address. |
342 | */ | | 329 | */ |
343 | cprng_fast(&enaddr[3], 3); | | 330 | cprng_fast(&enaddr[3], 3); |
344 | | | 331 | |
345 | aprint_verbose_dev(self, "Ethernet address %s\n", | | 332 | aprint_verbose_dev(self, "Ethernet address %s\n", |
346 | ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr)); | | 333 | ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr)); |
| @@ -375,32 +362,32 @@ tap_attach(device_t parent, device_t sel | | | @@ -375,32 +362,32 @@ tap_attach(device_t parent, device_t sel |
375 | ifp->if_start = tap_start; | | 362 | ifp->if_start = tap_start; |
376 | ifp->if_stop = tap_stop; | | 363 | ifp->if_stop = tap_stop; |
377 | ifp->if_init = tap_init; | | 364 | ifp->if_init = tap_init; |
378 | IFQ_SET_READY(&ifp->if_snd); | | 365 | IFQ_SET_READY(&ifp->if_snd); |
379 | | | 366 | |
380 | sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU; | | 367 | sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU; |
381 | | | 368 | |
382 | /* Those steps are mandatory for an Ethernet driver. */ | | 369 | /* Those steps are mandatory for an Ethernet driver. */ |
383 | error = if_initialize(ifp); | | 370 | error = if_initialize(ifp); |
384 | if (error != 0) { | | 371 | if (error != 0) { |
385 | aprint_error_dev(self, "if_initialize failed(%d)\n", error); | | 372 | aprint_error_dev(self, "if_initialize failed(%d)\n", error); |
386 | ifmedia_removeall(&sc->sc_im); | | 373 | ifmedia_removeall(&sc->sc_im); |
387 | pmf_device_deregister(self); | | 374 | pmf_device_deregister(self); |
388 | mutex_destroy(&sc->sc_rdlock); | | 375 | mutex_destroy(&sc->sc_lock); |
389 | mutex_destroy(&sc->sc_kqlock); | | | |
390 | seldestroy(&sc->sc_rsel); | | 376 | seldestroy(&sc->sc_rsel); |
391 | | | 377 | |
392 | return; /* Error */ | | 378 | return; /* Error */ |
393 | } | | 379 | } |
| | | 380 | ifp->if_percpuq = if_percpuq_create(ifp); |
394 | ether_ifattach(ifp, enaddr); | | 381 | ether_ifattach(ifp, enaddr); |
395 | if_register(ifp); | | 382 | if_register(ifp); |
396 | | | 383 | |
397 | /* | | 384 | /* |
398 | * Add a sysctl node for that interface. | | 385 | * Add a sysctl node for that interface. |
399 | * | | 386 | * |
400 | * The pointer transmitted is not a string, but instead a pointer to | | 387 | * The pointer transmitted is not a string, but instead a pointer to |
401 | * the softc structure, which we can use to build the string value on | | 388 | * the softc structure, which we can use to build the string value on |
402 | * the fly in the helper function of the node. See the comments for | | 389 | * the fly in the helper function of the node. See the comments for |
403 | * tap_sysctl_handler for details. | | 390 | * tap_sysctl_handler for details. |
404 | * | | 391 | * |
405 | * Usually sysctl_createv is called with CTL_CREATE as the before-last | | 392 | * Usually sysctl_createv is called with CTL_CREATE as the before-last |
406 | * component. However, we can allocate a number ourselves, as we are | | 393 | * component. However, we can allocate a number ourselves, as we are |
| @@ -418,54 +405,51 @@ tap_attach(device_t parent, device_t sel | | | @@ -418,54 +405,51 @@ tap_attach(device_t parent, device_t sel |
418 | "sysctl_createv returned %d, ignoring\n", error); | | 405 | "sysctl_createv returned %d, ignoring\n", error); |
419 | } | | 406 | } |
420 | | | 407 | |
421 | /* | | 408 | /* |
422 | * When detaching, we do the inverse of what is done in the attach | | 409 | * When detaching, we do the inverse of what is done in the attach |
423 | * routine, in reversed order. | | 410 | * routine, in reversed order. |
424 | */ | | 411 | */ |
425 | static int | | 412 | static int |
426 | tap_detach(device_t self, int flags) | | 413 | tap_detach(device_t self, int flags) |
427 | { | | 414 | { |
428 | struct tap_softc *sc = device_private(self); | | 415 | struct tap_softc *sc = device_private(self); |
429 | struct ifnet *ifp = &sc->sc_ec.ec_if; | | 416 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
430 | int error; | | 417 | int error; |
431 | int s; | | | |
432 | | | 418 | |
433 | sc->sc_flags |= TAP_GOING; | | 419 | sc->sc_flags |= TAP_GOING; |
434 | s = splnet(); | | | |
435 | tap_stop(ifp, 1); | | 420 | tap_stop(ifp, 1); |
436 | if_down(ifp); | | 421 | if_down(ifp); |
437 | splx(s); | | | |
438 | | | 422 | |
439 | if (sc->sc_sih != NULL) { | | 423 | if (sc->sc_sih != NULL) { |
440 | softint_disestablish(sc->sc_sih); | | 424 | softint_disestablish(sc->sc_sih); |
441 | sc->sc_sih = NULL; | | 425 | sc->sc_sih = NULL; |
442 | } | | 426 | } |
443 | | | 427 | |
444 | /* | | 428 | /* |
445 | * Destroying a single leaf is a very straightforward operation using | | 429 | * Destroying a single leaf is a very straightforward operation using |
446 | * sysctl_destroyv. One should be sure to always end the path with | | 430 | * sysctl_destroyv. One should be sure to always end the path with |
447 | * CTL_EOL. | | 431 | * CTL_EOL. |
448 | */ | | 432 | */ |
449 | if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node, | | 433 | if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node, |
450 | device_unit(sc->sc_dev), CTL_EOL)) != 0) | | 434 | device_unit(sc->sc_dev), CTL_EOL)) != 0) |
451 | aprint_error_dev(self, | | 435 | aprint_error_dev(self, |
452 | "sysctl_destroyv returned %d, ignoring\n", error); | | 436 | "sysctl_destroyv returned %d, ignoring\n", error); |
453 | ether_ifdetach(ifp); | | 437 | ether_ifdetach(ifp); |
454 | if_detach(ifp); | | 438 | if_detach(ifp); |
455 | ifmedia_removeall(&sc->sc_im); | | 439 | ifmedia_removeall(&sc->sc_im); |
456 | seldestroy(&sc->sc_rsel); | | 440 | seldestroy(&sc->sc_rsel); |
457 | mutex_destroy(&sc->sc_rdlock); | | 441 | mutex_destroy(&sc->sc_lock); |
458 | mutex_destroy(&sc->sc_kqlock); | | 442 | cv_destroy(&sc->sc_cv); |
459 | | | 443 | |
460 | pmf_device_deregister(self); | | 444 | pmf_device_deregister(self); |
461 | | | 445 | |
462 | return 0; | | 446 | return 0; |
463 | } | | 447 | } |
464 | | | 448 | |
465 | /* | | 449 | /* |
466 | * This function is called by the ifmedia layer to notify the driver | | 450 | * This function is called by the ifmedia layer to notify the driver |
467 | * that the user requested a media change. A real driver would | | 451 | * that the user requested a media change. A real driver would |
468 | * reconfigure the hardware. | | 452 | * reconfigure the hardware. |
469 | */ | | 453 | */ |
470 | static int | | 454 | static int |
471 | tap_mediachange(struct ifnet *ifp) | | 455 | tap_mediachange(struct ifnet *ifp) |
| @@ -506,45 +490,48 @@ tap_mediastatus(struct ifnet *ifp, struc | | | @@ -506,45 +490,48 @@ tap_mediastatus(struct ifnet *ifp, struc |
506 | * userland. For that we stay in OACTIVE mode while the userland gets | | 490 | * userland. For that we stay in OACTIVE mode while the userland gets |
507 | * the packets, and we send a signal to the processes waiting to read. | | 491 | * the packets, and we send a signal to the processes waiting to read. |
508 | * | | 492 | * |
509 | * wakeup(sc) is the counterpart to the tsleep call in | | 493 | * cv_broadcast(&sc->sc_cv) is the counterpart to the cv_wait_sig call in |
510 | * tap_dev_read, while selnotify() is used for kevent(2) and | | 494 | * tap_dev_read, while selnotify() is used for kevent(2) and |
511 | * poll(2) (which includes select(2)) listeners. | | 495 | * poll(2) (which includes select(2)) listeners. |
512 | */ | | 496 | */ |
513 | static void | | 497 | static void |
514 | tap_start(struct ifnet *ifp) | | 498 | tap_start(struct ifnet *ifp) |
515 | { | | 499 | { |
516 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; | | 500 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
517 | struct mbuf *m0; | | 501 | struct mbuf *m0; |
518 | | | 502 | |
| | | 503 | mutex_enter(&sc->sc_lock); |
519 | if ((sc->sc_flags & TAP_INUSE) == 0) { | | 504 | if ((sc->sc_flags & TAP_INUSE) == 0) { |
520 | /* Simply drop packets */ | | 505 | /* Simply drop packets */ |
521 | for(;;) { | | 506 | for(;;) { |
522 | IFQ_DEQUEUE(&ifp->if_snd, m0); | | 507 | IFQ_DEQUEUE(&ifp->if_snd, m0); |
523 | if (m0 == NULL) | | 508 | if (m0 == NULL) |
524 | return; | | 509 | goto done; |
525 | | | 510 | |
526 | ifp->if_opackets++; | | 511 | ifp->if_opackets++; |
527 | bpf_mtap(ifp, m0); | | 512 | bpf_mtap(ifp, m0); |
528 | | | 513 | |
529 | m_freem(m0); | | 514 | m_freem(m0); |
530 | } | | 515 | } |
531 | } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) { | | 516 | } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) { |
532 | ifp->if_flags |= IFF_OACTIVE; | | 517 | ifp->if_flags |= IFF_OACTIVE; |
533 | wakeup(sc); | | 518 | cv_broadcast(&sc->sc_cv); |
534 | selnotify(&sc->sc_rsel, 0, 1); | | 519 | selnotify(&sc->sc_rsel, 0, 1); |
535 | if (sc->sc_flags & TAP_ASYNCIO) | | 520 | if (sc->sc_flags & TAP_ASYNCIO) |
536 | softint_schedule(sc->sc_sih); | | 521 | softint_schedule(sc->sc_sih); |
537 | } | | 522 | } |
| | | 523 | done: |
| | | 524 | mutex_exit(&sc->sc_lock); |
538 | } | | 525 | } |
539 | | | 526 | |
540 | static void | | 527 | static void |
541 | tap_softintr(void *cookie) | | 528 | tap_softintr(void *cookie) |
542 | { | | 529 | { |
543 | struct tap_softc *sc; | | 530 | struct tap_softc *sc; |
544 | struct ifnet *ifp; | | 531 | struct ifnet *ifp; |
545 | int a, b; | | 532 | int a, b; |
546 | | | 533 | |
547 | sc = cookie; | | 534 | sc = cookie; |
548 | | | 535 | |
549 | if (sc->sc_flags & TAP_ASYNCIO) { | | 536 | if (sc->sc_flags & TAP_ASYNCIO) { |
550 | ifp = &sc->sc_ec.ec_if; | | 537 | ifp = &sc->sc_ec.ec_if; |
| @@ -635,31 +622,33 @@ tap_init(struct ifnet *ifp) | | | @@ -635,31 +622,33 @@ tap_init(struct ifnet *ifp) |
635 | /* | | 622 | /* |
636 | * _stop() is called when an interface goes down. It is our | | 623 | * _stop() is called when an interface goes down. It is our |
637 | * responsibility to validate that state by clearing the | | 625 | * responsibility to validate that state by clearing the |
638 | * IFF_RUNNING flag. | | 625 | * IFF_RUNNING flag. |
639 | * | | 626 | * |
640 | * We have to wake up all the sleeping processes to have the pending | | 627 | * We have to wake up all the sleeping processes to have the pending |
641 | * read requests cancelled. | | 628 | * read requests cancelled. |
642 | */ | | 629 | */ |
643 | static void | | 630 | static void |
644 | tap_stop(struct ifnet *ifp, int disable) | | 631 | tap_stop(struct ifnet *ifp, int disable) |
645 | { | | 632 | { |
646 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; | | 633 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
647 | | | 634 | |
| | | 635 | mutex_enter(&sc->sc_lock); |
648 | ifp->if_flags &= ~IFF_RUNNING; | | 636 | ifp->if_flags &= ~IFF_RUNNING; |
649 | wakeup(sc); | | 637 | cv_broadcast(&sc->sc_cv); |
650 | selnotify(&sc->sc_rsel, 0, 1); | | 638 | selnotify(&sc->sc_rsel, 0, 1); |
651 | if (sc->sc_flags & TAP_ASYNCIO) | | 639 | if (sc->sc_flags & TAP_ASYNCIO) |
652 | softint_schedule(sc->sc_sih); | | 640 | softint_schedule(sc->sc_sih); |
| | | 641 | mutex_exit(&sc->sc_lock); |
653 | } | | 642 | } |
654 | | | 643 | |
655 | /* | | 644 | /* |
656 | * The 'create' command of ifconfig can be used to create | | 645 | * The 'create' command of ifconfig can be used to create |
657 | * any numbered instance of a given device. Thus we have to | | 646 | * any numbered instance of a given device. Thus we have to |
658 | * make sure we have enough room in cd_devs to create the | | 647 | * make sure we have enough room in cd_devs to create the |
659 | * user-specified instance. config_attach_pseudo will do this | | 648 | * user-specified instance. config_attach_pseudo will do this |
660 | * for us. | | 649 | * for us. |
661 | */ | | 650 | */ |
662 | static int | | 651 | static int |
663 | tap_clone_create(struct if_clone *ifc, int unit) | | 652 | tap_clone_create(struct if_clone *ifc, int unit) |
664 | { | | 653 | { |
665 | if (tap_clone_creator(unit) == NULL) { | | 654 | if (tap_clone_creator(unit) == NULL) { |
| @@ -921,100 +910,90 @@ tap_fops_read(file_t *fp, off_t *offp, s | | | @@ -921,100 +910,90 @@ tap_fops_read(file_t *fp, off_t *offp, s |
921 | | | 910 | |
922 | KERNEL_LOCK(1, NULL); | | 911 | KERNEL_LOCK(1, NULL); |
923 | error = tap_dev_read(fp->f_devunit, uio, flags); | | 912 | error = tap_dev_read(fp->f_devunit, uio, flags); |
924 | KERNEL_UNLOCK_ONE(NULL); | | 913 | KERNEL_UNLOCK_ONE(NULL); |
925 | return error; | | 914 | return error; |
926 | } | | 915 | } |
927 | | | 916 | |
928 | static int | | 917 | static int |
929 | tap_dev_read(int unit, struct uio *uio, int flags) | | 918 | tap_dev_read(int unit, struct uio *uio, int flags) |
930 | { | | 919 | { |
931 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); | | 920 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); |
932 | struct ifnet *ifp; | | 921 | struct ifnet *ifp; |
933 | struct mbuf *m, *n; | | 922 | struct mbuf *m, *n; |
934 | int error = 0, s; | | 923 | int error = 0; |
935 | | | 924 | |
936 | if (sc == NULL) | | 925 | if (sc == NULL) |
937 | return ENXIO; | | 926 | return ENXIO; |
938 | | | 927 | |
939 | getnanotime(&sc->sc_atime); | | 928 | getnanotime(&sc->sc_atime); |
940 | | | 929 | |
941 | ifp = &sc->sc_ec.ec_if; | | 930 | ifp = &sc->sc_ec.ec_if; |
942 | if ((ifp->if_flags & IFF_UP) == 0) | | 931 | if ((ifp->if_flags & IFF_UP) == 0) |
943 | return EHOSTDOWN; | | 932 | return EHOSTDOWN; |
944 | | | 933 | |
945 | /* | | 934 | /* |
946 | * In the TAP_NBIO case, we have to make sure we won't be sleeping | | 935 | * In the TAP_NBIO case, we have to make sure we won't be sleeping |
947 | */ | | 936 | */ |
948 | if ((sc->sc_flags & TAP_NBIO) != 0) { | | 937 | if ((sc->sc_flags & TAP_NBIO) != 0) { |
949 | if (!mutex_tryenter(&sc->sc_rdlock)) | | 938 | if (!mutex_tryenter(&sc->sc_lock)) |
950 | return EWOULDBLOCK; | | 939 | return EWOULDBLOCK; |
951 | } else { | | 940 | } else { |
952 | mutex_enter(&sc->sc_rdlock); | | 941 | mutex_enter(&sc->sc_lock); |
953 | } | | 942 | } |
954 | | | 943 | |
955 | s = splnet(); | | | |
956 | if (IFQ_IS_EMPTY(&ifp->if_snd)) { | | 944 | if (IFQ_IS_EMPTY(&ifp->if_snd)) { |
957 | ifp->if_flags &= ~IFF_OACTIVE; | | 945 | ifp->if_flags &= ~IFF_OACTIVE; |
958 | /* | | | |
959 | * We must release the lock before sleeping, and re-acquire it | | | |
960 | * after. | | | |
961 | */ | | | |
962 | mutex_exit(&sc->sc_rdlock); | | | |
963 | if (sc->sc_flags & TAP_NBIO) | | 946 | if (sc->sc_flags & TAP_NBIO) |
964 | error = EWOULDBLOCK; | | 947 | error = EWOULDBLOCK; |
965 | else | | 948 | else |
966 | error = tsleep(sc, PSOCK|PCATCH, "tap", 0); | | 949 | error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); |
967 | splx(s); | | | |
968 | | | 950 | |
969 | if (error != 0) | | 951 | if (error != 0) { |
| | | 952 | mutex_exit(&sc->sc_lock); |
970 | return error; | | 953 | return error; |
| | | 954 | } |
971 | /* The device might have been downed */ | | 955 | /* The device might have been downed */ |
972 | if ((ifp->if_flags & IFF_UP) == 0) | | 956 | if ((ifp->if_flags & IFF_UP) == 0) { |
| | | 957 | mutex_exit(&sc->sc_lock); |
973 | return EHOSTDOWN; | | 958 | return EHOSTDOWN; |
974 | if ((sc->sc_flags & TAP_NBIO)) { | | | |
975 | if (!mutex_tryenter(&sc->sc_rdlock)) | | | |
976 | return EWOULDBLOCK; | | | |
977 | } else { | | | |
978 | mutex_enter(&sc->sc_rdlock); | | | |
979 | } | | 959 | } |
980 | s = splnet(); | | | |
981 | } | | 960 | } |
982 | | | 961 | |
983 | IFQ_DEQUEUE(&ifp->if_snd, m); | | 962 | IFQ_DEQUEUE(&ifp->if_snd, m); |
| | | 963 | mutex_exit(&sc->sc_lock); |
| | | 964 | |
984 | ifp->if_flags &= ~IFF_OACTIVE; | | 965 | ifp->if_flags &= ~IFF_OACTIVE; |
985 | splx(s); | | | |
986 | if (m == NULL) { | | 966 | if (m == NULL) { |
987 | error = 0; | | 967 | error = 0; |
988 | goto out; | | 968 | goto out; |
989 | } | | 969 | } |
990 | | | 970 | |
991 | ifp->if_opackets++; | | 971 | ifp->if_opackets++; |
992 | bpf_mtap(ifp, m); | | 972 | bpf_mtap(ifp, m); |
993 | | | 973 | |
994 | /* | | 974 | /* |
995 | * One read is one packet. | | 975 | * One read is one packet. |
996 | */ | | 976 | */ |
997 | do { | | 977 | do { |
998 | error = uiomove(mtod(m, void *), | | 978 | error = uiomove(mtod(m, void *), |
999 | min(m->m_len, uio->uio_resid), uio); | | 979 | min(m->m_len, uio->uio_resid), uio); |
1000 | m = n = m_free(m); | | 980 | m = n = m_free(m); |
1001 | } while (m != NULL && uio->uio_resid > 0 && error == 0); | | 981 | } while (m != NULL && uio->uio_resid > 0 && error == 0); |
1002 | | | 982 | |
1003 | if (m != NULL) | | 983 | if (m != NULL) |
1004 | m_freem(m); | | 984 | m_freem(m); |
1005 | | | 985 | |
1006 | out: | | 986 | out: |
1007 | mutex_exit(&sc->sc_rdlock); | | | |
1008 | return error; | | 987 | return error; |
1009 | } | | 988 | } |
1010 | | | 989 | |
1011 | static int | | 990 | static int |
1012 | tap_fops_stat(file_t *fp, struct stat *st) | | 991 | tap_fops_stat(file_t *fp, struct stat *st) |
1013 | { | | 992 | { |
1014 | int error = 0; | | 993 | int error = 0; |
1015 | struct tap_softc *sc; | | 994 | struct tap_softc *sc; |
1016 | int unit = fp->f_devunit; | | 995 | int unit = fp->f_devunit; |
1017 | | | 996 | |
1018 | (void)memset(st, 0, sizeof(*st)); | | 997 | (void)memset(st, 0, sizeof(*st)); |
1019 | | | 998 | |
1020 | KERNEL_LOCK(1, NULL); | | 999 | KERNEL_LOCK(1, NULL); |
| @@ -1051,27 +1030,26 @@ tap_fops_write(file_t *fp, off_t *offp, | | | @@ -1051,27 +1030,26 @@ tap_fops_write(file_t *fp, off_t *offp, |
1051 | error = tap_dev_write(fp->f_devunit, uio, flags); | | 1030 | error = tap_dev_write(fp->f_devunit, uio, flags); |
1052 | KERNEL_UNLOCK_ONE(NULL); | | 1031 | KERNEL_UNLOCK_ONE(NULL); |
1053 | return error; | | 1032 | return error; |
1054 | } | | 1033 | } |
1055 | | | 1034 | |
1056 | static int | | 1035 | static int |
1057 | tap_dev_write(int unit, struct uio *uio, int flags) | | 1036 | tap_dev_write(int unit, struct uio *uio, int flags) |
1058 | { | | 1037 | { |
1059 | struct tap_softc *sc = | | 1038 | struct tap_softc *sc = |
1060 | device_lookup_private(&tap_cd, unit); | | 1039 | device_lookup_private(&tap_cd, unit); |
1061 | struct ifnet *ifp; | | 1040 | struct ifnet *ifp; |
1062 | struct mbuf *m, **mp; | | 1041 | struct mbuf *m, **mp; |
1063 | int error = 0; | | 1042 | int error = 0; |
1064 | int s; | | | |
1065 | | | 1043 | |
1066 | if (sc == NULL) | | 1044 | if (sc == NULL) |
1067 | return ENXIO; | | 1045 | return ENXIO; |
1068 | | | 1046 | |
1069 | getnanotime(&sc->sc_mtime); | | 1047 | getnanotime(&sc->sc_mtime); |
1070 | ifp = &sc->sc_ec.ec_if; | | 1048 | ifp = &sc->sc_ec.ec_if; |
1071 | | | 1049 | |
1072 | /* One write, one packet, that's the rule */ | | 1050 | /* One write, one packet, that's the rule */ |
1073 | MGETHDR(m, M_DONTWAIT, MT_DATA); | | 1051 | MGETHDR(m, M_DONTWAIT, MT_DATA); |
1074 | if (m == NULL) { | | 1052 | if (m == NULL) { |
1075 | ifp->if_ierrors++; | | 1053 | ifp->if_ierrors++; |
1076 | return ENOBUFS; | | 1054 | return ENOBUFS; |
1077 | } | | 1055 | } |
| @@ -1088,29 +1066,27 @@ tap_dev_write(int unit, struct uio *uio, | | | @@ -1088,29 +1066,27 @@ tap_dev_write(int unit, struct uio *uio, |
1088 | } | | 1066 | } |
1089 | (*mp)->m_len = min(MHLEN, uio->uio_resid); | | 1067 | (*mp)->m_len = min(MHLEN, uio->uio_resid); |
1090 | error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio); | | 1068 | error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio); |
1091 | mp = &(*mp)->m_next; | | 1069 | mp = &(*mp)->m_next; |
1092 | } | | 1070 | } |
1093 | if (error) { | | 1071 | if (error) { |
1094 | ifp->if_ierrors++; | | 1072 | ifp->if_ierrors++; |
1095 | m_freem(m); | | 1073 | m_freem(m); |
1096 | return error; | | 1074 | return error; |
1097 | } | | 1075 | } |
1098 | | | 1076 | |
1099 | m_set_rcvif(m, ifp); | | 1077 | m_set_rcvif(m, ifp); |
1100 | | | 1078 | |
1101 | s = splnet(); | | 1079 | if_percpuq_enqueue(ifp->if_percpuq, m); |
1102 | if_input(ifp, m); | | | |
1103 | splx(s); | | | |
1104 | | | 1080 | |
1105 | return 0; | | 1081 | return 0; |
1106 | } | | 1082 | } |
1107 | | | 1083 | |
1108 | static int | | 1084 | static int |
1109 | tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags, | | 1085 | tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags, |
1110 | struct lwp *l) | | 1086 | struct lwp *l) |
1111 | { | | 1087 | { |
1112 | return tap_dev_ioctl(minor(dev), cmd, data, l); | | 1088 | return tap_dev_ioctl(minor(dev), cmd, data, l); |
1113 | } | | 1089 | } |
1114 | | | 1090 | |
1115 | static int | | 1091 | static int |
1116 | tap_fops_ioctl(file_t *fp, u_long cmd, void *data) | | 1092 | tap_fops_ioctl(file_t *fp, u_long cmd, void *data) |
| @@ -1211,29 +1187,29 @@ tap_dev_poll(int unit, int events, struc | | | @@ -1211,29 +1187,29 @@ tap_dev_poll(int unit, int events, struc |
1211 | return POLLERR; | | 1187 | return POLLERR; |
1212 | | | 1188 | |
1213 | if (events & (POLLIN|POLLRDNORM)) { | | 1189 | if (events & (POLLIN|POLLRDNORM)) { |
1214 | struct ifnet *ifp = &sc->sc_ec.ec_if; | | 1190 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1215 | struct mbuf *m; | | 1191 | struct mbuf *m; |
1216 | int s; | | 1192 | int s; |
1217 | | | 1193 | |
1218 | s = splnet(); | | 1194 | s = splnet(); |
1219 | IFQ_POLL(&ifp->if_snd, m); | | 1195 | IFQ_POLL(&ifp->if_snd, m); |
1220 | | | 1196 | |
1221 | if (m != NULL) | | 1197 | if (m != NULL) |
1222 | revents |= events & (POLLIN|POLLRDNORM); | | 1198 | revents |= events & (POLLIN|POLLRDNORM); |
1223 | else { | | 1199 | else { |
1224 | mutex_spin_enter(&sc->sc_kqlock); | | 1200 | mutex_spin_enter(&sc->sc_lock); |
1225 | selrecord(l, &sc->sc_rsel); | | 1201 | selrecord(l, &sc->sc_rsel); |
1226 | mutex_spin_exit(&sc->sc_kqlock); | | 1202 | mutex_spin_exit(&sc->sc_lock); |
1227 | } | | 1203 | } |
1228 | splx(s); | | 1204 | splx(s); |
1229 | } | | 1205 | } |
1230 | revents |= events & (POLLOUT|POLLWRNORM); | | 1206 | revents |= events & (POLLOUT|POLLWRNORM); |
1231 | | | 1207 | |
1232 | return revents; | | 1208 | return revents; |
1233 | } | | 1209 | } |
1234 | | | 1210 | |
1235 | static struct filterops tap_read_filterops = { 1, NULL, tap_kqdetach, | | 1211 | static struct filterops tap_read_filterops = { 1, NULL, tap_kqdetach, |
1236 | tap_kqread }; | | 1212 | tap_kqread }; |
1237 | static struct filterops tap_seltrue_filterops = { 1, NULL, tap_kqdetach, | | 1213 | static struct filterops tap_seltrue_filterops = { 1, NULL, tap_kqdetach, |
1238 | filt_seltrue }; | | 1214 | filt_seltrue }; |
1239 | | | 1215 | |
| @@ -1262,42 +1238,42 @@ tap_dev_kqfilter(int unit, struct knote | | | @@ -1262,42 +1238,42 @@ tap_dev_kqfilter(int unit, struct knote |
1262 | switch(kn->kn_filter) { | | 1238 | switch(kn->kn_filter) { |
1263 | case EVFILT_READ: | | 1239 | case EVFILT_READ: |
1264 | kn->kn_fop = &tap_read_filterops; | | 1240 | kn->kn_fop = &tap_read_filterops; |
1265 | break; | | 1241 | break; |
1266 | case EVFILT_WRITE: | | 1242 | case EVFILT_WRITE: |
1267 | kn->kn_fop = &tap_seltrue_filterops; | | 1243 | kn->kn_fop = &tap_seltrue_filterops; |
1268 | break; | | 1244 | break; |
1269 | default: | | 1245 | default: |
1270 | KERNEL_UNLOCK_ONE(NULL); | | 1246 | KERNEL_UNLOCK_ONE(NULL); |
1271 | return EINVAL; | | 1247 | return EINVAL; |
1272 | } | | 1248 | } |
1273 | | | 1249 | |
1274 | kn->kn_hook = sc; | | 1250 | kn->kn_hook = sc; |
1275 | mutex_spin_enter(&sc->sc_kqlock); | | 1251 | mutex_spin_enter(&sc->sc_lock); |
1276 | SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext); | | 1252 | SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext); |
1277 | mutex_spin_exit(&sc->sc_kqlock); | | 1253 | mutex_spin_exit(&sc->sc_lock); |
1278 | KERNEL_UNLOCK_ONE(NULL); | | 1254 | KERNEL_UNLOCK_ONE(NULL); |
1279 | return 0; | | 1255 | return 0; |
1280 | } | | 1256 | } |
1281 | | | 1257 | |
1282 | static void | | 1258 | static void |
1283 | tap_kqdetach(struct knote *kn) | | 1259 | tap_kqdetach(struct knote *kn) |
1284 | { | | 1260 | { |
1285 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; | | 1261 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
1286 | | | 1262 | |
1287 | KERNEL_LOCK(1, NULL); | | 1263 | KERNEL_LOCK(1, NULL); |
1288 | mutex_spin_enter(&sc->sc_kqlock); | | 1264 | mutex_spin_enter(&sc->sc_lock); |
1289 | SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext); | | 1265 | SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext); |
1290 | mutex_spin_exit(&sc->sc_kqlock); | | 1266 | mutex_spin_exit(&sc->sc_lock); |
1291 | KERNEL_UNLOCK_ONE(NULL); | | 1267 | KERNEL_UNLOCK_ONE(NULL); |
1292 | } | | 1268 | } |
1293 | | | 1269 | |
1294 | static int | | 1270 | static int |
1295 | tap_kqread(struct knote *kn, long hint) | | 1271 | tap_kqread(struct knote *kn, long hint) |
1296 | { | | 1272 | { |
1297 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; | | 1273 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
1298 | struct ifnet *ifp = &sc->sc_ec.ec_if; | | 1274 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1299 | struct mbuf *m; | | 1275 | struct mbuf *m; |
1300 | int s, rv; | | 1276 | int s, rv; |
1301 | | | 1277 | |
1302 | KERNEL_LOCK(1, NULL); | | 1278 | KERNEL_LOCK(1, NULL); |
1303 | s = splnet(); | | 1279 | s = splnet(); |