| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: kern_descrip.c,v 1.251 2021/06/29 22:40:53 dholland Exp $ */ | | 1 | /* $NetBSD: kern_descrip.c,v 1.251.10.1 2023/07/30 12:09:51 martin Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Andrew Doran. | | 8 | * by Andrew Doran. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -60,27 +60,27 @@ | | | @@ -60,27 +60,27 @@ |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
63 | * SUCH DAMAGE. | | 63 | * SUCH DAMAGE. |
64 | * | | 64 | * |
65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 | | 65 | * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 |
66 | */ | | 66 | */ |
67 | | | 67 | |
68 | /* | | 68 | /* |
69 | * File descriptor management. | | 69 | * File descriptor management. |
70 | */ | | 70 | */ |
71 | | | 71 | |
72 | #include <sys/cdefs.h> | | 72 | #include <sys/cdefs.h> |
73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.251 2021/06/29 22:40:53 dholland Exp $"); | | 73 | __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.251.10.1 2023/07/30 12:09:51 martin Exp $"); |
74 | | | 74 | |
75 | #include <sys/param.h> | | 75 | #include <sys/param.h> |
76 | #include <sys/systm.h> | | 76 | #include <sys/systm.h> |
77 | #include <sys/filedesc.h> | | 77 | #include <sys/filedesc.h> |
78 | #include <sys/kernel.h> | | 78 | #include <sys/kernel.h> |
79 | #include <sys/proc.h> | | 79 | #include <sys/proc.h> |
80 | #include <sys/file.h> | | 80 | #include <sys/file.h> |
81 | #include <sys/socket.h> | | 81 | #include <sys/socket.h> |
82 | #include <sys/socketvar.h> | | 82 | #include <sys/socketvar.h> |
83 | #include <sys/stat.h> | | 83 | #include <sys/stat.h> |
84 | #include <sys/ioctl.h> | | 84 | #include <sys/ioctl.h> |
85 | #include <sys/fcntl.h> | | 85 | #include <sys/fcntl.h> |
86 | #include <sys/pool.h> | | 86 | #include <sys/pool.h> |
| @@ -382,30 +382,65 @@ fd_getfile(unsigned fd) | | | @@ -382,30 +382,65 @@ fd_getfile(unsigned fd) |
382 | /* Now get a reference to the descriptor. */ | | 382 | /* Now get a reference to the descriptor. */ |
383 | if (fdp->fd_refcnt == 1) { | | 383 | if (fdp->fd_refcnt == 1) { |
384 | /* | | 384 | /* |
385 | * Single threaded: don't need to worry about concurrent | | 385 | * Single threaded: don't need to worry about concurrent |
386 | * access (other than earlier calls to kqueue, which may | | 386 | * access (other than earlier calls to kqueue, which may |
387 | * hold a reference to the descriptor). | | 387 | * hold a reference to the descriptor). |
388 | */ | | 388 | */ |
389 | ff->ff_refcnt++; | | 389 | ff->ff_refcnt++; |
390 | } else { | | 390 | } else { |
391 | /* | | 391 | /* |
392 | * Multi threaded: issue a memory barrier to ensure that we | | 392 | * Multi threaded: issue a memory barrier to ensure that we |
393 | * acquire the file pointer _after_ adding a reference. If | | 393 | * acquire the file pointer _after_ adding a reference. If |
394 | * no memory barrier, we could fetch a stale pointer. | | 394 | * no memory barrier, we could fetch a stale pointer. |
| | | 395 | * |
| | | 396 | * In particular, we must coordinate the following four |
| | | 397 | * memory operations: |
| | | 398 | * |
| | | 399 | * A. fd_close store ff->ff_file = NULL |
| | | 400 | * B. fd_close refcnt = atomic_dec_uint_nv(&ff->ff_refcnt) |
| | | 401 | * C. fd_getfile atomic_inc_uint(&ff->ff_refcnt) |
| | | 402 | * D. fd_getfile load fp = ff->ff_file |
| | | 403 | * |
| | | 404 | * If the order is D;A;B;C: |
| | | 405 | * |
| | | 406 | * 1. D: fp = ff->ff_file |
| | | 407 | * 2. A: ff->ff_file = NULL |
| | | 408 | * 3. B: refcnt = atomic_dec_uint_nv(&ff->ff_refcnt) |
| | | 409 | * 4. C: atomic_inc_uint(&ff->ff_refcnt) |
| | | 410 | * |
| | | 411 | * then fd_close determines that there are no more |
| | | 412 | * references and decides to free fp immediately, at |
| | | 413 | * the same time that fd_getfile ends up with an fp that's |
| | | 414 | * about to be freed. *boom* |
| | | 415 | * |
| | | 416 | * By making B a release operation in fd_close, and by |
| | | 417 | * making C an acquire operation in fd_getfile, since |
| | | 418 | * they are atomic operations on the same object, which |
| | | 419 | * has a total modification order, we guarantee either: |
| | | 420 | * |
| | | 421 | * - B happens before C. Then since A is |
| | | 422 | * sequenced before B in fd_close, and C is |
| | | 423 | * sequenced before D in fd_getfile, we |
| | | 424 | * guarantee A happens before D, so fd_getfile |
| | | 425 | * reads a null fp and safely fails. |
| | | 426 | * |
| | | 427 | * - C happens before B. Then fd_getfile may read |
| | | 428 | * null or nonnull, but either way, fd_close |
| | | 429 | * will safely wait for references to drain. |
395 | */ | | 430 | */ |
396 | atomic_inc_uint(&ff->ff_refcnt); | | 431 | atomic_inc_uint(&ff->ff_refcnt); |
397 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 432 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
398 | membar_enter(); | | 433 | membar_acquire(); |
399 | #endif | | 434 | #endif |
400 | } | | 435 | } |
401 | | | 436 | |
402 | /* | | 437 | /* |
403 | * If the file is not open or is being closed then put the | | 438 | * If the file is not open or is being closed then put the |
404 | * reference back. | | 439 | * reference back. |
405 | */ | | 440 | */ |
406 | fp = atomic_load_consume(&ff->ff_file); | | 441 | fp = atomic_load_consume(&ff->ff_file); |
407 | if (__predict_true(fp != NULL)) { | | 442 | if (__predict_true(fp != NULL)) { |
408 | return fp; | | 443 | return fp; |
409 | } | | 444 | } |
410 | fd_putfile(fd); | | 445 | fd_putfile(fd); |
411 | return NULL; | | 446 | return NULL; |
| @@ -441,27 +476,27 @@ fd_putfile(unsigned fd) | | | @@ -441,27 +476,27 @@ fd_putfile(unsigned fd) |
441 | } | | 476 | } |
442 | ff->ff_refcnt--; | | 477 | ff->ff_refcnt--; |
443 | return; | | 478 | return; |
444 | } | | 479 | } |
445 | | | 480 | |
446 | /* | | 481 | /* |
447 | * Ensure that any use of the file is complete and globally | | 482 | * Ensure that any use of the file is complete and globally |
448 | * visible before dropping the final reference. If no membar, | | 483 | * visible before dropping the final reference. If no membar, |
449 | * the current CPU could still access memory associated with | | 484 | * the current CPU could still access memory associated with |
450 | * the file after it has been freed or recycled by another | | 485 | * the file after it has been freed or recycled by another |
451 | * CPU. | | 486 | * CPU. |
452 | */ | | 487 | */ |
453 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 488 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
454 | membar_exit(); | | 489 | membar_release(); |
455 | #endif | | 490 | #endif |
456 | | | 491 | |
457 | /* | | 492 | /* |
458 | * Be optimistic and start out with the assumption that no other | | 493 | * Be optimistic and start out with the assumption that no other |
459 | * threads are trying to close the descriptor. If the CAS fails, | | 494 | * threads are trying to close the descriptor. If the CAS fails, |
460 | * we lost a race and/or it's being closed. | | 495 | * we lost a race and/or it's being closed. |
461 | */ | | 496 | */ |
462 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { | | 497 | for (u = ff->ff_refcnt & FR_MASK;; u = v) { |
463 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); | | 498 | v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); |
464 | if (__predict_true(u == v)) { | | 499 | if (__predict_true(u == v)) { |
465 | return; | | 500 | return; |
466 | } | | 501 | } |
467 | if (__predict_false((v & FR_CLOSING) != 0)) { | | 502 | if (__predict_false((v & FR_CLOSING) != 0)) { |
| @@ -592,64 +627,70 @@ fd_close(unsigned fd) | | | @@ -592,64 +627,70 @@ fd_close(unsigned fd) |
592 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; | | 627 | ff = atomic_load_consume(&fdp->fd_dt)->dt_ff[fd]; |
593 | | | 628 | |
594 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 629 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
595 | | | 630 | |
596 | mutex_enter(&fdp->fd_lock); | | 631 | mutex_enter(&fdp->fd_lock); |
597 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); | | 632 | KASSERT((ff->ff_refcnt & FR_MASK) > 0); |
598 | fp = atomic_load_consume(&ff->ff_file); | | 633 | fp = atomic_load_consume(&ff->ff_file); |
599 | if (__predict_false(fp == NULL)) { | | 634 | if (__predict_false(fp == NULL)) { |
600 | /* | | 635 | /* |
601 | * Another user of the file is already closing, and is | | 636 | * Another user of the file is already closing, and is |
602 | * waiting for other users of the file to drain. Release | | 637 | * waiting for other users of the file to drain. Release |
603 | * our reference, and wake up the closer. | | 638 | * our reference, and wake up the closer. |
604 | */ | | 639 | */ |
| | | 640 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
| | | 641 | membar_release(); |
| | | 642 | #endif |
605 | atomic_dec_uint(&ff->ff_refcnt); | | 643 | atomic_dec_uint(&ff->ff_refcnt); |
606 | cv_broadcast(&ff->ff_closing); | | 644 | cv_broadcast(&ff->ff_closing); |
607 | mutex_exit(&fdp->fd_lock); | | 645 | mutex_exit(&fdp->fd_lock); |
608 | | | 646 | |
609 | /* | | 647 | /* |
610 | * An application error, so pretend that the descriptor | | 648 | * An application error, so pretend that the descriptor |
611 | * was already closed. We can't safely wait for it to | | 649 | * was already closed. We can't safely wait for it to |
612 | * be closed without potentially deadlocking. | | 650 | * be closed without potentially deadlocking. |
613 | */ | | 651 | */ |
614 | return (EBADF); | | 652 | return (EBADF); |
615 | } | | 653 | } |
616 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); | | 654 | KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); |
617 | | | 655 | |
618 | /* | | 656 | /* |
619 | * There may be multiple users of this file within the process. | | 657 | * There may be multiple users of this file within the process. |
620 | * Notify existing and new users that the file is closing. This | | 658 | * Notify existing and new users that the file is closing. This |
621 | * will prevent them from adding additional uses to this file | | 659 | * will prevent them from adding additional uses to this file |
622 | * while we are closing it. | | 660 | * while we are closing it. |
623 | */ | | 661 | */ |
624 | ff->ff_file = NULL; | | 662 | atomic_store_relaxed(&ff->ff_file, NULL); |
625 | ff->ff_exclose = false; | | 663 | ff->ff_exclose = false; |
626 | | | 664 | |
627 | /* | | 665 | /* |
628 | * We expect the caller to hold a descriptor reference - drop it. | | 666 | * We expect the caller to hold a descriptor reference - drop it. |
629 | * The reference count may increase beyond zero at this point due | | 667 | * The reference count may increase beyond zero at this point due |
630 | * to an erroneous descriptor reference by an application, but | | 668 | * to an erroneous descriptor reference by an application, but |
631 | * fd_getfile() will notice that the file is being closed and drop | | 669 | * fd_getfile() will notice that the file is being closed and drop |
632 | * the reference again. | | 670 | * the reference again. |
633 | */ | | 671 | */ |
634 | if (fdp->fd_refcnt == 1) { | | 672 | if (fdp->fd_refcnt == 1) { |
635 | /* Single threaded. */ | | 673 | /* Single threaded. */ |
636 | refcnt = --(ff->ff_refcnt); | | 674 | refcnt = --(ff->ff_refcnt); |
637 | } else { | | 675 | } else { |
638 | /* Multi threaded. */ | | 676 | /* Multi threaded. */ |
639 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 677 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
640 | membar_producer(); | | 678 | membar_release(); |
641 | #endif | | 679 | #endif |
642 | refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); | | 680 | refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); |
| | | 681 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
| | | 682 | membar_acquire(); |
| | | 683 | #endif |
643 | } | | 684 | } |
644 | if (__predict_false(refcnt != 0)) { | | 685 | if (__predict_false(refcnt != 0)) { |
645 | /* | | 686 | /* |
646 | * Wait for other references to drain. This is typically | | 687 | * Wait for other references to drain. This is typically |
647 | * an application error - the descriptor is being closed | | 688 | * an application error - the descriptor is being closed |
648 | * while still in use. | | 689 | * while still in use. |
649 | * (Or just a threaded application trying to unblock its | | 690 | * (Or just a threaded application trying to unblock its |
650 | * thread that sleeps in (say) accept()). | | 691 | * thread that sleeps in (say) accept()). |
651 | */ | | 692 | */ |
652 | atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); | | 693 | atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); |
653 | | | 694 | |
654 | /* | | 695 | /* |
655 | * Remove any knotes attached to the file. A knote | | 696 | * Remove any knotes attached to the file. A knote |
| @@ -1136,45 +1177,39 @@ fd_affix(proc_t *p, file_t *fp, unsigned | | | @@ -1136,45 +1177,39 @@ fd_affix(proc_t *p, file_t *fp, unsigned |
1136 | fdfile_t *ff; | | 1177 | fdfile_t *ff; |
1137 | filedesc_t *fdp; | | 1178 | filedesc_t *fdp; |
1138 | fdtab_t *dt; | | 1179 | fdtab_t *dt; |
1139 | | | 1180 | |
1140 | KASSERT(p == curproc || p == &proc0); | | 1181 | KASSERT(p == curproc || p == &proc0); |
1141 | | | 1182 | |
1142 | /* Add a reference to the file structure. */ | | 1183 | /* Add a reference to the file structure. */ |
1143 | mutex_enter(&fp->f_lock); | | 1184 | mutex_enter(&fp->f_lock); |
1144 | fp->f_count++; | | 1185 | fp->f_count++; |
1145 | mutex_exit(&fp->f_lock); | | 1186 | mutex_exit(&fp->f_lock); |
1146 | | | 1187 | |
1147 | /* | | 1188 | /* |
1148 | * Insert the new file into the descriptor slot. | | 1189 | * Insert the new file into the descriptor slot. |
1149 | * | | | |
1150 | * The memory barriers provided by lock activity in this routine | | | |
1151 | * ensure that any updates to the file structure become globally | | | |
1152 | * visible before the file becomes visible to other LWPs in the | | | |
1153 | * current process; otherwise we would set ff->ff_file with | | | |
1154 | * atomic_store_release(&ff->ff_file, fp) at the bottom. | | | |
1155 | */ | | 1190 | */ |
1156 | fdp = p->p_fd; | | 1191 | fdp = p->p_fd; |
1157 | dt = atomic_load_consume(&fdp->fd_dt); | | 1192 | dt = atomic_load_consume(&fdp->fd_dt); |
1158 | ff = dt->dt_ff[fd]; | | 1193 | ff = dt->dt_ff[fd]; |
1159 | | | 1194 | |
1160 | KASSERT(ff != NULL); | | 1195 | KASSERT(ff != NULL); |
1161 | KASSERT(ff->ff_file == NULL); | | 1196 | KASSERT(ff->ff_file == NULL); |
1162 | KASSERT(ff->ff_allocated); | | 1197 | KASSERT(ff->ff_allocated); |
1163 | KASSERT(fd_isused(fdp, fd)); | | 1198 | KASSERT(fd_isused(fdp, fd)); |
1164 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 1199 | KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |
1165 | | | 1200 | |
1166 | /* No need to lock in order to make file initially visible. */ | | 1201 | /* No need to lock in order to make file initially visible. */ |
1167 | ff->ff_file = fp; | | 1202 | atomic_store_release(&ff->ff_file, fp); |
1168 | } | | 1203 | } |
1169 | | | 1204 | |
1170 | /* | | 1205 | /* |
1171 | * Abort creation of a new descriptor: free descriptor slot and file. | | 1206 | * Abort creation of a new descriptor: free descriptor slot and file. |
1172 | */ | | 1207 | */ |
1173 | void | | 1208 | void |
1174 | fd_abort(proc_t *p, file_t *fp, unsigned fd) | | 1209 | fd_abort(proc_t *p, file_t *fp, unsigned fd) |
1175 | { | | 1210 | { |
1176 | filedesc_t *fdp; | | 1211 | filedesc_t *fdp; |
1177 | fdfile_t *ff; | | 1212 | fdfile_t *ff; |
1178 | | | 1213 | |
1179 | KASSERT(p == curproc || p == &proc0); | | 1214 | KASSERT(p == curproc || p == &proc0); |
1180 | | | 1215 | |
| @@ -1522,30 +1557,33 @@ fd_free(void) | | | @@ -1522,30 +1557,33 @@ fd_free(void) |
1522 | file_t *fp; | | 1557 | file_t *fp; |
1523 | int fd, nf; | | 1558 | int fd, nf; |
1524 | fdtab_t *dt; | | 1559 | fdtab_t *dt; |
1525 | lwp_t * const l = curlwp; | | 1560 | lwp_t * const l = curlwp; |
1526 | filedesc_t * const fdp = l->l_fd; | | 1561 | filedesc_t * const fdp = l->l_fd; |
1527 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; | | 1562 | const bool noadvlock = (l->l_proc->p_flag & PK_ADVLOCK) == 0; |
1528 | | | 1563 | |
1529 | KASSERT(atomic_load_consume(&fdp->fd_dt)->dt_ff[0] == | | 1564 | KASSERT(atomic_load_consume(&fdp->fd_dt)->dt_ff[0] == |
1530 | (fdfile_t *)fdp->fd_dfdfile[0]); | | 1565 | (fdfile_t *)fdp->fd_dfdfile[0]); |
1531 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); | | 1566 | KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); |
1532 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); | | 1567 | KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); |
1533 | | | 1568 | |
1534 | #ifndef __HAVE_ATOMIC_AS_MEMBAR | | 1569 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
1535 | membar_exit(); | | 1570 | membar_release(); |
1536 | #endif | | 1571 | #endif |
1537 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) | | 1572 | if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) |
1538 | return; | | 1573 | return; |
| | | 1574 | #ifndef __HAVE_ATOMIC_AS_MEMBAR |
| | | 1575 | membar_acquire(); |
| | | 1576 | #endif |
1539 | | | 1577 | |
1540 | /* | | 1578 | /* |
1541 | * Close any files that the process holds open. | | 1579 | * Close any files that the process holds open. |
1542 | */ | | 1580 | */ |
1543 | dt = fdp->fd_dt; | | 1581 | dt = fdp->fd_dt; |
1544 | fd_checkmaps(fdp); | | 1582 | fd_checkmaps(fdp); |
1545 | #ifdef DEBUG | | 1583 | #ifdef DEBUG |
1546 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ | | 1584 | fdp->fd_refcnt = -1; /* see fd_checkmaps */ |
1547 | #endif | | 1585 | #endif |
1548 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { | | 1586 | for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { |
1549 | ff = dt->dt_ff[fd]; | | 1587 | ff = dt->dt_ff[fd]; |
1550 | KASSERT(fd >= NDFDFILE || | | 1588 | KASSERT(fd >= NDFDFILE || |
1551 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); | | 1589 | ff == (fdfile_t *)fdp->fd_dfdfile[fd]); |