@@ -1,12 +1,12 @@
-/* $NetBSD: uipc_usrreq.c,v 1.119.4.1 2009/02/16 03:31:13 snj Exp $ */
+/* $NetBSD: uipc_usrreq.c,v 1.119.4.2 2009/03/18 05:33:23 snj Exp $ */
/*-
- * Copyright (c) 1998, 2000, 2004, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
- * NASA Ames Research Center.
+ * NASA Ames Research Center, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -96,7 +96,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.119.4.1 2009/02/16 03:31:13 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.119.4.2 2009/03/18 05:33:23 snj Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -117,6 +117,8 @@
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/uidinfo.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
/*
* Unix communications domain.
@@ -169,16 +171,34 @@
ino_t unp_ino; /* prototype for fake inode numbers */
struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
+static void unp_mark(file_t *);
+static void unp_scan(struct mbuf *, void (*)(file_t *), int);
+static void unp_discard_now(file_t *);
+static void unp_discard_later(file_t *);
+static void unp_thread(void *);
+static void unp_thread_kick(void);
static kmutex_t *uipc_lock;
+static kcondvar_t unp_thread_cv;
+static lwp_t *unp_thread_lwp;
+static SLIST_HEAD(,file) unp_thread_discard;
+static int unp_defer;
+
/*
* Initialize Unix protocols.
*/
void
uipc_init(void)
{
+ int error;
uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+ cv_init(&unp_thread_cv, "unpgc");
+
+ error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
+ NULL, &unp_thread_lwp, "unpgc");
+ if (error != 0)
+ panic("uipc_init %d", error);
}
/*
@@ -290,11 +310,9 @@
if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
control) == 0) {
so2->so_rcv.sb_overflowed++;
- sounlock(so2);
unp_dispose(control);
m_freem(control);
m_freem(m);
- solock(so2);
return (ENOBUFS);
} else {
sorwakeup(so2);
@@ -518,11 +536,9 @@
error = ENOTCONN;
}
if (error) {
- sounlock(so);
unp_dispose(control);
m_freem(control);
m_freem(m);
- solock(so);
break;
}
KASSERT(p != NULL);
@@ -571,10 +587,8 @@
#undef snd
#undef rcv
if (control != NULL) {
- sounlock(so);
unp_dispose(control);
m_freem(control);
- solock(so);
}
break;
@@ -724,7 +738,8 @@
u_long unpdg_sendspace = 2*1024; /* really max datagram size */
u_long unpdg_recvspace = 4*1024;
-u_int unp_rights; /* file descriptors in flight */
+u_int unp_rights; /* files in flight */
+u_int unp_rights_ratio = 2; /* limit, fraction of maxfiles */
int
unp_attach(struct socket *so)
@@ -808,17 +823,14 @@
so->so_pcb = NULL;
if (unp_rights) {
/*
- * Normally the receive buffer is flushed later,
- * in sofree, but if our receive buffer holds references
- * to descriptors that are now garbage, we will dispose
- * of those descriptor references after the garbage collector
- * gets them (resulting in a "panic: closef: count < 0").
+ * Normally the receive buffer is flushed later, in sofree,
+ * but if our receive buffer holds references to files that
+ * are now garbage, we will enqueue those file references to
+ * the garbage collector and kick it into action.
*/
sorflush(so);
unp_free(unp);
- sounlock(so);
- unp_gc();
- solock(so);
+ unp_thread_kick();
} else
unp_free(unp);
}
@@ -1165,7 +1177,7 @@
fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
- /* Make sure the recipient should be able to see the descriptors.. */
+ /* Make sure the recipient should be able to see the files.. */
if (p->p_cwdi->cwdi_rdir != NULL) {
rp = (file_t **)CMSG_DATA(cm);
for (i = 0; i < nfds; i++) {
@@ -1192,19 +1204,15 @@
if (error != 0) {
for (i = 0; i < nfds; i++) {
fp = *rp;
- /*
- * zero the pointer before calling unp_discard,
- * since it may end up in unp_gc()..
- */
*rp++ = 0;
- unp_discard(fp);
+ unp_discard_now(fp);
}
goto out;
}
/*
* First loop -- allocate file descriptor table slots for the
- * new descriptors.
+ * new files.
*/
for (i = 0; i < nfds; i++) {
fp = *rp++;
@@ -1232,7 +1240,7 @@
/*
* Now that adding them has succeeded, update all of the
- * descriptor passing state.
+ * file passing state and affix the descriptors.
*/
rp = (file_t **)CMSG_DATA(cm);
for (i = 0; i < nfds; i++) {
@@ -1267,13 +1275,14 @@
int
unp_internalize(struct mbuf **controlp)
{
- struct filedesc *fdescp = curlwp->l_fd;
+ filedesc_t *fdescp = curlwp->l_fd;
struct mbuf *control = *controlp;
struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
file_t **rp, **files;
file_t *fp;
int i, fd, *fdp;
int nfds, error;
+ u_int maxmsg;
error = 0;
newcm = NULL;
@@ -1290,9 +1299,17 @@
*/
nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
fdp = (int *)CMSG_DATA(cm);
+ maxmsg = maxfiles / unp_rights_ratio;
for (i = 0; i < nfds; i++) {
fd = *fdp++;
+ if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
+ atomic_dec_uint(&unp_rights);
+ nfds = i;
+ error = EAGAIN;
+ goto out;
+ }
if ((fp = fd_getfile(fd)) == NULL) {
+ atomic_dec_uint(&unp_rights);
nfds = i;
error = EBADF;
goto out;
@@ -1324,7 +1341,6 @@
fp->f_count++;
fp->f_msgcount++;
mutex_exit(&fp->f_lock);
- atomic_inc_uint(&unp_rights);
}
out:
@@ -1332,6 +1348,9 @@
fdp = (int *)CMSG_DATA(cm);
for (i = 0; i < nfds; i++) {
fd_putfile(*fdp++);
+ if (error != 0) {
+ atomic_dec_uint(&unp_rights);
+ }
}
if (error == 0) {
@@ -1404,68 +1423,82 @@
return (control);
}
-int unp_defer, unp_gcing;
-extern struct domain unixdomain;
-
/*
- * Comment added long after the fact explaining what's going on here.
- * Do a mark-sweep GC of file descriptors on the system, to free up
- * any which are caught in flight to an about-to-be-closed socket.
- *
- * Traditional mark-sweep gc's start at the "root", and mark
- * everything reachable from the root (which, in our case would be the
- * process table). The mark bits are cleared during the sweep.
- *
- * XXX For some inexplicable reason (perhaps because the file
- * descriptor tables used to live in the u area which could be swapped
- * out and thus hard to reach), we do multiple scans over the set of
- * descriptors, using use *two* mark bits per object (DEFER and MARK).
- * Whenever we find a descriptor which references other descriptors,
- * the ones it references are marked with both bits, and we iterate
- * over the whole file table until there are no more DEFER bits set.
- * We also make an extra pass *before* the GC to clear the mark bits,
- * which could have been cleared at almost no cost during the previous
- * sweep.
+ * Do a mark-sweep GC of files in the system, to free up any which are
+ * caught in flight to an about-to-be-closed socket. Additionally,
+ * process deferred file closures.
*/
-void
-unp_gc(void)
+static void
+unp_gc(file_t *dp)
{
- file_t *fp, *nextfp;
+ extern struct domain unixdomain;
+ file_t *fp, *np;
struct socket *so, *so1;
- file_t **extra_ref, **fpp;
- int nunref, nslots, i;
+ u_int i, old, new;
+ bool didwork;
- if (atomic_swap_uint(&unp_gcing, 1) == 1)
- return;
+ KASSERT(curlwp == unp_thread_lwp);
+ KASSERT(mutex_owned(&filelist_lock));
- restart:
- nslots = nfiles * 2;
- extra_ref = kmem_alloc(nslots * sizeof(file_t *), KM_SLEEP);
+ /*
+ * First, process deferred file closures.
+ */
+ while (!SLIST_EMPTY(&unp_thread_discard)) {
+ fp = SLIST_FIRST(&unp_thread_discard);
+ KASSERT(fp->f_unpcount > 0);
+ KASSERT(fp->f_count > 0);
+ KASSERT(fp->f_msgcount > 0);
+ KASSERT(fp->f_count >= fp->f_unpcount);
+ KASSERT(fp->f_count >= fp->f_msgcount);
+ KASSERT(fp->f_msgcount >= fp->f_unpcount);
+ SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
+ i = fp->f_unpcount;
+ fp->f_unpcount = 0;
+ mutex_exit(&filelist_lock);
+ for (; i != 0; i--) {
+ unp_discard_now(fp);
+ }
+ mutex_enter(&filelist_lock);
+ }
- mutex_enter(&filelist_lock);
+ /*
+ * Clear mark bits. Ensure that we don't consider new files
+ * entering the file table during this loop (they will not have
+ * FSCAN set).
+ */
unp_defer = 0;
-
- /* Clear mark bits */
LIST_FOREACH(fp, &filehead, f_list) {
- atomic_and_uint(&fp->f_flag, ~(FMARK|FDEFER));
+ for (old = fp->f_flag;; old = new) {
+ new = atomic_cas_uint(&fp->f_flag, old,
+ (old | FSCAN) & ~(FMARK|FDEFER));
+ if (__predict_true(old == new)) {
+ break;
+ }
+ }
}
/*
- * Iterate over the set of descriptors, marking ones believed
- * (based on refcount) to be referenced from a process, and
- * marking for rescan descriptors which are queued on a socket.
+ * Iterate over the set of sockets, marking ones believed (based on
+ * refcount) to be referenced from a process, and marking for rescan
+ * sockets which are queued on a socket. Rescan continues descending
+ * and searching for sockets referenced by sockets (FDEFER), until
+ * there are no more socket->socket references to be discovered.
*/
do {
- LIST_FOREACH(fp, &filehead, f_list) {
+ didwork = false;
+ for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
+ KASSERT(mutex_owned(&filelist_lock));
+ np = LIST_NEXT(fp, f_list);
mutex_enter(&fp->f_lock);
- if (fp->f_flag & FDEFER) {
+ if ((fp->f_flag & FDEFER) != 0) {
atomic_and_uint(&fp->f_flag, ~FDEFER);
unp_defer--;
KASSERT(fp->f_count != 0);
} else {
if (fp->f_count == 0 ||
- (fp->f_flag & FMARK) ||
- fp->f_count == fp->f_msgcount) {
+ (fp->f_flag & FMARK) != 0 ||
+ fp->f_count == fp->f_msgcount ||
+ fp->f_unpcount != 0) {
mutex_exit(&fp->f_lock);
continue;
}
@@ -1475,44 +1508,25 @@
if (fp->f_type != DTYPE_SOCKET ||
(so = fp->f_data) == NULL ||
so->so_proto->pr_domain != &unixdomain ||
- (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
+ (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
mutex_exit(&fp->f_lock);
continue;
}
-#ifdef notdef
- if (so->so_rcv.sb_flags & SB_LOCK) {
- mutex_exit(&fp->f_lock);
- mutex_exit(&filelist_lock);
- kmem_free(extra_ref, nslots * sizeof(file_t *));
- /*
- * This is problematical; it's not clear
- * we need to wait for the sockbuf to be
- * unlocked (on a uniprocessor, at least),
- * and it's also not clear what to do
- * if sbwait returns an error due to receipt
- * of a signal. If sbwait does return
- * an error, we'll go into an infinite
- * loop. Delete all of this for now.
- */
- (void) sbwait(&so->so_rcv);
- goto restart;
- }
-#endif
+
+ /* Gain file ref, mark our position, and unlock. */
+ didwork = true;
+ LIST_INSERT_AFTER(fp, dp, f_list);
+ fp->f_count++;
mutex_exit(&fp->f_lock);
+ mutex_exit(&filelist_lock);
/*
- * XXX Locking a socket with filelist_lock held
- * is ugly. filelist_lock can be taken by the
- * pagedaemon when reclaiming items from file_cache.
- * Socket activity could delay the pagedaemon.
+ * Mark files referenced from sockets queued on the
+ * accept queue as well.
*/
solock(so);
unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
- /*
- * Mark descriptors referenced from sockets queued
- * on the accept queue as well.
- */
- if (so->so_options & SO_ACCEPTCONN) {
+ if ((so->so_options & SO_ACCEPTCONN) != 0) {
TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
}
@@ -1521,84 +1535,115 @@
}
}
sounlock(so);
+
+ /* Re-lock and restart from where we left off. */
+ closef(fp);
+ mutex_enter(&filelist_lock);
+ np = LIST_NEXT(dp, f_list);
+ LIST_REMOVE(dp, f_list);
}
- } while (unp_defer);
+ /*
+ * Bail early if we did nothing in the loop above. Could
+ * happen because of concurrent activity causing unp_defer
+ * to get out of sync.
+ */
+ } while (unp_defer != 0 && didwork);
/*
- * Sweep pass. Find unmarked descriptors, and free them.
+ * Sweep pass.
*
- * We grab an extra reference to each of the file table entries
- * that are not otherwise accessible and then free the rights
- * that are stored in messages on them.
- *
- * The bug in the original code is a little tricky, so I'll describe
- * what's wrong with it here.
- *
- * It is incorrect to simply unp_discard each entry for f_msgcount
- * times -- consider the case of sockets A and B that contain
- * references to each other. On a last close of some other socket,
- * we trigger a gc since the number of outstanding rights (unp_rights)
- * is non-zero. If during the sweep phase the gc code un_discards,
- * we end up doing a (full) closef on the descriptor. A closef on A
- * results in the following chain. Closef calls soo_close, which
- * calls soclose. Soclose calls first (through the switch
- * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
- * returns because the previous instance had set unp_gcing, and
- * we return all the way back to soclose, which marks the socket
- * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
- * to free up the rights that are queued in messages on the socket A,
- * i.e., the reference on B. The sorflush calls via the dom_dispose
- * switch unp_dispose, which unp_scans with unp_discard. This second
- * instance of unp_discard just calls closef on B.
- *
- * Well, a similar chain occurs on B, resulting in a sorflush on B,
- * which results in another closef on A. Unfortunately, A is already
- * being closed, and the descriptor has already been marked with
- * SS_NOFDREF, and soclose panics at this point.
- *
- * Here, we first take an extra reference to each inaccessible
- * descriptor. Then, if the inaccessible descriptor is a
- * socket, we call sorflush in case it is a Unix domain
- * socket. After we destroy all the rights carried in
- * messages, we do a last closef to get rid of our extra
- * reference. This is the last close, and the unp_detach etc
- * will shut down the socket.
- *
- * 91/09/19, bsy@cs.cmu.edu
+ * We grab an extra reference to each of the files that are
+ * not otherwise accessible and then free the rights that are
+ * stored in messages on them.
*/
- if (nslots < nfiles) {
- mutex_exit(&filelist_lock);
- kmem_free(extra_ref, nslots * sizeof(file_t *));
- goto restart;
- }
- for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
- fp = nextfp) {
- nextfp = LIST_NEXT(fp, f_list);
+ for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
+ KASSERT(mutex_owned(&filelist_lock));
+ np = LIST_NEXT(fp, f_list);
mutex_enter(&fp->f_lock);
- if (fp->f_count != 0 &&
- fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
- *fpp++ = fp;
- nunref++;
- fp->f_count++;
+
+ /*
+ * Ignore non-sockets.
+ * Ignore dead sockets, or sockets with pending close.
+ * Ignore sockets obviously referenced elsewhere.
+ * Ignore sockets marked as referenced by our scan.
+ * Ignore new sockets that did not exist during the scan.
+ */
+ if (fp->f_type != DTYPE_SOCKET ||
+ fp->f_count == 0 || fp->f_unpcount != 0 ||
+ fp->f_count != fp->f_msgcount ||
+ (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
+ mutex_exit(&fp->f_lock);
+ continue;
}
+
+ /* Gain file ref, mark our position, and unlock. */
+ LIST_INSERT_AFTER(fp, dp, f_list);
+ fp->f_count++;
mutex_exit(&fp->f_lock);
+ mutex_exit(&filelist_lock);
+
+ /*
+ * Flush all data from the socket's receive buffer.
+ * This will cause files referenced only by the
+ * socket to be queued for close.
+ */
+ so = fp->f_data;
+ solock(so);
+ sorflush(so);
+ sounlock(so);
+
+ /* Re-lock and restart from where we left off. */
+ closef(fp);
+ mutex_enter(&filelist_lock);
+ np = LIST_NEXT(dp, f_list);
+ LIST_REMOVE(dp, f_list);
}
- mutex_exit(&filelist_lock);
+}
- for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
- fp = *fpp;
- if (fp->f_type == DTYPE_SOCKET) {
- so = fp->f_data;
- solock(so);
- sorflush(fp->f_data);
- sounlock(so);
+/*
+ * Garbage collector thread. While SCM_RIGHTS messages are in transit,
+ * wake once per second to garbage collect. Run continually while we
+ * have deferred closes to process.
+ */
+static void
+unp_thread(void *cookie)
+{
+ file_t *dp;
+
+ /* Allocate a dummy file for our scans. */
+ if ((dp = fgetdummy()) == NULL) {
+ panic("unp_thread");
+ }
+
+ mutex_enter(&filelist_lock);
+ for (;;) {
+ KASSERT(mutex_owned(&filelist_lock));
+ if (SLIST_EMPTY(&unp_thread_discard)) {
+ if (unp_rights != 0) {
+ (void)cv_timedwait(&unp_thread_cv,
+ &filelist_lock, hz);
+ } else {
+ cv_wait(&unp_thread_cv, &filelist_lock);
+ }
}
+ unp_gc(dp);
}
- for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
- closef(*fpp);
+ /* NOTREACHED */
+}
+
+/*
+ * Kick the garbage collector into action if there is something for
+ * it to process.
+ */
+static void
+unp_thread_kick(void)
+{
+
+ if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
+ mutex_enter(&filelist_lock);
+ cv_signal(&unp_thread_cv);
+ mutex_exit(&filelist_lock);
}
- kmem_free(extra_ref, nslots * sizeof(file_t *));
- atomic_swap_uint(&unp_gcing, 0);
}
void
@@ -1606,37 +1651,37 @@
{
if (m)
- unp_scan(m, unp_discard, 1);
+ unp_scan(m, unp_discard_later, 1);
}
void
unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
{
struct mbuf *m;
- file_t **rp;
+ file_t **rp, *fp;
struct cmsghdr *cm;
- int i;
- int qfds;
+ int i, qfds;
while (m0) {
for (m = m0; m; m = m->m_next) {
- if (m->m_type == MT_CONTROL &&
- m->m_len >= sizeof(*cm)) {
- cm = mtod(m, struct cmsghdr *);
- if (cm->cmsg_level != SOL_SOCKET ||
- cm->cmsg_type != SCM_RIGHTS)
- continue;
- qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
- / sizeof(file_t *);
- rp = (file_t **)CMSG_DATA(cm);
- for (i = 0; i < qfds; i++) {
- file_t *fp = *rp;
- if (discard)
- *rp = 0;
- (*op)(fp);
- rp++;
+ if (m->m_type != MT_CONTROL ||
+ m->m_len < sizeof(*cm)) {
+ continue;
+ }
+ cm = mtod(m, struct cmsghdr *);
+ if (cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_type != SCM_RIGHTS)
+ continue;
+ qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
+ / sizeof(file_t *);
+ rp = (file_t **)CMSG_DATA(cm);
+ for (i = 0; i < qfds; i++) {
+ fp = *rp;
+ if (discard) {
+ *rp = 0;
}
- break; /* XXX, but saves time */
+ (*op)(fp);
+ rp++;
}
}
m0 = m0->m_nextpkt;
@@ -1658,10 +1703,9 @@
}
/*
- * Minimize the number of deferrals... Sockets are the only
- * type of descriptor which can hold references to another
- * descriptor, so just mark other descriptors, and defer
- * unmarked sockets for the next pass.
+ * Minimize the number of deferrals... Sockets are the only type of
+ * file which can hold references to another file, so just mark
+ * other files, and defer unmarked sockets for the next pass.
*/
if (fp->f_type == DTYPE_SOCKET) {
unp_defer++;
@@ -1671,20 +1715,38 @@
atomic_or_uint(&fp->f_flag, FMARK);
}
mutex_exit(&fp->f_lock);
- return;
}
-void
-unp_discard(file_t *fp)
+static void
+unp_discard_now(file_t *fp)
{
if (fp == NULL)
return;
- mutex_enter(&fp->f_lock);
KASSERT(fp->f_count > 0);
+ KASSERT(fp->f_msgcount > 0);
+
+ mutex_enter(&fp->f_lock);
fp->f_msgcount--;
mutex_exit(&fp->f_lock);
atomic_dec_uint(&unp_rights);
(void)closef(fp);
+}
+
+static void
+unp_discard_later(file_t *fp)
+{
+
+ if (fp == NULL)
+ return;
+
+ KASSERT(fp->f_count > 0);
+ KASSERT(fp->f_msgcount > 0);
+
+ mutex_enter(&filelist_lock);
+ if (fp->f_unpcount++ == 0) {
+ SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
+ }
+ mutex_exit(&filelist_lock);
}
@@ -1,5 +1,34 @@
-/* $NetBSD: file.h,v 1.65 2008/06/24 10:26:27 gmcgarry Exp $ */
+/* $NetBSD: file.h,v 1.65.6.1 2009/03/18 05:33:23 snj Exp $ */
+/*-
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -53,6 +82,9 @@
/*
* Kernel file descriptor. One entry for each open kernel vnode and
* socket.
+ *
+ * This structure is exported via the KERN_FILE and KERN_FILE2 sysctl
+ * calls. Only add members to the end, do not delete them.
*/
struct file {
off_t f_offset; /* first, is 64-bit */
@@ -73,7 +105,7 @@
LIST_ENTRY(file) f_list; /* list of active files */
kmutex_t f_lock; /* lock on structure */
int f_flag; /* see fcntl.h */
- u_int f_iflags; /* internal flags; FIF_* */
+ u_int f_unused1; /* unused; was internal flags; FIF_* */
#define DTYPE_VNODE 1 /* file */
#define DTYPE_SOCKET 2 /* communications endpoint */
#define DTYPE_PIPE 3 /* pipe */
@@ -87,6 +119,8 @@
u_int f_advice; /* access pattern hint; UVM_ADV_* */
u_int f_count; /* reference count */
u_int f_msgcount; /* references from message queue */
+ u_int f_unpcount; /* deferred close: see uipc_usrreq.c */
+ SLIST_ENTRY(file) f_unplist; /* deferred close: see uipc_usrreq.c */
};
#define FILE_LOCK(fp) mutex_enter(&(fp)->f_lock)