Tue Dec 20 16:49:37 2011 UTC
Move the diagnostic check for a missing VOP_CLOSE() to the top of vrelel().
As long as we hold the vnode interlock there is no chance for this vnode
to gain new references.

Fixes false alarms observed by Thor Lancelot Simon and reported on tech-kern.

Ok: David Holland <dholland@netbsd.org>


(hannken)
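For reference, this is the relocated diagnostic block as it appears near the top of vrelel() in revision 1.15 (excerpted from the diff below). It now runs while the vnode interlock is still held, which is what rules out the vnode gaining new references while the open count is inspected:

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif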
diff -r1.14 -r1.15 src/sys/kern/vfs_vnode.c

cvs diff -r1.14 -r1.15 src/sys/kern/vfs_vnode.c

--- src/sys/kern/vfs_vnode.c 2011/10/07 09:35:06 1.14
+++ src/sys/kern/vfs_vnode.c 2011/12/20 16:49:37 1.15
@@ -1,1195 +1,1195 @@ @@ -1,1195 +1,1195 @@
1/* $NetBSD: vfs_vnode.c,v 1.14 2011/10/07 09:35:06 hannken Exp $ */ 1/* $NetBSD: vfs_vnode.c,v 1.15 2011/12/20 16:49:37 hannken Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc. 4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Copyright (c) 1989, 1993 34 * Copyright (c) 1989, 1993
35 * The Regents of the University of California. All rights reserved. 35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc. 36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed 37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph 38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc. 40 * the permission of UNIX System Laboratories, Inc.
41 * 41 *
42 * Redistribution and use in source and binary forms, with or without 42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions 43 * modification, are permitted provided that the following conditions
44 * are met: 44 * are met:
45 * 1. Redistributions of source code must retain the above copyright 45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer. 46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright 47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the 48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution. 49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors 50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software 51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission. 52 * without specific prior written permission.
53 * 53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE. 64 * SUCH DAMAGE.
65 * 65 *
66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67 */ 67 */
68 68
69/* 69/*
70 * The vnode cache subsystem. 70 * The vnode cache subsystem.
71 * 71 *
72 * Life-cycle 72 * Life-cycle
73 * 73 *
74 * Normally, there are two points where new vnodes are created: 74 * Normally, there are two points where new vnodes are created:
75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode 75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode
76 * starts in one of the following ways: 76 * starts in one of the following ways:
77 * 77 *
78 * - Allocation, via getnewvnode(9) and/or vnalloc(9). 78 * - Allocation, via getnewvnode(9) and/or vnalloc(9).
79 * - Reclamation of inactive vnode, via vget(9). 79 * - Reclamation of inactive vnode, via vget(9).
80 * 80 *
81 * The life-cycle ends when the last reference is dropped, usually 81 * The life-cycle ends when the last reference is dropped, usually
82 * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform 82 * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform
83 * the file system that vnode is inactive. Via this call, file system 83 * the file system that vnode is inactive. Via this call, file system
84 * indicates whether vnode should be recycled (usually, count of links 84 * indicates whether vnode should be recycled (usually, count of links
85 * is checked i.e. whether file was removed). 85 * is checked i.e. whether file was removed).
86 * 86 *
87 * Depending on indication, vnode can be put into a free list (cache), 87 * Depending on indication, vnode can be put into a free list (cache),
88 * or cleaned via vclean(9), which calls VOP_RECLAIM(9) to disassociate 88 * or cleaned via vclean(9), which calls VOP_RECLAIM(9) to disassociate
89 * underlying file system from the vnode, and finally destroyed. 89 * underlying file system from the vnode, and finally destroyed.
90 * 90 *
91 * Reference counting 91 * Reference counting
92 * 92 *
93 * Vnode is considered active, if reference count (vnode_t::v_usecount) 93 * Vnode is considered active, if reference count (vnode_t::v_usecount)
94 * is non-zero. It is maintained using: vref(9) and vrele(9), as well 94 * is non-zero. It is maintained using: vref(9) and vrele(9), as well
95 * as vput(9), routines. Common points holding references are e.g. 95 * as vput(9), routines. Common points holding references are e.g.
96 * file openings, current working directory, mount points, etc.  96 * file openings, current working directory, mount points, etc.
97 * 97 *
98 * Note on v_usecount and its locking 98 * Note on v_usecount and its locking
99 * 99 *
100 * At nearly all points it is known that v_usecount could be zero, 100 * At nearly all points it is known that v_usecount could be zero,
101 * the vnode_t::v_interlock will be held. To change v_usecount away 101 * the vnode_t::v_interlock will be held. To change v_usecount away
102 * from zero, the interlock must be held. To change from a non-zero 102 * from zero, the interlock must be held. To change from a non-zero
103 * value to zero, again the interlock must be held. 103 * value to zero, again the interlock must be held.
104 * 104 *
105 * There is a flag bit, VC_XLOCK, embedded in v_usecount. To raise 105 * There is a flag bit, VC_XLOCK, embedded in v_usecount. To raise
106 * v_usecount, if the VC_XLOCK bit is set in it, the interlock must 106 * v_usecount, if the VC_XLOCK bit is set in it, the interlock must
107 * be held. To modify the VC_XLOCK bit, the interlock must be held. 107 * be held. To modify the VC_XLOCK bit, the interlock must be held.
108 * We always keep the usecount (v_usecount & VC_MASK) non-zero while 108 * We always keep the usecount (v_usecount & VC_MASK) non-zero while
109 * the VC_XLOCK bit is set. 109 * the VC_XLOCK bit is set.
110 * 110 *
111 * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero 111 * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
112 * value to a non-zero value can safely be done using atomic operations, 112 * value to a non-zero value can safely be done using atomic operations,
113 * without the interlock held. 113 * without the interlock held.
114 * 114 *
115 * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero 115 * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
116 * value can be done using atomic operations, without the interlock held. 116 * value can be done using atomic operations, without the interlock held.
117 * 117 *
118 * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while 118 * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
119 * mntvnode_lock is still held. 119 * mntvnode_lock is still held.
120 */ 120 */
121 121
122#include <sys/cdefs.h> 122#include <sys/cdefs.h>
123__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.14 2011/10/07 09:35:06 hannken Exp $"); 123__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.15 2011/12/20 16:49:37 hannken Exp $");
124 124
125#include <sys/param.h> 125#include <sys/param.h>
126#include <sys/kernel.h> 126#include <sys/kernel.h>
127 127
128#include <sys/atomic.h> 128#include <sys/atomic.h>
129#include <sys/buf.h> 129#include <sys/buf.h>
130#include <sys/conf.h> 130#include <sys/conf.h>
131#include <sys/device.h> 131#include <sys/device.h>
132#include <sys/kauth.h> 132#include <sys/kauth.h>
133#include <sys/kmem.h> 133#include <sys/kmem.h>
134#include <sys/kthread.h> 134#include <sys/kthread.h>
135#include <sys/module.h> 135#include <sys/module.h>
136#include <sys/mount.h> 136#include <sys/mount.h>
137#include <sys/namei.h> 137#include <sys/namei.h>
138#include <sys/syscallargs.h> 138#include <sys/syscallargs.h>
139#include <sys/sysctl.h> 139#include <sys/sysctl.h>
140#include <sys/systm.h> 140#include <sys/systm.h>
141#include <sys/vnode.h> 141#include <sys/vnode.h>
142#include <sys/wapbl.h> 142#include <sys/wapbl.h>
143 143
144#include <uvm/uvm.h> 144#include <uvm/uvm.h>
145#include <uvm/uvm_readahead.h> 145#include <uvm/uvm_readahead.h>
146 146
147u_int numvnodes __cacheline_aligned; 147u_int numvnodes __cacheline_aligned;
148 148
149static pool_cache_t vnode_cache __read_mostly; 149static pool_cache_t vnode_cache __read_mostly;
150static kmutex_t vnode_free_list_lock __cacheline_aligned; 150static kmutex_t vnode_free_list_lock __cacheline_aligned;
151 151
152static vnodelst_t vnode_free_list __cacheline_aligned; 152static vnodelst_t vnode_free_list __cacheline_aligned;
153static vnodelst_t vnode_hold_list __cacheline_aligned; 153static vnodelst_t vnode_hold_list __cacheline_aligned;
154static vnodelst_t vrele_list __cacheline_aligned; 154static vnodelst_t vrele_list __cacheline_aligned;
155 155
156static kmutex_t vrele_lock __cacheline_aligned; 156static kmutex_t vrele_lock __cacheline_aligned;
157static kcondvar_t vrele_cv __cacheline_aligned; 157static kcondvar_t vrele_cv __cacheline_aligned;
158static lwp_t * vrele_lwp __cacheline_aligned; 158static lwp_t * vrele_lwp __cacheline_aligned;
159static int vrele_pending __cacheline_aligned; 159static int vrele_pending __cacheline_aligned;
160static int vrele_gen __cacheline_aligned; 160static int vrele_gen __cacheline_aligned;
161static kcondvar_t vdrain_cv __cacheline_aligned; 161static kcondvar_t vdrain_cv __cacheline_aligned;
162 162
163static int cleanvnode(void); 163static int cleanvnode(void);
164static void vdrain_thread(void *); 164static void vdrain_thread(void *);
165static void vrele_thread(void *); 165static void vrele_thread(void *);
166static void vnpanic(vnode_t *, const char *, ...) 166static void vnpanic(vnode_t *, const char *, ...)
167 __attribute__((__format__(__printf__, 2, 3))); 167 __attribute__((__format__(__printf__, 2, 3)));
168 168
169/* Routines having to do with the management of the vnode table. */ 169/* Routines having to do with the management of the vnode table. */
170extern int (**dead_vnodeop_p)(void *); 170extern int (**dead_vnodeop_p)(void *);
171 171
172void 172void
173vfs_vnode_sysinit(void) 173vfs_vnode_sysinit(void)
174{ 174{
175 int error; 175 int error;
176 176
177 vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl", 177 vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
178 NULL, IPL_NONE, NULL, NULL, NULL); 178 NULL, IPL_NONE, NULL, NULL, NULL);
179 KASSERT(vnode_cache != NULL); 179 KASSERT(vnode_cache != NULL);
180 180
181 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); 181 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
182 TAILQ_INIT(&vnode_free_list); 182 TAILQ_INIT(&vnode_free_list);
183 TAILQ_INIT(&vnode_hold_list); 183 TAILQ_INIT(&vnode_hold_list);
184 TAILQ_INIT(&vrele_list); 184 TAILQ_INIT(&vrele_list);
185 185
186 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); 186 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
187 cv_init(&vdrain_cv, "vdrain"); 187 cv_init(&vdrain_cv, "vdrain");
188 cv_init(&vrele_cv, "vrele"); 188 cv_init(&vrele_cv, "vrele");
189 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread, 189 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
190 NULL, NULL, "vdrain"); 190 NULL, NULL, "vdrain");
191 KASSERT(error == 0); 191 KASSERT(error == 0);
192 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, 192 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
193 NULL, &vrele_lwp, "vrele"); 193 NULL, &vrele_lwp, "vrele");
194 KASSERT(error == 0); 194 KASSERT(error == 0);
195} 195}
196 196
197/* 197/*
198 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a 198 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
199 * marker vnode. 199 * marker vnode.
200 */ 200 */
201vnode_t * 201vnode_t *
202vnalloc(struct mount *mp) 202vnalloc(struct mount *mp)
203{ 203{
204 vnode_t *vp; 204 vnode_t *vp;
205 205
206 vp = pool_cache_get(vnode_cache, PR_WAITOK); 206 vp = pool_cache_get(vnode_cache, PR_WAITOK);
207 KASSERT(vp != NULL); 207 KASSERT(vp != NULL);
208 208
209 memset(vp, 0, sizeof(*vp)); 209 memset(vp, 0, sizeof(*vp));
210 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 210 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
211 cv_init(&vp->v_cv, "vnode"); 211 cv_init(&vp->v_cv, "vnode");
212 /* 212 /*
213 * Done by memset() above. 213 * Done by memset() above.
214 * LIST_INIT(&vp->v_nclist); 214 * LIST_INIT(&vp->v_nclist);
215 * LIST_INIT(&vp->v_dnclist); 215 * LIST_INIT(&vp->v_dnclist);
216 */ 216 */
217 217
218 if (mp != NULL) { 218 if (mp != NULL) {
219 vp->v_mount = mp; 219 vp->v_mount = mp;
220 vp->v_type = VBAD; 220 vp->v_type = VBAD;
221 vp->v_iflag = VI_MARKER; 221 vp->v_iflag = VI_MARKER;
222 } else { 222 } else {
223 rw_init(&vp->v_lock); 223 rw_init(&vp->v_lock);
224 } 224 }
225 225
226 return vp; 226 return vp;
227} 227}
228 228
229/* 229/*
230 * Free an unused, unreferenced vnode. 230 * Free an unused, unreferenced vnode.
231 */ 231 */
232void 232void
233vnfree(vnode_t *vp) 233vnfree(vnode_t *vp)
234{ 234{
235 235
236 KASSERT(vp->v_usecount == 0); 236 KASSERT(vp->v_usecount == 0);
237 237
238 if ((vp->v_iflag & VI_MARKER) == 0) { 238 if ((vp->v_iflag & VI_MARKER) == 0) {
239 rw_destroy(&vp->v_lock); 239 rw_destroy(&vp->v_lock);
240 mutex_enter(&vnode_free_list_lock); 240 mutex_enter(&vnode_free_list_lock);
241 numvnodes--; 241 numvnodes--;
242 mutex_exit(&vnode_free_list_lock); 242 mutex_exit(&vnode_free_list_lock);
243 } 243 }
244 244
245 /* 245 /*
246 * Note: the vnode interlock will either be freed, of reference 246 * Note: the vnode interlock will either be freed, of reference
247 * dropped (if VI_LOCKSHARE was in use). 247 * dropped (if VI_LOCKSHARE was in use).
248 */ 248 */
249 uvm_obj_destroy(&vp->v_uobj, true); 249 uvm_obj_destroy(&vp->v_uobj, true);
250 cv_destroy(&vp->v_cv); 250 cv_destroy(&vp->v_cv);
251 pool_cache_put(vnode_cache, vp); 251 pool_cache_put(vnode_cache, vp);
252} 252}
253 253
254/* 254/*
255 * cleanvnode: grab a vnode from freelist, clean and free it. 255 * cleanvnode: grab a vnode from freelist, clean and free it.
256 * 256 *
257 * => Releases vnode_free_list_lock. 257 * => Releases vnode_free_list_lock.
258 */ 258 */
259static int 259static int
260cleanvnode(void) 260cleanvnode(void)
261{ 261{
262 vnode_t *vp; 262 vnode_t *vp;
263 vnodelst_t *listhd; 263 vnodelst_t *listhd;
264 264
265 KASSERT(mutex_owned(&vnode_free_list_lock)); 265 KASSERT(mutex_owned(&vnode_free_list_lock));
266retry: 266retry:
267 listhd = &vnode_free_list; 267 listhd = &vnode_free_list;
268try_nextlist: 268try_nextlist:
269 TAILQ_FOREACH(vp, listhd, v_freelist) { 269 TAILQ_FOREACH(vp, listhd, v_freelist) {
270 /* 270 /*
271 * It's safe to test v_usecount and v_iflag 271 * It's safe to test v_usecount and v_iflag
272 * without holding the interlock here, since 272 * without holding the interlock here, since
273 * these vnodes should never appear on the 273 * these vnodes should never appear on the
274 * lists. 274 * lists.
275 */ 275 */
276 KASSERT(vp->v_usecount == 0); 276 KASSERT(vp->v_usecount == 0);
277 KASSERT((vp->v_iflag & VI_CLEAN) == 0); 277 KASSERT((vp->v_iflag & VI_CLEAN) == 0);
278 KASSERT(vp->v_freelisthd == listhd); 278 KASSERT(vp->v_freelisthd == listhd);
279 279
280 if (!mutex_tryenter(vp->v_interlock)) 280 if (!mutex_tryenter(vp->v_interlock))
281 continue; 281 continue;
282 if ((vp->v_iflag & VI_XLOCK) == 0) 282 if ((vp->v_iflag & VI_XLOCK) == 0)
283 break; 283 break;
284 mutex_exit(vp->v_interlock); 284 mutex_exit(vp->v_interlock);
285 } 285 }
286 286
287 if (vp == NULL) { 287 if (vp == NULL) {
288 if (listhd == &vnode_free_list) { 288 if (listhd == &vnode_free_list) {
289 listhd = &vnode_hold_list; 289 listhd = &vnode_hold_list;
290 goto try_nextlist; 290 goto try_nextlist;
291 } 291 }
292 mutex_exit(&vnode_free_list_lock); 292 mutex_exit(&vnode_free_list_lock);
293 return EBUSY; 293 return EBUSY;
294 } 294 }
295 295
296 /* Remove it from the freelist. */ 296 /* Remove it from the freelist. */
297 TAILQ_REMOVE(listhd, vp, v_freelist); 297 TAILQ_REMOVE(listhd, vp, v_freelist);
298 vp->v_freelisthd = NULL; 298 vp->v_freelisthd = NULL;
299 mutex_exit(&vnode_free_list_lock); 299 mutex_exit(&vnode_free_list_lock);
300 300
301 KASSERT(vp->v_usecount == 0); 301 KASSERT(vp->v_usecount == 0);
302 302
303 /* 303 /*
304 * The vnode is still associated with a file system, so we must 304 * The vnode is still associated with a file system, so we must
305 * clean it out before freeing it. We need to add a reference 305 * clean it out before freeing it. We need to add a reference
306 * before doing this. If the vnode gains another reference while 306 * before doing this. If the vnode gains another reference while
307 * being cleaned out then we lose - retry. 307 * being cleaned out then we lose - retry.
308 */ 308 */
309 atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK); 309 atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
310 vclean(vp, DOCLOSE); 310 vclean(vp, DOCLOSE);
311 KASSERT(vp->v_usecount >= 1 + VC_XLOCK); 311 KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
312 atomic_add_int(&vp->v_usecount, -VC_XLOCK); 312 atomic_add_int(&vp->v_usecount, -VC_XLOCK);
313 if (vp->v_usecount > 1) { 313 if (vp->v_usecount > 1) {
314 /* 314 /*
315 * Don't return to freelist - the holder of the last 315 * Don't return to freelist - the holder of the last
316 * reference will destroy it. 316 * reference will destroy it.
317 */ 317 */
318 vrelel(vp, 0); /* releases vp->v_interlock */ 318 vrelel(vp, 0); /* releases vp->v_interlock */
319 mutex_enter(&vnode_free_list_lock); 319 mutex_enter(&vnode_free_list_lock);
320 goto retry; 320 goto retry;
321 } 321 }
322 322
323 KASSERT((vp->v_iflag & VI_CLEAN) == VI_CLEAN); 323 KASSERT((vp->v_iflag & VI_CLEAN) == VI_CLEAN);
324 mutex_exit(vp->v_interlock); 324 mutex_exit(vp->v_interlock);
325 if (vp->v_type == VBLK || vp->v_type == VCHR) { 325 if (vp->v_type == VBLK || vp->v_type == VCHR) {
326 spec_node_destroy(vp); 326 spec_node_destroy(vp);
327 } 327 }
328 vp->v_type = VNON; 328 vp->v_type = VNON;
329 329
330 KASSERT(vp->v_data == NULL); 330 KASSERT(vp->v_data == NULL);
331 KASSERT(vp->v_uobj.uo_npages == 0); 331 KASSERT(vp->v_uobj.uo_npages == 0);
332 KASSERT(TAILQ_EMPTY(&vp->v_uobj.memq)); 332 KASSERT(TAILQ_EMPTY(&vp->v_uobj.memq));
333 KASSERT(vp->v_numoutput == 0); 333 KASSERT(vp->v_numoutput == 0);
334 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 334 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
335 335
336 vrele(vp); 336 vrele(vp);
337 337
338 return 0; 338 return 0;
339} 339}
340 340
341/* 341/*
342 * getnewvnode: return a fresh vnode. 342 * getnewvnode: return a fresh vnode.
343 * 343 *
344 * => Returns referenced vnode, moved into the mount queue. 344 * => Returns referenced vnode, moved into the mount queue.
345 * => Shares the interlock specified by 'slock', if it is not NULL. 345 * => Shares the interlock specified by 'slock', if it is not NULL.
346 */ 346 */
347int 347int
348getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 348getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
349 kmutex_t *slock, vnode_t **vpp) 349 kmutex_t *slock, vnode_t **vpp)
350{ 350{
351 struct uvm_object *uobj; 351 struct uvm_object *uobj;
352 vnode_t *vp; 352 vnode_t *vp;
353 int error = 0; 353 int error = 0;
354 354
355 if (mp != NULL) { 355 if (mp != NULL) {
356 /* 356 /*
357 * Mark filesystem busy while we are creating a vnode. 357 * Mark filesystem busy while we are creating a vnode.
358 * If unmount is in progress, this will fail. 358 * If unmount is in progress, this will fail.
359 */ 359 */
360 error = vfs_busy(mp, NULL); 360 error = vfs_busy(mp, NULL);
361 if (error) 361 if (error)
362 return error; 362 return error;
363 } 363 }
364 364
365 vp = NULL; 365 vp = NULL;
366 366
367 /* Allocate a new vnode. */ 367 /* Allocate a new vnode. */
368 mutex_enter(&vnode_free_list_lock); 368 mutex_enter(&vnode_free_list_lock);
369 numvnodes++; 369 numvnodes++;
370 if (numvnodes > desiredvnodes + desiredvnodes / 10) 370 if (numvnodes > desiredvnodes + desiredvnodes / 10)
371 cv_signal(&vdrain_cv); 371 cv_signal(&vdrain_cv);
372 mutex_exit(&vnode_free_list_lock); 372 mutex_exit(&vnode_free_list_lock);
373 vp = vnalloc(NULL); 373 vp = vnalloc(NULL);
374 374
375 KASSERT(vp->v_freelisthd == NULL); 375 KASSERT(vp->v_freelisthd == NULL);
376 KASSERT(LIST_EMPTY(&vp->v_nclist)); 376 KASSERT(LIST_EMPTY(&vp->v_nclist));
377 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 377 KASSERT(LIST_EMPTY(&vp->v_dnclist));
378 378
379 /* Initialize vnode. */ 379 /* Initialize vnode. */
380 vp->v_usecount = 1; 380 vp->v_usecount = 1;
381 vp->v_type = VNON; 381 vp->v_type = VNON;
382 vp->v_tag = tag; 382 vp->v_tag = tag;
383 vp->v_op = vops; 383 vp->v_op = vops;
384 vp->v_data = NULL; 384 vp->v_data = NULL;
385 385
386 uobj = &vp->v_uobj; 386 uobj = &vp->v_uobj;
387 KASSERT(uobj->pgops == &uvm_vnodeops); 387 KASSERT(uobj->pgops == &uvm_vnodeops);
388 KASSERT(uobj->uo_npages == 0); 388 KASSERT(uobj->uo_npages == 0);
389 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 389 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
390 vp->v_size = vp->v_writesize = VSIZENOTSET; 390 vp->v_size = vp->v_writesize = VSIZENOTSET;
391 391
392 /* Share the vnode_t::v_interlock, if requested. */ 392 /* Share the vnode_t::v_interlock, if requested. */
393 if (slock) { 393 if (slock) {
394 /* Set the interlock and mark that it is shared. */ 394 /* Set the interlock and mark that it is shared. */
395 KASSERT(vp->v_mount == NULL); 395 KASSERT(vp->v_mount == NULL);
396 mutex_obj_hold(slock); 396 mutex_obj_hold(slock);
397 uvm_obj_setlock(&vp->v_uobj, slock); 397 uvm_obj_setlock(&vp->v_uobj, slock);
398 KASSERT(vp->v_interlock == slock); 398 KASSERT(vp->v_interlock == slock);
399 vp->v_iflag |= VI_LOCKSHARE; 399 vp->v_iflag |= VI_LOCKSHARE;
400 } 400 }
401 401
402 /* Finally, move vnode into the mount queue. */ 402 /* Finally, move vnode into the mount queue. */
403 vfs_insmntque(vp, mp); 403 vfs_insmntque(vp, mp);
404 404
405 if (mp != NULL) { 405 if (mp != NULL) {
406 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 406 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
407 vp->v_vflag |= VV_MPSAFE; 407 vp->v_vflag |= VV_MPSAFE;
408 vfs_unbusy(mp, true, NULL); 408 vfs_unbusy(mp, true, NULL);
409 } 409 }
410 410
411 *vpp = vp; 411 *vpp = vp;
412 return 0; 412 return 0;
413} 413}
414 414
415/* 415/*
416 * This is really just the reverse of getnewvnode(). Needed for 416 * This is really just the reverse of getnewvnode(). Needed for
417 * VFS_VGET functions who may need to push back a vnode in case 417 * VFS_VGET functions who may need to push back a vnode in case
418 * of a locking race. 418 * of a locking race.
419 */ 419 */
420void 420void
421ungetnewvnode(vnode_t *vp) 421ungetnewvnode(vnode_t *vp)
422{ 422{
423 423
424 KASSERT(vp->v_usecount == 1); 424 KASSERT(vp->v_usecount == 1);
425 KASSERT(vp->v_data == NULL); 425 KASSERT(vp->v_data == NULL);
426 KASSERT(vp->v_freelisthd == NULL); 426 KASSERT(vp->v_freelisthd == NULL);
427 427
428 mutex_enter(vp->v_interlock); 428 mutex_enter(vp->v_interlock);
429 vp->v_iflag |= VI_CLEAN; 429 vp->v_iflag |= VI_CLEAN;
430 vrelel(vp, 0); 430 vrelel(vp, 0);
431} 431}
432 432
433/* 433/*
434 * Helper thread to keep the number of vnodes below desiredvnodes. 434 * Helper thread to keep the number of vnodes below desiredvnodes.
435 */ 435 */
436static void 436static void
437vdrain_thread(void *cookie) 437vdrain_thread(void *cookie)
438{ 438{
439 int error; 439 int error;
440 440
441 mutex_enter(&vnode_free_list_lock); 441 mutex_enter(&vnode_free_list_lock);
442 442
443 for (;;) { 443 for (;;) {
444 cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz); 444 cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
445 while (numvnodes > desiredvnodes) { 445 while (numvnodes > desiredvnodes) {
446 error = cleanvnode(); 446 error = cleanvnode();
447 if (error) 447 if (error)
448 kpause("vndsbusy", false, hz, NULL); 448 kpause("vndsbusy", false, hz, NULL);
449 mutex_enter(&vnode_free_list_lock); 449 mutex_enter(&vnode_free_list_lock);
450 if (error) 450 if (error)
451 break; 451 break;
452 } 452 }
453 } 453 }
454} 454}
455 455
456/* 456/*
457 * Remove a vnode from its freelist. 457 * Remove a vnode from its freelist.
458 */ 458 */
459void 459void
460vremfree(vnode_t *vp) 460vremfree(vnode_t *vp)
461{ 461{
462 462
463 KASSERT(mutex_owned(vp->v_interlock)); 463 KASSERT(mutex_owned(vp->v_interlock));
464 KASSERT(vp->v_usecount == 0); 464 KASSERT(vp->v_usecount == 0);
465 465
466 /* 466 /*
467 * Note that the reference count must not change until 467 * Note that the reference count must not change until
468 * the vnode is removed. 468 * the vnode is removed.
469 */ 469 */
470 mutex_enter(&vnode_free_list_lock); 470 mutex_enter(&vnode_free_list_lock);
471 if (vp->v_holdcnt > 0) { 471 if (vp->v_holdcnt > 0) {
472 KASSERT(vp->v_freelisthd == &vnode_hold_list); 472 KASSERT(vp->v_freelisthd == &vnode_hold_list);
473 } else { 473 } else {
474 KASSERT(vp->v_freelisthd == &vnode_free_list); 474 KASSERT(vp->v_freelisthd == &vnode_free_list);
475 } 475 }
476 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 476 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
477 vp->v_freelisthd = NULL; 477 vp->v_freelisthd = NULL;
478 mutex_exit(&vnode_free_list_lock); 478 mutex_exit(&vnode_free_list_lock);
479} 479}
480 480
481/* 481/*
482 * Try to gain a reference to a vnode, without acquiring its interlock. 482 * Try to gain a reference to a vnode, without acquiring its interlock.
483 * The caller must hold a lock that will prevent the vnode from being 483 * The caller must hold a lock that will prevent the vnode from being
484 * recycled or freed. 484 * recycled or freed.
485 */ 485 */
486bool 486bool
487vtryget(vnode_t *vp) 487vtryget(vnode_t *vp)
488{ 488{
489 u_int use, next; 489 u_int use, next;
490 490
491 /* 491 /*
492 * If the vnode is being freed, don't make life any harder 492 * If the vnode is being freed, don't make life any harder
493 * for vclean() by adding another reference without waiting. 493 * for vclean() by adding another reference without waiting.
494 * This is not strictly necessary, but we'll do it anyway. 494 * This is not strictly necessary, but we'll do it anyway.
495 */ 495 */
496 if (__predict_false((vp->v_iflag & VI_XLOCK) != 0)) { 496 if (__predict_false((vp->v_iflag & VI_XLOCK) != 0)) {
497 return false; 497 return false;
498 } 498 }
499 for (use = vp->v_usecount;; use = next) { 499 for (use = vp->v_usecount;; use = next) {
500 if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) { 500 if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
501 /* Need interlock held if first reference. */ 501 /* Need interlock held if first reference. */
502 return false; 502 return false;
503 } 503 }
504 next = atomic_cas_uint(&vp->v_usecount, use, use + 1); 504 next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
505 if (__predict_true(next == use)) { 505 if (__predict_true(next == use)) {
506 return true; 506 return true;
507 } 507 }
508 } 508 }
509} 509}
510 510
511/* 511/*
512 * vget: get a particular vnode from the free list, increment its reference 512 * vget: get a particular vnode from the free list, increment its reference
513 * count and lock it. 513 * count and lock it.
514 * 514 *
515 * => Should be called with v_interlock held. 515 * => Should be called with v_interlock held.
516 * 516 *
517 * If VI_XLOCK is set, the vnode is being eliminated in vgone()/vclean(). 517 * If VI_XLOCK is set, the vnode is being eliminated in vgone()/vclean().
518 * In that case, we cannot grab the vnode, so the process is awakened when 518 * In that case, we cannot grab the vnode, so the process is awakened when
519 * the transition is completed, and an error returned to indicate that the 519 * the transition is completed, and an error returned to indicate that the
520 * vnode is no longer usable (e.g. changed to a new file system type). 520 * vnode is no longer usable (e.g. changed to a new file system type).
521 */ 521 */
522int 522int
523vget(vnode_t *vp, int flags) 523vget(vnode_t *vp, int flags)
524{ 524{
525 int error = 0; 525 int error = 0;
526 526
527 KASSERT((vp->v_iflag & VI_MARKER) == 0); 527 KASSERT((vp->v_iflag & VI_MARKER) == 0);
528 KASSERT(mutex_owned(vp->v_interlock)); 528 KASSERT(mutex_owned(vp->v_interlock));
529 KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0); 529 KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0);
530 530
531 /* 531 /*
532 * Before adding a reference, we must remove the vnode 532 * Before adding a reference, we must remove the vnode
533 * from its freelist. 533 * from its freelist.
534 */ 534 */
535 if (vp->v_usecount == 0) { 535 if (vp->v_usecount == 0) {
536 vremfree(vp); 536 vremfree(vp);
537 vp->v_usecount = 1; 537 vp->v_usecount = 1;
538 } else { 538 } else {
539 atomic_inc_uint(&vp->v_usecount); 539 atomic_inc_uint(&vp->v_usecount);
540 } 540 }
541 541
542 /* 542 /*
543 * If the vnode is in the process of being cleaned out for 543 * If the vnode is in the process of being cleaned out for
544 * another use, we wait for the cleaning to finish and then 544 * another use, we wait for the cleaning to finish and then
545 * return failure. Cleaning is determined by checking if 545 * return failure. Cleaning is determined by checking if
546 * the VI_XLOCK flag is set. 546 * the VI_XLOCK flag is set.
547 */ 547 */
548 if ((vp->v_iflag & VI_XLOCK) != 0) { 548 if ((vp->v_iflag & VI_XLOCK) != 0) {
549 if ((flags & LK_NOWAIT) != 0) { 549 if ((flags & LK_NOWAIT) != 0) {
550 vrelel(vp, 0); 550 vrelel(vp, 0);
551 return EBUSY; 551 return EBUSY;
552 } 552 }
553 vwait(vp, VI_XLOCK); 553 vwait(vp, VI_XLOCK);
554 vrelel(vp, 0); 554 vrelel(vp, 0);
555 return ENOENT; 555 return ENOENT;
556 } 556 }
557 557
558 /* 558 /*
559 * Ok, we got it in good shape. Just locking left. 559 * Ok, we got it in good shape. Just locking left.
560 */ 560 */
561 KASSERT((vp->v_iflag & VI_CLEAN) == 0); 561 KASSERT((vp->v_iflag & VI_CLEAN) == 0);
562 mutex_exit(vp->v_interlock); 562 mutex_exit(vp->v_interlock);
563 if (flags & (LK_EXCLUSIVE | LK_SHARED)) { 563 if (flags & (LK_EXCLUSIVE | LK_SHARED)) {
564 error = vn_lock(vp, flags); 564 error = vn_lock(vp, flags);
565 if (error != 0) { 565 if (error != 0) {
566 vrele(vp); 566 vrele(vp);
567 } 567 }
568 } 568 }
569 return error; 569 return error;
570} 570}
571 571
572/* 572/*
573 * vput: unlock and release the reference. 573 * vput: unlock and release the reference.
574 */ 574 */
575void 575void
576vput(vnode_t *vp) 576vput(vnode_t *vp)
577{ 577{
578 578
579 KASSERT((vp->v_iflag & VI_MARKER) == 0); 579 KASSERT((vp->v_iflag & VI_MARKER) == 0);
580 580
581 VOP_UNLOCK(vp); 581 VOP_UNLOCK(vp);
582 vrele(vp); 582 vrele(vp);
583} 583}
584 584
585/* 585/*
586 * Try to drop reference on a vnode. Abort if we are releasing the 586 * Try to drop reference on a vnode. Abort if we are releasing the
587 * last reference. Note: this _must_ succeed if not the last reference. 587 * last reference. Note: this _must_ succeed if not the last reference.
588 */ 588 */
589static inline bool 589static inline bool
590vtryrele(vnode_t *vp) 590vtryrele(vnode_t *vp)
591{ 591{
592 u_int use, next; 592 u_int use, next;
593 593
594 for (use = vp->v_usecount;; use = next) { 594 for (use = vp->v_usecount;; use = next) {
595 if (use == 1) { 595 if (use == 1) {
596 return false; 596 return false;
597 } 597 }
598 KASSERT((use & VC_MASK) > 1); 598 KASSERT((use & VC_MASK) > 1);
599 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 599 next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
600 if (__predict_true(next == use)) { 600 if (__predict_true(next == use)) {
601 return true; 601 return true;
602 } 602 }
603 } 603 }
604} 604}
605 605
606/* 606/*
607 * Vnode release. If reference count drops to zero, call inactive 607 * Vnode release. If reference count drops to zero, call inactive
608 * routine and either return to freelist or free to the pool. 608 * routine and either return to freelist or free to the pool.
609 */ 609 */
610void 610void
611vrelel(vnode_t *vp, int flags) 611vrelel(vnode_t *vp, int flags)
612{ 612{
613 bool recycle, defer; 613 bool recycle, defer;
614 int error; 614 int error;
615 615
616 KASSERT(mutex_owned(vp->v_interlock)); 616 KASSERT(mutex_owned(vp->v_interlock));
617 KASSERT((vp->v_iflag & VI_MARKER) == 0); 617 KASSERT((vp->v_iflag & VI_MARKER) == 0);
618 KASSERT(vp->v_freelisthd == NULL); 618 KASSERT(vp->v_freelisthd == NULL);
619 619
620 if (__predict_false(vp->v_op == dead_vnodeop_p && 620 if (__predict_false(vp->v_op == dead_vnodeop_p &&
621 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) { 621 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
622 vnpanic(vp, "dead but not clean"); 622 vnpanic(vp, "dead but not clean");
623 } 623 }
624 624
625 /* 625 /*
626 * If not the last reference, just drop the reference count 626 * If not the last reference, just drop the reference count
627 * and unlock. 627 * and unlock.
628 */ 628 */
629 if (vtryrele(vp)) { 629 if (vtryrele(vp)) {
630 vp->v_iflag |= VI_INACTREDO; 630 vp->v_iflag |= VI_INACTREDO;
631 mutex_exit(vp->v_interlock); 631 mutex_exit(vp->v_interlock);
632 return; 632 return;
633 } 633 }
634 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 634 if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
635 vnpanic(vp, "%s: bad ref count", __func__); 635 vnpanic(vp, "%s: bad ref count", __func__);
636 } 636 }
637 637
638 KASSERT((vp->v_iflag & VI_XLOCK) == 0); 638 KASSERT((vp->v_iflag & VI_XLOCK) == 0);
639 639
 640#ifdef DIAGNOSTIC
 641 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
 642 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
 643 vprint("vrelel: missing VOP_CLOSE()", vp);
 644 }
 645#endif
 646
640 /* 647 /*
641 * If not clean, deactivate the vnode, but preserve 648 * If not clean, deactivate the vnode, but preserve
642 * our reference across the call to VOP_INACTIVE(). 649 * our reference across the call to VOP_INACTIVE().
643 */ 650 */
644retry: 651retry:
645 if ((vp->v_iflag & VI_CLEAN) == 0) { 652 if ((vp->v_iflag & VI_CLEAN) == 0) {
646 recycle = false; 653 recycle = false;
647 vp->v_iflag |= VI_INACTNOW; 654 vp->v_iflag |= VI_INACTNOW;
648 655
649 /* 656 /*
650 * XXX This ugly block can be largely eliminated if 657 * XXX This ugly block can be largely eliminated if
651 * locking is pushed down into the file systems. 658 * locking is pushed down into the file systems.
652 * 659 *
653 * Defer vnode release to vrele_thread if caller 660 * Defer vnode release to vrele_thread if caller
654 * requests it explicitly. 661 * requests it explicitly.
655 */ 662 */
656 if ((curlwp == uvm.pagedaemon_lwp) || 663 if ((curlwp == uvm.pagedaemon_lwp) ||
657 (flags & VRELEL_ASYNC_RELE) != 0) { 664 (flags & VRELEL_ASYNC_RELE) != 0) {
658 /* The pagedaemon can't wait around; defer. */ 665 /* The pagedaemon can't wait around; defer. */
659 defer = true; 666 defer = true;
660 } else if (curlwp == vrele_lwp) { 667 } else if (curlwp == vrele_lwp) {
661 /* We have to try harder. */ 668 /* We have to try harder. */
662 vp->v_iflag &= ~VI_INACTREDO; 669 vp->v_iflag &= ~VI_INACTREDO;
663 mutex_exit(vp->v_interlock); 670 mutex_exit(vp->v_interlock);
664 error = vn_lock(vp, LK_EXCLUSIVE); 671 error = vn_lock(vp, LK_EXCLUSIVE);
665 if (error != 0) { 672 if (error != 0) {
666 /* XXX */ 673 /* XXX */
667 vnpanic(vp, "%s: unable to lock %p", 674 vnpanic(vp, "%s: unable to lock %p",
668 __func__, vp); 675 __func__, vp);
669 } 676 }
670 defer = false; 677 defer = false;
671 } else if ((vp->v_iflag & VI_LAYER) != 0) { 678 } else if ((vp->v_iflag & VI_LAYER) != 0) {
672 /*  679 /*
673 * Acquiring the stack's lock in vclean() even 680 * Acquiring the stack's lock in vclean() even
674 * for an honest vput/vrele is dangerous because 681 * for an honest vput/vrele is dangerous because
675 * our caller may hold other vnode locks; defer. 682 * our caller may hold other vnode locks; defer.
676 */ 683 */
677 defer = true; 684 defer = true;
678 } else { 685 } else {
679 /* If we can't acquire the lock, then defer. */ 686 /* If we can't acquire the lock, then defer. */
680 vp->v_iflag &= ~VI_INACTREDO; 687 vp->v_iflag &= ~VI_INACTREDO;
681 mutex_exit(vp->v_interlock); 688 mutex_exit(vp->v_interlock);
682 error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); 689 error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
683 if (error != 0) { 690 if (error != 0) {
684 defer = true; 691 defer = true;
685 mutex_enter(vp->v_interlock); 692 mutex_enter(vp->v_interlock);
686 } else { 693 } else {
687 defer = false; 694 defer = false;
688 } 695 }
689 } 696 }
690 697
691 if (defer) { 698 if (defer) {
692 /* 699 /*
693 * Defer reclaim to the kthread; it's not safe to 700 * Defer reclaim to the kthread; it's not safe to
694 * clean it here. We donate it our last reference. 701 * clean it here. We donate it our last reference.
695 */ 702 */
696 KASSERT(mutex_owned(vp->v_interlock)); 703 KASSERT(mutex_owned(vp->v_interlock));
697 KASSERT((vp->v_iflag & VI_INACTPEND) == 0); 704 KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
698 vp->v_iflag &= ~VI_INACTNOW; 705 vp->v_iflag &= ~VI_INACTNOW;
699 vp->v_iflag |= VI_INACTPEND; 706 vp->v_iflag |= VI_INACTPEND;
700 mutex_enter(&vrele_lock); 707 mutex_enter(&vrele_lock);
701 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); 708 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
702 if (++vrele_pending > (desiredvnodes >> 8)) 709 if (++vrele_pending > (desiredvnodes >> 8))
703 cv_signal(&vrele_cv);  710 cv_signal(&vrele_cv);
704 mutex_exit(&vrele_lock); 711 mutex_exit(&vrele_lock);
705 mutex_exit(vp->v_interlock); 712 mutex_exit(vp->v_interlock);
706 return; 713 return;
707 } 714 }
708 715
709#ifdef DIAGNOSTIC 
710 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 
711 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 
712 vprint("vrelel: missing VOP_CLOSE()", vp); 
713 } 
714#endif 
715 
716 /* 716 /*
717 * The vnode can gain another reference while being 717 * The vnode can gain another reference while being
718 * deactivated. If VOP_INACTIVE() indicates that 718 * deactivated. If VOP_INACTIVE() indicates that
719 * the described file has been deleted, then recycle 719 * the described file has been deleted, then recycle
720 * the vnode irrespective of additional references. 720 * the vnode irrespective of additional references.
721 * Another thread may be waiting to re-use the on-disk 721 * Another thread may be waiting to re-use the on-disk
722 * inode. 722 * inode.
723 * 723 *
724 * Note that VOP_INACTIVE() will drop the vnode lock. 724 * Note that VOP_INACTIVE() will drop the vnode lock.
725 */ 725 */
726 VOP_INACTIVE(vp, &recycle); 726 VOP_INACTIVE(vp, &recycle);
727 mutex_enter(vp->v_interlock); 727 mutex_enter(vp->v_interlock);
728 vp->v_iflag &= ~VI_INACTNOW; 728 vp->v_iflag &= ~VI_INACTNOW;
729 if (!recycle) { 729 if (!recycle) {
730 if (vtryrele(vp)) { 730 if (vtryrele(vp)) {
731 mutex_exit(vp->v_interlock); 731 mutex_exit(vp->v_interlock);
732 return; 732 return;
733 } 733 }
734 734
735 /* 735 /*
736 * If we grew another reference while 736 * If we grew another reference while
737 * VOP_INACTIVE() was underway, retry. 737 * VOP_INACTIVE() was underway, retry.
738 */ 738 */
739 if ((vp->v_iflag & VI_INACTREDO) != 0) { 739 if ((vp->v_iflag & VI_INACTREDO) != 0) {
740 goto retry; 740 goto retry;
741 } 741 }
742 } 742 }
743 743
744 /* Take care of space accounting. */ 744 /* Take care of space accounting. */
745 if (vp->v_iflag & VI_EXECMAP) { 745 if (vp->v_iflag & VI_EXECMAP) {
746 atomic_add_int(&uvmexp.execpages, 746 atomic_add_int(&uvmexp.execpages,
747 -vp->v_uobj.uo_npages); 747 -vp->v_uobj.uo_npages);
748 atomic_add_int(&uvmexp.filepages, 748 atomic_add_int(&uvmexp.filepages,
749 vp->v_uobj.uo_npages); 749 vp->v_uobj.uo_npages);
750 } 750 }
751 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 751 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
752 vp->v_vflag &= ~VV_MAPPED; 752 vp->v_vflag &= ~VV_MAPPED;
753 753
754 /* 754 /*
755 * Recycle the vnode if the file is now unused (unlinked), 755 * Recycle the vnode if the file is now unused (unlinked),
756 * otherwise just free it. 756 * otherwise just free it.
757 */ 757 */
758 if (recycle) { 758 if (recycle) {
759 vclean(vp, DOCLOSE); 759 vclean(vp, DOCLOSE);
760 } 760 }
761 KASSERT(vp->v_usecount > 0); 761 KASSERT(vp->v_usecount > 0);
762 } 762 }
763 763
764 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 764 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
765 /* Gained another reference while being reclaimed. */ 765 /* Gained another reference while being reclaimed. */
766 mutex_exit(vp->v_interlock); 766 mutex_exit(vp->v_interlock);
767 return; 767 return;
768 } 768 }
769 769
770 if ((vp->v_iflag & VI_CLEAN) != 0) { 770 if ((vp->v_iflag & VI_CLEAN) != 0) {
771 /* 771 /*
772 * It's clean so destroy it. It isn't referenced 772 * It's clean so destroy it. It isn't referenced
773 * anywhere since it has been reclaimed. 773 * anywhere since it has been reclaimed.
774 */ 774 */
775 KASSERT(vp->v_holdcnt == 0); 775 KASSERT(vp->v_holdcnt == 0);
776 KASSERT(vp->v_writecount == 0); 776 KASSERT(vp->v_writecount == 0);
777 mutex_exit(vp->v_interlock); 777 mutex_exit(vp->v_interlock);
778 vfs_insmntque(vp, NULL); 778 vfs_insmntque(vp, NULL);
779 if (vp->v_type == VBLK || vp->v_type == VCHR) { 779 if (vp->v_type == VBLK || vp->v_type == VCHR) {
780 spec_node_destroy(vp); 780 spec_node_destroy(vp);
781 } 781 }
782 vnfree(vp); 782 vnfree(vp);
783 } else { 783 } else {
784 /* 784 /*
785 * Otherwise, put it back onto the freelist. It 785 * Otherwise, put it back onto the freelist. It
786 * can't be destroyed while still associated with 786 * can't be destroyed while still associated with
787 * a file system. 787 * a file system.
788 */ 788 */
789 mutex_enter(&vnode_free_list_lock); 789 mutex_enter(&vnode_free_list_lock);
790 if (vp->v_holdcnt > 0) { 790 if (vp->v_holdcnt > 0) {
791 vp->v_freelisthd = &vnode_hold_list; 791 vp->v_freelisthd = &vnode_hold_list;
792 } else { 792 } else {
793 vp->v_freelisthd = &vnode_free_list; 793 vp->v_freelisthd = &vnode_free_list;
794 } 794 }
795 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 795 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
796 mutex_exit(&vnode_free_list_lock); 796 mutex_exit(&vnode_free_list_lock);
797 mutex_exit(vp->v_interlock); 797 mutex_exit(vp->v_interlock);
798 } 798 }
799} 799}
800 800
801void 801void
802vrele(vnode_t *vp) 802vrele(vnode_t *vp)
803{ 803{
804 804
805 KASSERT((vp->v_iflag & VI_MARKER) == 0); 805 KASSERT((vp->v_iflag & VI_MARKER) == 0);
806 806
807 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 807 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
808 return; 808 return;
809 } 809 }
810 mutex_enter(vp->v_interlock); 810 mutex_enter(vp->v_interlock);
811 vrelel(vp, 0); 811 vrelel(vp, 0);
812} 812}
813 813
814/* 814/*
815 * Asynchronous vnode release, vnode is released in different context. 815 * Asynchronous vnode release, vnode is released in different context.
816 */ 816 */
817void 817void
818vrele_async(vnode_t *vp) 818vrele_async(vnode_t *vp)
819{ 819{
820 820
821 KASSERT((vp->v_iflag & VI_MARKER) == 0); 821 KASSERT((vp->v_iflag & VI_MARKER) == 0);
822 822
823 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 823 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
824 return; 824 return;
825 } 825 }
826 mutex_enter(vp->v_interlock); 826 mutex_enter(vp->v_interlock);
827 vrelel(vp, VRELEL_ASYNC_RELE); 827 vrelel(vp, VRELEL_ASYNC_RELE);
828} 828}
829 829
830static void 830static void
831vrele_thread(void *cookie) 831vrele_thread(void *cookie)
832{ 832{
833 vnode_t *vp; 833 vnode_t *vp;
834 834
835 for (;;) { 835 for (;;) {
836 mutex_enter(&vrele_lock); 836 mutex_enter(&vrele_lock);
837 while (TAILQ_EMPTY(&vrele_list)) { 837 while (TAILQ_EMPTY(&vrele_list)) {
838 vrele_gen++; 838 vrele_gen++;
839 cv_broadcast(&vrele_cv); 839 cv_broadcast(&vrele_cv);
840 cv_timedwait(&vrele_cv, &vrele_lock, hz); 840 cv_timedwait(&vrele_cv, &vrele_lock, hz);
841 } 841 }
842 vp = TAILQ_FIRST(&vrele_list); 842 vp = TAILQ_FIRST(&vrele_list);
843 TAILQ_REMOVE(&vrele_list, vp, v_freelist); 843 TAILQ_REMOVE(&vrele_list, vp, v_freelist);
844 vrele_pending--; 844 vrele_pending--;
845 mutex_exit(&vrele_lock); 845 mutex_exit(&vrele_lock);
846 846
847 /* 847 /*
848 * If not the last reference, then ignore the vnode 848 * If not the last reference, then ignore the vnode
849 * and look for more work. 849 * and look for more work.
850 */ 850 */
851 mutex_enter(vp->v_interlock); 851 mutex_enter(vp->v_interlock);
852 KASSERT((vp->v_iflag & VI_INACTPEND) != 0); 852 KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
853 vp->v_iflag &= ~VI_INACTPEND; 853 vp->v_iflag &= ~VI_INACTPEND;
854 vrelel(vp, 0); 854 vrelel(vp, 0);
855 } 855 }
856} 856}
857 857
858void 858void
859vrele_flush(void) 859vrele_flush(void)
860{ 860{
861 int gen; 861 int gen;
862 862
863 mutex_enter(&vrele_lock); 863 mutex_enter(&vrele_lock);
864 gen = vrele_gen; 864 gen = vrele_gen;
865 while (vrele_pending && gen == vrele_gen) { 865 while (vrele_pending && gen == vrele_gen) {
866 cv_broadcast(&vrele_cv); 866 cv_broadcast(&vrele_cv);
867 cv_wait(&vrele_cv, &vrele_lock); 867 cv_wait(&vrele_cv, &vrele_lock);
868 } 868 }
869 mutex_exit(&vrele_lock); 869 mutex_exit(&vrele_lock);
870} 870}
871 871
872/* 872/*
873 * Vnode reference, where a reference is already held by some other 873 * Vnode reference, where a reference is already held by some other
874 * object (for example, a file structure). 874 * object (for example, a file structure).
875 */ 875 */
876void 876void
877vref(vnode_t *vp) 877vref(vnode_t *vp)
878{ 878{
879 879
880 KASSERT((vp->v_iflag & VI_MARKER) == 0); 880 KASSERT((vp->v_iflag & VI_MARKER) == 0);
881 KASSERT(vp->v_usecount != 0); 881 KASSERT(vp->v_usecount != 0);
882 882
883 atomic_inc_uint(&vp->v_usecount); 883 atomic_inc_uint(&vp->v_usecount);
884} 884}
885 885
886/* 886/*
887 * Page or buffer structure gets a reference. 887 * Page or buffer structure gets a reference.
888 * Called with v_interlock held. 888 * Called with v_interlock held.
889 */ 889 */
890void 890void
891vholdl(vnode_t *vp) 891vholdl(vnode_t *vp)
892{ 892{
893 893
894 KASSERT(mutex_owned(vp->v_interlock)); 894 KASSERT(mutex_owned(vp->v_interlock));
895 KASSERT((vp->v_iflag & VI_MARKER) == 0); 895 KASSERT((vp->v_iflag & VI_MARKER) == 0);
896 896
897 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { 897 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
898 mutex_enter(&vnode_free_list_lock); 898 mutex_enter(&vnode_free_list_lock);
899 KASSERT(vp->v_freelisthd == &vnode_free_list); 899 KASSERT(vp->v_freelisthd == &vnode_free_list);
900 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 900 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
901 vp->v_freelisthd = &vnode_hold_list; 901 vp->v_freelisthd = &vnode_hold_list;
902 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 902 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
903 mutex_exit(&vnode_free_list_lock); 903 mutex_exit(&vnode_free_list_lock);
904 } 904 }
905} 905}
906 906
907/* 907/*
908 * Page or buffer structure frees a reference. 908 * Page or buffer structure frees a reference.
909 * Called with v_interlock held. 909 * Called with v_interlock held.
910 */ 910 */
911void 911void
912holdrelel(vnode_t *vp) 912holdrelel(vnode_t *vp)
913{ 913{
914 914
915 KASSERT(mutex_owned(vp->v_interlock)); 915 KASSERT(mutex_owned(vp->v_interlock));
916 KASSERT((vp->v_iflag & VI_MARKER) == 0); 916 KASSERT((vp->v_iflag & VI_MARKER) == 0);
917 917
918 if (vp->v_holdcnt <= 0) { 918 if (vp->v_holdcnt <= 0) {
919 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 919 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
920 } 920 }
921 921
922 vp->v_holdcnt--; 922 vp->v_holdcnt--;
923 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { 923 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
924 mutex_enter(&vnode_free_list_lock); 924 mutex_enter(&vnode_free_list_lock);
925 KASSERT(vp->v_freelisthd == &vnode_hold_list); 925 KASSERT(vp->v_freelisthd == &vnode_hold_list);
926 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 926 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
927 vp->v_freelisthd = &vnode_free_list; 927 vp->v_freelisthd = &vnode_free_list;
928 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 928 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
929 mutex_exit(&vnode_free_list_lock); 929 mutex_exit(&vnode_free_list_lock);
930 } 930 }
931} 931}
932 932
 933  /*
 934   * Disassociate the underlying file system from a vnode.
 935   *
 936   * Must be called with the interlock held, and will return with it held.
 937   */
 938  void
 939  vclean(vnode_t *vp, int flags)
 940  {
 941          lwp_t *l = curlwp;
 942          bool recycle, active;
 943          int error;
 944
 945          KASSERT(mutex_owned(vp->v_interlock));
 946          KASSERT((vp->v_iflag & VI_MARKER) == 0);
 947          KASSERT(vp->v_usecount != 0);
 948
 949          /* If cleaning is already in progress wait until done and return. */
 950          if (vp->v_iflag & VI_XLOCK) {
 951                  vwait(vp, VI_XLOCK);
 952                  return;
 953          }
 954
 955          /* If already clean, nothing to do. */
 956          if ((vp->v_iflag & VI_CLEAN) != 0) {
 957                  return;
 958          }
 959
 960          /*
 961           * Prevent the vnode from being recycled or brought into use
 962           * while we clean it out.
 963           */
 964          vp->v_iflag |= VI_XLOCK;
 965          if (vp->v_iflag & VI_EXECMAP) {
 966                  atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
 967                  atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
 968          }
 969          vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
 970          active = (vp->v_usecount & VC_MASK) > 1;
 971
 972          /* XXXAD should not lock vnode under layer */
 973          mutex_exit(vp->v_interlock);
 974          VOP_LOCK(vp, LK_EXCLUSIVE);
 975
 976          /*
 977           * Clean out any cached data associated with the vnode.
 978           * If purging an active vnode, it must be closed and
 979           * deactivated before being reclaimed. Note that the
 980           * VOP_INACTIVE will unlock the vnode.
 981           */
 982          if (flags & DOCLOSE) {
 983                  error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
 984                  if (error != 0) {
 985                          /* XXX, fix vn_start_write's grab of mp and use that. */
 986
 987                          if (wapbl_vphaswapbl(vp))
 988                                  WAPBL_DISCARD(wapbl_vptomp(vp));
 989                          error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
 990                  }
 991                  KASSERT(error == 0);
 992                  KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
 993                  if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
 994                          spec_node_revoke(vp);
 995                  }
 996          }
 997          if (active) {
 998                  VOP_INACTIVE(vp, &recycle);
 999          } else {
1000                  /*
1001                   * Any other processes trying to obtain this lock must first
1002                   * wait for VI_XLOCK to clear, then call the new lock operation.
1003                   */
1004                  VOP_UNLOCK(vp);
1005          }
1006
1007          /* Disassociate the underlying file system from the vnode. */
1008          if (VOP_RECLAIM(vp)) {
1009                  vnpanic(vp, "%s: cannot reclaim", __func__);
1010          }
1011
1012          KASSERT(vp->v_data == NULL);
1013          KASSERT(vp->v_uobj.uo_npages == 0);
1014
1015          if (vp->v_type == VREG && vp->v_ractx != NULL) {
1016                  uvm_ra_freectx(vp->v_ractx);
1017                  vp->v_ractx = NULL;
1018          }
1019
1020          /* Purge name cache. */
1021          cache_purge(vp);
1022
1023          /* Done with purge, notify sleepers of the grim news. */
1024          mutex_enter(vp->v_interlock);
1025          vp->v_op = dead_vnodeop_p;
1026          vp->v_tag = VT_NON;
1027          KNOTE(&vp->v_klist, NOTE_REVOKE);
1028          vp->v_iflag &= ~VI_XLOCK;
1029          vp->v_vflag &= ~VV_LOCKSWORK;
1030          if ((flags & DOCLOSE) != 0) {
1031                  vp->v_iflag |= VI_CLEAN;
1032          }
1033          cv_broadcast(&vp->v_cv);
1034
1035          KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1036  }
1037
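The contract stated in the comment is worth restating from the caller's side: the interlock is held on entry and on exit, and the caller must already own a reference, since vclean() asserts v_usecount != 0. A minimal sketch of the usual calling sequence, modelled on vgone() and vrecycle() elsewhere in this file; the caller itself is hypothetical:

        /*
         * Hypothetical caller: forcibly detach the file system state
         * from vp.  The caller already holds a reference; vrelel()
         * drops that reference and releases the interlock.
         */
        mutex_enter(vp->v_interlock);
        vclean(vp, DOCLOSE);            /* returns with the interlock still held */
        vrelel(vp, 0);                  /* drops our reference, exits the interlock */
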
1038  /*
1039   * Recycle an unused vnode to the front of the free list.
1040   * Release the passed interlock if the vnode will be recycled.
1041   */
1042  int
1043  vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1044  {
1045
1046          KASSERT((vp->v_iflag & VI_MARKER) == 0);
1047
1048          mutex_enter(vp->v_interlock);
1049          if (vp->v_usecount != 0) {
1050                  mutex_exit(vp->v_interlock);
1051                  return 0;
1052          }
1053          if (inter_lkp) {
1054                  mutex_exit(inter_lkp);
1055          }
1056          vremfree(vp);
1057          vp->v_usecount = 1;
1058          vclean(vp, DOCLOSE);
1059          vrelel(vp, 0);
1060          return 1;
1061  }
1062
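vrecycle() is safe to call opportunistically: if the vnode still has users it simply returns 0 with nothing changed, and it only releases the passed interlock when it actually recycles. A hedged sketch of a hypothetical file-system inactive routine that tries to recycle a vnode whose backing object has been removed; the condition name is illustrative, only vrecycle() and curlwp are taken from the source:

        /*
         * Hypothetical VOP_INACTIVE tail: the on-disk object is gone,
         * so try to recycle the vnode right away.  A zero return
         * means the vnode was still in use and was left alone.
         */
        if (file_was_deleted)                   /* illustrative condition */
                (void)vrecycle(vp, NULL, curlwp);
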
1063  /*
1064   * Eliminate all activity associated with the requested vnode
1065   * and with all vnodes aliased to the requested vnode.
1066   */
1067  void
1068  vrevoke(vnode_t *vp)
1069  {
1070          vnode_t *vq, **vpp;
1071          enum vtype type;
1072          dev_t dev;
1073
1074          KASSERT(vp->v_usecount > 0);
1075
1076          mutex_enter(vp->v_interlock);
1077          if ((vp->v_iflag & VI_CLEAN) != 0) {
1078                  mutex_exit(vp->v_interlock);
1079                  return;
1080          } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
1081                  atomic_inc_uint(&vp->v_usecount);
1082                  vclean(vp, DOCLOSE);
1083                  vrelel(vp, 0);
1084                  return;
1085          } else {
1086                  dev = vp->v_rdev;
1087                  type = vp->v_type;
1088                  mutex_exit(vp->v_interlock);
1089          }
1090
1091          vpp = &specfs_hash[SPECHASH(dev)];
1092          mutex_enter(&device_lock);
1093          for (vq = *vpp; vq != NULL;) {
1094                  /* If clean or being cleaned, then ignore it. */
1095                  mutex_enter(vq->v_interlock);
1096                  if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1097                      vq->v_type != type || vq->v_rdev != dev) {
1098                          mutex_exit(vq->v_interlock);
1099                          vq = vq->v_specnext;
1100                          continue;
1101                  }
1102                  mutex_exit(&device_lock);
1103                  if (vq->v_usecount == 0) {
1104                          vremfree(vq);
1105                          vq->v_usecount = 1;
1106                  } else {
1107                          atomic_inc_uint(&vq->v_usecount);
1108                  }
1109                  vclean(vq, DOCLOSE);
1110                  vrelel(vq, 0);
1111                  mutex_enter(&device_lock);
1112                  vq = *vpp;
1113          }
1114          mutex_exit(&device_lock);
1115  }
1116
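For VBLK and VCHR vnodes, vrevoke() walks the specfs_hash chain for the device number and cleans every alias, so a single call covers all vnodes that reach the same device; for any other vnode type it reduces to cleaning just the vnode passed in. A minimal sketch of a revoke-style caller, grounded only in the assertion and behaviour shown above; the surrounding operation is hypothetical:

        /*
         * Hypothetical revoke path: the caller must already hold a
         * reference (vrevoke() asserts v_usecount > 0); vrevoke()
         * takes care of the aliases itself.
         */
        KASSERT(vp->v_usecount > 0);
        vrevoke(vp);
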
1117  /*
1118   * Eliminate all activity associated with a vnode in preparation for
1119   * reuse. Drops a reference from the vnode.
1120   */
1121  void
1122  vgone(vnode_t *vp)
1123  {
1124
1125          mutex_enter(vp->v_interlock);
1126          vclean(vp, DOCLOSE);
1127          vrelel(vp, 0);
1128  }
1129
1130  /*
1131   * Update outstanding I/O count and do wakeup if requested.
1132   */
1133  void
1134  vwakeup(struct buf *bp)
1135  {
1136          vnode_t *vp;
1137
1138          if ((vp = bp->b_vp) == NULL)
1139                  return;
1140
1141          KASSERT(bp->b_objlock == vp->v_interlock);
1142          KASSERT(mutex_owned(bp->b_objlock));
1143
1144          if (--vp->v_numoutput < 0)
1145                  vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
1146          if (vp->v_numoutput == 0)
1147                  cv_broadcast(&vp->v_cv);
1148  }
1149
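vwakeup() is the completion-side bookkeeping for asynchronous writes: it expects the buffer's b_objlock, which for a buffer attached to a vnode is that vnode's interlock, to be held; it then decrements v_numoutput and wakes anyone waiting for the count to reach zero. A minimal sketch of a completion path that satisfies the two KASSERTs above; the path itself is hypothetical:

        /*
         * Hypothetical write-completion path: bp->b_objlock is the
         * interlock of bp->b_vp, as vwakeup() asserts, and v_numoutput
         * was bumped when the write was started.
         */
        mutex_enter(bp->b_objlock);
        vwakeup(bp);                    /* --v_numoutput; cv_broadcast() at zero */
        mutex_exit(bp->b_objlock);
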
1150  /*
1151   * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
1152   * recycled.
1153   */
1154  void
1155  vwait(vnode_t *vp, int flags)
1156  {
1157
1158          KASSERT(mutex_owned(vp->v_interlock));
1159          KASSERT(vp->v_usecount != 0);
1160
1161          while ((vp->v_iflag & flags) != 0)
1162                  cv_wait(&vp->v_cv, vp->v_interlock);
1163  }
1164
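vwait() must be entered with the interlock held and with a reference on the vnode, as the two KASSERTs spell out; cv_wait() drops and retakes the interlock, so it is still held on return. A minimal sketch of the typical use, matching the VI_XLOCK wait at the top of vclean() above; the caller is hypothetical:

        /*
         * Hypothetical caller: another thread is cleaning vp, wait
         * for it to finish before examining the vnode further.  The
         * caller also holds a reference, as vwait() asserts.
         */
        mutex_enter(vp->v_interlock);
        if ((vp->v_iflag & VI_XLOCK) != 0)
                vwait(vp, VI_XLOCK);    /* returns with the interlock held */
        mutex_exit(vp->v_interlock);
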
1165  int
1166  vfs_drainvnodes(long target)
1167  {
1168          int error;
1169
1170          mutex_enter(&vnode_free_list_lock);
1171
1172          while (numvnodes > target) {
1173                  error = cleanvnode();
1174                  if (error != 0)
1175                          return error;
1176                  mutex_enter(&vnode_free_list_lock);
1177          }
1178
1179          mutex_exit(&vnode_free_list_lock);
1180
1181          return 0;
1182  }
1183
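vfs_drainvnodes() keeps calling cleanvnode() until numvnodes drops to the requested target; cleanvnode() is entered with vnode_free_list_lock held and evidently drops it, which is why the loop re-enters the lock on each iteration and why an error return leaves the lock released. A hedged sketch of the kind of caller that shrinks the vnode pool after an administrative limit change; only vfs_drainvnodes() is taken from the source, the helper and its name are illustrative:

        /*
         * Hypothetical helper: the vnode limit was lowered, shed
         * vnodes down to the new value.  A non-zero return (for
         * example EBUSY) means not enough vnodes could be reclaimed.
         */
        static int
        example_set_vnode_limit(long new_limit)
        {
                int error;

                error = vfs_drainvnodes(new_limit);
                if (error != 0)
                        return error;
                return 0;
        }
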
1184  void
1185  vnpanic(vnode_t *vp, const char *fmt, ...)
1186  {
1187          va_list ap;
1188
1189  #ifdef DIAGNOSTIC
1190          vprint(NULL, vp);
1191  #endif
1192          va_start(ap, fmt);
1193          vpanic(fmt, ap);
1194          va_end(ap);
1195  }