Sun Jan 4 02:21:49 2009 UTC
fix rootdev format.


(christos)
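
The one-line change that motivates the log message sits deep in the second hunk, beyond the excerpt shown below; every line visible here is unchanged context apart from the RCS ID bump. As background for "fix rootdev format": rootdev is a dev_t, and a "format" fix of this kind makes a printf(9) conversion specifier agree with that type's actual width. A hedged sketch of the usual shape of such a change, assuming this commit follows the common pattern (the message text and cast below are illustrative, not the literal diff line):

	/*
	 * Hypothetical illustration only -- not the exact line touched
	 * by this commit.  Printing a dev_t through a conversion sized
	 * for a narrower integer type truncates the value and trips
	 * compiler format warnings; the idiom is to cast to a
	 * fixed-width type and use the matching conversion:
	 */
	printf("root device = 0x%" PRIx64 "\n", (uint64_t)rootdev);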
diff -r1.335.2.5 -r1.335.2.6 src/sys/kern/vfs_subr.c

cvs diff -r1.335.2.5 -r1.335.2.6 src/sys/kern/vfs_subr.c

--- src/sys/kern/vfs_subr.c 2008/12/30 18:50:25 1.335.2.5
+++ src/sys/kern/vfs_subr.c 2009/01/04 02:21:49 1.335.2.6
@@ -1,1083 +1,1083 @@
-/* $NetBSD: vfs_subr.c,v 1.335.2.5 2008/12/30 18:50:25 christos Exp $ */
+/* $NetBSD: vfs_subr.c,v 1.335.2.6 2009/01/04 02:21:49 christos Exp $ */
 
 /*-
  * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1989, 1993
  *    The Regents of the University of California. All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *    @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
  */
 
 /*
  * Note on v_usecount and locking:
  *
  * At nearly all points it is known that v_usecount could be zero, the
  * vnode interlock will be held.
  *
  * To change v_usecount away from zero, the interlock must be held. To
  * change from a non-zero value to zero, again the interlock must be
  * held.
  *
  * Changing the usecount from a non-zero value to a non-zero value can
  * safely be done using atomic operations, without the interlock held.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.335.2.5 2008/12/30 18:50:25 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.335.2.6 2009/01/04 02:21:49 christos Exp $");
 
 #include "opt_ddb.h"
 #include "opt_compat_netbsd.h"
 #include "opt_compat_43.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/fcntl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
 #include <sys/namei.h>
 #include <sys/ucred.h>
 #include <sys/buf.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <sys/syscallargs.h>
 #include <sys/device.h>
 #include <sys/filedesc.h>
 #include <sys/kauth.h>
 #include <sys/atomic.h>
 #include <sys/kthread.h>
 #include <sys/wapbl.h>
 
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/syncfs/syncfs.h>
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_readahead.h>
 #include <uvm/uvm_ddb.h>
 
 #include <sys/sysctl.h>
 
 const enum vtype iftovt_tab[16] = {
         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 };
 const int vttoif_tab[9] = {
         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
         S_IFSOCK, S_IFIFO, S_IFMT,
 };
 
 /*
  * Insq/Remq for the vnode usage lists.
  */
 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
 #define bufremvn(bp) { \
         LIST_REMOVE(bp, b_vnbufs); \
         (bp)->b_vnbufs.le_next = NOLIST; \
 }
 
 int doforce = 1; /* 1 => permit forcible unmounting */
 int prtactive = 0; /* 1 => print out reclaim of active vnodes */
 
 static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
 static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
 static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
 
 struct mntlist mountlist = /* mounted filesystem list */
     CIRCLEQ_HEAD_INITIALIZER(mountlist);
 
 u_int numvnodes;
 static specificdata_domain_t mount_specificdata_domain;
 
 static int vrele_pending;
 static int vrele_gen;
 static kmutex_t vrele_lock;
 static kcondvar_t vrele_cv;
 static lwp_t *vrele_lwp;
 
 kmutex_t mountlist_lock;
 kmutex_t mntid_lock;
 kmutex_t mntvnode_lock;
 kmutex_t vnode_free_list_lock;
 kmutex_t vfs_list_lock;
 
 static pool_cache_t vnode_cache;
 
 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
 
 /*
  * These define the root filesystem and device.
  */
 struct vnode *rootvnode;
 struct device *root_device; /* root device */
 
 /*
  * Local declarations.
  */
 
 static void vrele_thread(void *);
 static void insmntque(vnode_t *, struct mount *);
 static int getdevvp(dev_t, vnode_t **, enum vtype);
 static vnode_t *getcleanvnode(void);;
 void vpanic(vnode_t *, const char *);
 
 #ifdef DEBUG
 void printlockedvnodes(void);
 #endif
 
 #ifdef DIAGNOSTIC
 void
 vpanic(vnode_t *vp, const char *msg)
 {
 
         vprint(NULL, vp);
         panic("%s\n", msg);
 }
 #else
 #define vpanic(vp, msg) /* nothing */
 #endif
 
 void
 vn_init1(void)
 {
 
         vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
             NULL, IPL_NONE, NULL, NULL, NULL);
         KASSERT(vnode_cache != NULL);
 
         /* Create deferred release thread. */
         mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
         cv_init(&vrele_cv, "vrele");
         if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
             NULL, &vrele_lwp, "vrele"))
                 panic("fork vrele");
 }
 
 /*
  * Initialize the vnode management data structures.
  */
 void
 vntblinit(void)
 {
 
         mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
         mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
         mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
         mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
         mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
 
         mount_specificdata_domain = specificdata_domain_create();
 
         /* Initialize the filesystem syncer. */
         vn_initialize_syncerd();
         vn_init1();
 }
 
 int
 vfs_drainvnodes(long target, struct lwp *l)
 {
 
         while (numvnodes > target) {
                 vnode_t *vp;
 
                 mutex_enter(&vnode_free_list_lock);
                 vp = getcleanvnode();
                 if (vp == NULL)
                         return EBUSY; /* give up */
                 ungetnewvnode(vp);
         }
 
         return 0;
 }
 
 /*
  * Lookup a mount point by filesystem identifier.
  *
  * XXX Needs to add a reference to the mount point.
  */
 struct mount *
 vfs_getvfs(fsid_t *fsid)
 {
         struct mount *mp;
 
         mutex_enter(&mountlist_lock);
         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
                 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                     mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
                         mutex_exit(&mountlist_lock);
                         return (mp);
                 }
         }
         mutex_exit(&mountlist_lock);
         return ((struct mount *)0);
 }
 
 /*
  * Drop a reference to a mount structure, freeing if the last reference.
  */
 void
 vfs_destroy(struct mount *mp)
 {
 
         if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
                 return;
         }
 
         /*
          * Nothing else has visibility of the mount: we can now
          * free the data structures.
          */
         KASSERT(mp->mnt_refcnt == 0);
         specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
         rw_destroy(&mp->mnt_unmounting);
         mutex_destroy(&mp->mnt_updating);
         mutex_destroy(&mp->mnt_renamelock);
         if (mp->mnt_op != NULL) {
                 vfs_delref(mp->mnt_op);
         }
         kmem_free(mp, sizeof(*mp));
 }
 
 /*
  * grab a vnode from freelist and clean it.
  */
 vnode_t *
 getcleanvnode(void)
 {
         vnode_t *vp;
         vnodelst_t *listhd;
 
         KASSERT(mutex_owned(&vnode_free_list_lock));
 
 retry:
         listhd = &vnode_free_list;
 try_nextlist:
         TAILQ_FOREACH(vp, listhd, v_freelist) {
                 /*
                  * It's safe to test v_usecount and v_iflag
                  * without holding the interlock here, since
                  * these vnodes should never appear on the
                  * lists.
                  */
                 if (vp->v_usecount != 0) {
                         vpanic(vp, "free vnode isn't");
                 }
                 if ((vp->v_iflag & VI_CLEAN) != 0) {
                         vpanic(vp, "clean vnode on freelist");
                 }
                 if (vp->v_freelisthd != listhd) {
                         printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
                         vpanic(vp, "list head mismatch");
                 }
                 if (!mutex_tryenter(&vp->v_interlock))
                         continue;
                 /*
                  * Our lwp might hold the underlying vnode
                  * locked, so don't try to reclaim a VI_LAYER
                  * node if it's locked.
                  */
                 if ((vp->v_iflag & VI_XLOCK) == 0 &&
                     ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
                         break;
                 }
                 mutex_exit(&vp->v_interlock);
         }
 
         if (vp == NULL) {
                 if (listhd == &vnode_free_list) {
                         listhd = &vnode_hold_list;
                         goto try_nextlist;
                 }
                 mutex_exit(&vnode_free_list_lock);
                 return NULL;
         }
 
         /* Remove it from the freelist. */
         TAILQ_REMOVE(listhd, vp, v_freelist);
         vp->v_freelisthd = NULL;
         mutex_exit(&vnode_free_list_lock);
 
         /*
          * The vnode is still associated with a file system, so we must
          * clean it out before reusing it. We need to add a reference
          * before doing this. If the vnode gains another reference while
          * being cleaned out then we lose - retry.
          */
         atomic_inc_uint(&vp->v_usecount);
         vclean(vp, DOCLOSE);
         if (vp->v_usecount == 1) {
                 /* We're about to dirty it. */
                 vp->v_iflag &= ~VI_CLEAN;
                 mutex_exit(&vp->v_interlock);
                 if (vp->v_type == VBLK || vp->v_type == VCHR) {
                         spec_node_destroy(vp);
                 }
                 vp->v_type = VNON;
         } else {
                 /*
                  * Don't return to freelist - the holder of the last
                  * reference will destroy it.
                  */
                 vrelel(vp, 0); /* releases vp->v_interlock */
                 mutex_enter(&vnode_free_list_lock);
                 goto retry;
         }
 
         if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
             !TAILQ_EMPTY(&vp->v_uobj.memq)) {
                 vpanic(vp, "cleaned vnode isn't");
         }
         if (vp->v_numoutput != 0) {
                 vpanic(vp, "clean vnode has pending I/O's");
         }
         if ((vp->v_iflag & VI_ONWORKLST) != 0) {
                 vpanic(vp, "clean vnode on syncer list");
         }
 
         return vp;
 }
 
 /*
  * Mark a mount point as busy, and gain a new reference to it. Used to
  * prevent the file system from being unmounted during critical sections.
  *
  * => The caller must hold a pre-existing reference to the mount.
  * => Will fail if the file system is being unmounted, or is unmounted.
  */
 int
 vfs_busy(struct mount *mp, struct mount **nextp)
 {
 
         KASSERT(mp->mnt_refcnt > 0);
 
         if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
                 if (nextp != NULL) {
                         KASSERT(mutex_owned(&mountlist_lock));
                         *nextp = CIRCLEQ_NEXT(mp, mnt_list);
                 }
                 return EBUSY;
         }
         if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
                 rw_exit(&mp->mnt_unmounting);
                 if (nextp != NULL) {
                         KASSERT(mutex_owned(&mountlist_lock));
                         *nextp = CIRCLEQ_NEXT(mp, mnt_list);
                 }
                 return ENOENT;
         }
         if (nextp != NULL) {
                 mutex_exit(&mountlist_lock);
         }
         atomic_inc_uint(&mp->mnt_refcnt);
         return 0;
 }
 
 /*
  * Unbusy a busy filesystem.
  *
  * => If keepref is true, preserve reference added by vfs_busy().
  * => If nextp != NULL, acquire mountlist_lock.
  */
 void
 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
 {
 
         KASSERT(mp->mnt_refcnt > 0);
 
         if (nextp != NULL) {
                 mutex_enter(&mountlist_lock);
         }
         rw_exit(&mp->mnt_unmounting);
         if (!keepref) {
                 vfs_destroy(mp);
         }
         if (nextp != NULL) {
                 KASSERT(mutex_owned(&mountlist_lock));
                 *nextp = CIRCLEQ_NEXT(mp, mnt_list);
         }
 }
 
 /*
  * Lookup a filesystem type, and if found allocate and initialize
  * a mount structure for it.
  *
  * Devname is usually updated by mount(8) after booting.
  */
 int
 vfs_rootmountalloc(const char *fstypename, const char *devname,
     struct mount **mpp)
 {
         struct vfsops *vfsp = NULL;
         struct mount *mp;
 
         mutex_enter(&vfs_list_lock);
         LIST_FOREACH(vfsp, &vfs_list, vfs_list)
                 if (!strncmp(vfsp->vfs_name, fstypename,
                     sizeof(mp->mnt_stat.f_fstypename)))
                         break;
         if (vfsp == NULL) {
                 mutex_exit(&vfs_list_lock);
                 return (ENODEV);
         }
         vfsp->vfs_refcount++;
         mutex_exit(&vfs_list_lock);
 
         mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
         if (mp == NULL)
                 return ENOMEM;
         mp->mnt_refcnt = 1;
         rw_init(&mp->mnt_unmounting);
         mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
         mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
         (void)vfs_busy(mp, NULL);
         TAILQ_INIT(&mp->mnt_vnodelist);
         mp->mnt_op = vfsp;
         mp->mnt_flag = MNT_RDONLY;
         mp->mnt_vnodecovered = NULL;
         (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
             sizeof(mp->mnt_stat.f_fstypename));
         mp->mnt_stat.f_mntonname[0] = '/';
         mp->mnt_stat.f_mntonname[1] = '\0';
         mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
             '\0';
         (void)copystr(devname, mp->mnt_stat.f_mntfromname,
             sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
         mount_initspecific(mp);
         *mpp = mp;
         return (0);
 }
 
 /*
  * Routines having to do with the management of the vnode table.
  */
 extern int (**dead_vnodeop_p)(void *);
 
 /*
  * Return the next vnode from the free list.
  */
 int
 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
     vnode_t **vpp)
 {
         struct uvm_object *uobj;
         static int toggle;
         vnode_t *vp;
         int error = 0, tryalloc;
 
  try_again:
         if (mp != NULL) {
                 /*
                  * Mark filesystem busy while we're creating a
                  * vnode. If unmount is in progress, this will
                  * fail.
                  */
                 error = vfs_busy(mp, NULL);
                 if (error)
                         return error;
         }
 
         /*
          * We must choose whether to allocate a new vnode or recycle an
          * existing one. The criterion for allocating a new one is that
          * the total number of vnodes is less than the number desired or
          * there are no vnodes on either free list. Generally we only
          * want to recycle vnodes that have no buffers associated with
          * them, so we look first on the vnode_free_list. If it is empty,
          * we next consider vnodes with referencing buffers on the
          * vnode_hold_list. The toggle ensures that half the time we
          * will use a buffer from the vnode_hold_list, and half the time
          * we will allocate a new one unless the list has grown to twice
          * the desired size. We are reticent to recycle vnodes from the
          * vnode_hold_list because we will lose the identity of all its
          * referencing buffers.
          */
 
         vp = NULL;
 
         mutex_enter(&vnode_free_list_lock);
 
         toggle ^= 1;
         if (numvnodes > 2 * desiredvnodes)
                 toggle = 0;
 
         tryalloc = numvnodes < desiredvnodes ||
             (TAILQ_FIRST(&vnode_free_list) == NULL &&
             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
 
         if (tryalloc) {
                 numvnodes++;
                 mutex_exit(&vnode_free_list_lock);
                 if ((vp = vnalloc(NULL)) == NULL) {
                         mutex_enter(&vnode_free_list_lock);
                         numvnodes--;
                 } else
                         vp->v_usecount = 1;
         }
 
         if (vp == NULL) {
                 vp = getcleanvnode();
                 if (vp == NULL) {
                         if (mp != NULL) {
                                 vfs_unbusy(mp, false, NULL);
                         }
                         if (tryalloc) {
                                 printf("WARNING: unable to allocate new "
                                     "vnode, retrying...\n");
                                 kpause("newvn", false, hz, NULL);
                                 goto try_again;
                         }
                         tablefull("vnode", "increase kern.maxvnodes or NVNODE");
                         *vpp = 0;
                         return (ENFILE);
                 }
                 vp->v_iflag = 0;
                 vp->v_vflag = 0;
                 vp->v_uflag = 0;
                 vp->v_socket = NULL;
         }
 
         KASSERT(vp->v_usecount == 1);
         KASSERT(vp->v_freelisthd == NULL);
         KASSERT(LIST_EMPTY(&vp->v_nclist));
         KASSERT(LIST_EMPTY(&vp->v_dnclist));
 
         vp->v_type = VNON;
         vp->v_vnlock = &vp->v_lock;
         vp->v_tag = tag;
         vp->v_op = vops;
         insmntque(vp, mp);
         *vpp = vp;
         vp->v_data = 0;
 
         /*
          * initialize uvm_object within vnode.
          */
 
         uobj = &vp->v_uobj;
         KASSERT(uobj->pgops == &uvm_vnodeops);
         KASSERT(uobj->uo_npages == 0);
         KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
         vp->v_size = vp->v_writesize = VSIZENOTSET;
 
         if (mp != NULL) {
                 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
                         vp->v_vflag |= VV_MPSAFE;
                 vfs_unbusy(mp, true, NULL);
         }
 
         return (0);
 }
 
 /*
  * This is really just the reverse of getnewvnode(). Needed for
  * VFS_VGET functions who may need to push back a vnode in case
  * of a locking race.
  */
 void
 ungetnewvnode(vnode_t *vp)
 {
 
         KASSERT(vp->v_usecount == 1);
         KASSERT(vp->v_data == NULL);
         KASSERT(vp->v_freelisthd == NULL);
 
         mutex_enter(&vp->v_interlock);
         vp->v_iflag |= VI_CLEAN;
         vrelel(vp, 0);
 }
 
 /*
  * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
  * marker vnode and we are prepared to wait for the allocation.
  */
 vnode_t *
 vnalloc(struct mount *mp)
 {
         vnode_t *vp;
 
         vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
         if (vp == NULL) {
                 return NULL;
         }
 
         memset(vp, 0, sizeof(*vp));
         UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
         cv_init(&vp->v_cv, "vnode");
         /*
          * done by memset() above.
          *      LIST_INIT(&vp->v_nclist);
          *      LIST_INIT(&vp->v_dnclist);
          */
 
         if (mp != NULL) {
                 vp->v_mount = mp;
                 vp->v_type = VBAD;
                 vp->v_iflag = VI_MARKER;
         } else {
                 rw_init(&vp->v_lock.vl_lock);
         }
 
         return vp;
 }
 
 /*
  * Free an unused, unreferenced vnode.
  */
 void
 vnfree(vnode_t *vp)
 {
 
         KASSERT(vp->v_usecount == 0);
 
         if ((vp->v_iflag & VI_MARKER) == 0) {
                 rw_destroy(&vp->v_lock.vl_lock);
                 mutex_enter(&vnode_free_list_lock);
                 numvnodes--;
                 mutex_exit(&vnode_free_list_lock);
         }
 
         UVM_OBJ_DESTROY(&vp->v_uobj);
         cv_destroy(&vp->v_cv);
         pool_cache_put(vnode_cache, vp);
 }
 
 /*
  * Remove a vnode from its freelist.
  */
 static inline void
 vremfree(vnode_t *vp)
 {
 
         KASSERT(mutex_owned(&vp->v_interlock));
         KASSERT(vp->v_usecount == 0);
 
         /*
          * Note that the reference count must not change until
          * the vnode is removed.
          */
         mutex_enter(&vnode_free_list_lock);
         if (vp->v_holdcnt > 0) {
                 KASSERT(vp->v_freelisthd == &vnode_hold_list);
         } else {
                 KASSERT(vp->v_freelisthd == &vnode_free_list);
         }
         TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
         vp->v_freelisthd = NULL;
         mutex_exit(&vnode_free_list_lock);
 }
 
 /*
  * Move a vnode from one mount queue to another.
  */
 static void
 insmntque(vnode_t *vp, struct mount *mp)
 {
         struct mount *omp;
 
 #ifdef DIAGNOSTIC
         if ((mp != NULL) &&
             (mp->mnt_iflag & IMNT_UNMOUNT) &&
             !(mp->mnt_flag & MNT_SOFTDEP) &&
             vp->v_tag != VT_VFS) {
                 panic("insmntque into dying filesystem");
         }
 #endif
 
         mutex_enter(&mntvnode_lock);
         /*
          * Delete from old mount point vnode list, if on one.
          */
         if ((omp = vp->v_mount) != NULL)
                 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
         /*
          * Insert into list of vnodes for the new mount point, if
          * available. The caller must take a reference on the mount
          * structure and donate to the vnode.
          */
         if ((vp->v_mount = mp) != NULL)
                 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
         mutex_exit(&mntvnode_lock);
 
         if (omp != NULL) {
                 /* Release reference to old mount. */
                 vfs_destroy(omp);
         }
 }
 
 /*
  * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
  * recycled.
  */
 void
 vwait(vnode_t *vp, int flags)
 {
 
         KASSERT(mutex_owned(&vp->v_interlock));
         KASSERT(vp->v_usecount != 0);
 
         while ((vp->v_iflag & flags) != 0)
                 cv_wait(&vp->v_cv, &vp->v_interlock);
 }
 
 /*
  * Insert a marker vnode into a mount's vnode list, after the
  * specified vnode. mntvnode_lock must be held.
  */
 void
 vmark(vnode_t *mvp, vnode_t *vp)
 {
         struct mount *mp;
 
         mp = mvp->v_mount;
 
         KASSERT(mutex_owned(&mntvnode_lock));
         KASSERT((mvp->v_iflag & VI_MARKER) != 0);
         KASSERT(vp->v_mount == mp);
 
         TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
 }
 
 /*
  * Remove a marker vnode from a mount's vnode list, and return
  * a pointer to the next vnode in the list. mntvnode_lock must
  * be held.
  */
 vnode_t *
 vunmark(vnode_t *mvp)
 {
         vnode_t *vp;
         struct mount *mp;
 
         mp = mvp->v_mount;
 
         KASSERT(mutex_owned(&mntvnode_lock));
         KASSERT((mvp->v_iflag & VI_MARKER) != 0);
 
         vp = TAILQ_NEXT(mvp, v_mntvnodes);
         TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
 
         KASSERT(vp == NULL || vp->v_mount == mp);
 
         return vp;
 }
 
824/* 824/*
825 * Update outstanding I/O count and do wakeup if requested. 825 * Update outstanding I/O count and do wakeup if requested.
826 */ 826 */
827void 827void
828vwakeup(struct buf *bp) 828vwakeup(struct buf *bp)
829{ 829{
830 struct vnode *vp; 830 struct vnode *vp;
831 831
832 if ((vp = bp->b_vp) == NULL) 832 if ((vp = bp->b_vp) == NULL)
833 return; 833 return;
834 834
835 KASSERT(bp->b_objlock == &vp->v_interlock); 835 KASSERT(bp->b_objlock == &vp->v_interlock);
836 KASSERT(mutex_owned(bp->b_objlock)); 836 KASSERT(mutex_owned(bp->b_objlock));
837 837
838 if (--vp->v_numoutput < 0) 838 if (--vp->v_numoutput < 0)
839 panic("vwakeup: neg numoutput, vp %p", vp); 839 panic("vwakeup: neg numoutput, vp %p", vp);
840 if (vp->v_numoutput == 0) 840 if (vp->v_numoutput == 0)
841 cv_broadcast(&vp->v_cv); 841 cv_broadcast(&vp->v_cv);
842} 842}
843 843
844/* 844/*
845 * Flush out and invalidate all buffers associated with a vnode. 845 * Flush out and invalidate all buffers associated with a vnode.
846 * Called with the underlying vnode locked, which should prevent new dirty 846 * Called with the underlying vnode locked, which should prevent new dirty
847 * buffers from being queued. 847 * buffers from being queued.
848 */ 848 */
849int 849int
850vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, 850vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
851 bool catch, int slptimeo) 851 bool catch, int slptimeo)
852{ 852{
853 struct buf *bp, *nbp; 853 struct buf *bp, *nbp;
854 int error; 854 int error;
855 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 855 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
856 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); 856 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
857 857
858 /* XXXUBC this doesn't look at flags or slp* */ 858 /* XXXUBC this doesn't look at flags or slp* */
859 mutex_enter(&vp->v_interlock); 859 mutex_enter(&vp->v_interlock);
860 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 860 error = VOP_PUTPAGES(vp, 0, 0, flushflags);
861 if (error) { 861 if (error) {
862 return error; 862 return error;
863 } 863 }
864 864
865 if (flags & V_SAVE) { 865 if (flags & V_SAVE) {
866 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); 866 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
867 if (error) 867 if (error)
868 return (error); 868 return (error);
869 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd)); 869 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
870 } 870 }
871 871
872 mutex_enter(&bufcache_lock); 872 mutex_enter(&bufcache_lock);
873restart: 873restart:
874 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 874 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
875 nbp = LIST_NEXT(bp, b_vnbufs); 875 nbp = LIST_NEXT(bp, b_vnbufs);
876 error = bbusy(bp, catch, slptimeo, NULL); 876 error = bbusy(bp, catch, slptimeo, NULL);
877 if (error != 0) { 877 if (error != 0) {
878 if (error == EPASSTHROUGH) 878 if (error == EPASSTHROUGH)
879 goto restart; 879 goto restart;
880 mutex_exit(&bufcache_lock); 880 mutex_exit(&bufcache_lock);
881 return (error); 881 return (error);
882 } 882 }
883 brelsel(bp, BC_INVAL | BC_VFLUSH); 883 brelsel(bp, BC_INVAL | BC_VFLUSH);
884 } 884 }
885 885
886 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 886 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
887 nbp = LIST_NEXT(bp, b_vnbufs); 887 nbp = LIST_NEXT(bp, b_vnbufs);
888 error = bbusy(bp, catch, slptimeo, NULL); 888 error = bbusy(bp, catch, slptimeo, NULL);
889 if (error != 0) { 889 if (error != 0) {
890 if (error == EPASSTHROUGH) 890 if (error == EPASSTHROUGH)
891 goto restart; 891 goto restart;
892 mutex_exit(&bufcache_lock); 892 mutex_exit(&bufcache_lock);
893 return (error); 893 return (error);
894 } 894 }
895 /* 895 /*
896 * XXX Since there are no node locks for NFS, I believe 896 * XXX Since there are no node locks for NFS, I believe
897 * there is a slight chance that a delayed write will 897 * there is a slight chance that a delayed write will
898 * occur while sleeping just above, so check for it. 898 * occur while sleeping just above, so check for it.
899 */ 899 */
900 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { 900 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
901#ifdef DEBUG 901#ifdef DEBUG
902 printf("buffer still DELWRI\n"); 902 printf("buffer still DELWRI\n");
903#endif 903#endif
904 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 904 bp->b_cflags |= BC_BUSY | BC_VFLUSH;
905 mutex_exit(&bufcache_lock); 905 mutex_exit(&bufcache_lock);
906 VOP_BWRITE(bp); 906 VOP_BWRITE(bp);
907 mutex_enter(&bufcache_lock); 907 mutex_enter(&bufcache_lock);
908 goto restart; 908 goto restart;
909 } 909 }
910 brelsel(bp, BC_INVAL | BC_VFLUSH); 910 brelsel(bp, BC_INVAL | BC_VFLUSH);
911 } 911 }
912 912
913#ifdef DIAGNOSTIC 913#ifdef DIAGNOSTIC
914 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 914 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
915 panic("vinvalbuf: flush failed, vp %p", vp); 915 panic("vinvalbuf: flush failed, vp %p", vp);
916#endif 916#endif
917 917
918 mutex_exit(&bufcache_lock); 918 mutex_exit(&bufcache_lock);
919 919
920 return (0); 920 return (0);
921} 921}
922 922
923/* 923/*
924 * Destroy any in core blocks past the truncation length. 924 * Destroy any in core blocks past the truncation length.
925 * Called with the underlying vnode locked, which should prevent new dirty 925 * Called with the underlying vnode locked, which should prevent new dirty
926 * buffers from being queued. 926 * buffers from being queued.
927 */ 927 */
928int 928int
929vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) 929vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
930{ 930{
931 struct buf *bp, *nbp; 931 struct buf *bp, *nbp;
932 int error; 932 int error;
933 voff_t off; 933 voff_t off;
934 934
935 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 935 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
936 mutex_enter(&vp->v_interlock); 936 mutex_enter(&vp->v_interlock);
937 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 937 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
938 if (error) { 938 if (error) {
939 return error; 939 return error;
940 } 940 }
941 941
942 mutex_enter(&bufcache_lock); 942 mutex_enter(&bufcache_lock);
943restart: 943restart:
944 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 944 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
945 nbp = LIST_NEXT(bp, b_vnbufs); 945 nbp = LIST_NEXT(bp, b_vnbufs);
946 if (bp->b_lblkno < lbn) 946 if (bp->b_lblkno < lbn)
947 continue; 947 continue;
948 error = bbusy(bp, catch, slptimeo, NULL); 948 error = bbusy(bp, catch, slptimeo, NULL);
949 if (error != 0) { 949 if (error != 0) {
950 if (error == EPASSTHROUGH) 950 if (error == EPASSTHROUGH)
951 goto restart; 951 goto restart;
952 mutex_exit(&bufcache_lock); 952 mutex_exit(&bufcache_lock);
953 return (error); 953 return (error);
954 } 954 }
955 brelsel(bp, BC_INVAL | BC_VFLUSH); 955 brelsel(bp, BC_INVAL | BC_VFLUSH);
956 } 956 }
957 957
958 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 958 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
959 nbp = LIST_NEXT(bp, b_vnbufs); 959 nbp = LIST_NEXT(bp, b_vnbufs);
960 if (bp->b_lblkno < lbn) 960 if (bp->b_lblkno < lbn)
961 continue; 961 continue;
962 error = bbusy(bp, catch, slptimeo, NULL); 962 error = bbusy(bp, catch, slptimeo, NULL);
963 if (error != 0) { 963 if (error != 0) {
964 if (error == EPASSTHROUGH) 964 if (error == EPASSTHROUGH)
965 goto restart; 965 goto restart;
966 mutex_exit(&bufcache_lock); 966 mutex_exit(&bufcache_lock);
967 return (error); 967 return (error);
968 } 968 }
969 brelsel(bp, BC_INVAL | BC_VFLUSH); 969 brelsel(bp, BC_INVAL | BC_VFLUSH);
970 } 970 }
971 mutex_exit(&bufcache_lock); 971 mutex_exit(&bufcache_lock);
972 972
973 return (0); 973 return (0);
974} 974}
975 975
976/* 976/*
977 * Flush all dirty buffers from a vnode. 977 * Flush all dirty buffers from a vnode.
978 * Called with the underlying vnode locked, which should prevent new dirty 978 * Called with the underlying vnode locked, which should prevent new dirty
979 * buffers from being queued. 979 * buffers from being queued.
980 */ 980 */
981void 981void
982vflushbuf(struct vnode *vp, int sync) 982vflushbuf(struct vnode *vp, int sync)
983{ 983{
984 struct buf *bp, *nbp; 984 struct buf *bp, *nbp;
985 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 985 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
986 bool dirty; 986 bool dirty;
987 987
988 mutex_enter(&vp->v_interlock); 988 mutex_enter(&vp->v_interlock);
989 (void) VOP_PUTPAGES(vp, 0, 0, flags); 989 (void) VOP_PUTPAGES(vp, 0, 0, flags);
990 990
991loop: 991loop:
992 mutex_enter(&bufcache_lock); 992 mutex_enter(&bufcache_lock);
993 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 993 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
994 nbp = LIST_NEXT(bp, b_vnbufs); 994 nbp = LIST_NEXT(bp, b_vnbufs);
995 if ((bp->b_cflags & BC_BUSY)) 995 if ((bp->b_cflags & BC_BUSY))
996 continue; 996 continue;
997 if ((bp->b_oflags & BO_DELWRI) == 0) 997 if ((bp->b_oflags & BO_DELWRI) == 0)
998 panic("vflushbuf: not dirty, bp %p", bp); 998 panic("vflushbuf: not dirty, bp %p", bp);
999 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 999 bp->b_cflags |= BC_BUSY | BC_VFLUSH;
1000 mutex_exit(&bufcache_lock); 1000 mutex_exit(&bufcache_lock);
1001 /* 1001 /*
1002 * Wait for I/O associated with indirect blocks to complete, 1002 * Wait for I/O associated with indirect blocks to complete,
1003 * since there is no way to quickly wait for them below. 1003 * since there is no way to quickly wait for them below.
1004 */ 1004 */
1005 if (bp->b_vp == vp || sync == 0) 1005 if (bp->b_vp == vp || sync == 0)
1006 (void) bawrite(bp); 1006 (void) bawrite(bp);
1007 else 1007 else
1008 (void) bwrite(bp); 1008 (void) bwrite(bp);
1009 goto loop; 1009 goto loop;
1010 } 1010 }
1011 mutex_exit(&bufcache_lock); 1011 mutex_exit(&bufcache_lock);
1012 1012
1013 if (sync == 0) 1013 if (sync == 0)
1014 return; 1014 return;
1015 1015
1016 mutex_enter(&vp->v_interlock); 1016 mutex_enter(&vp->v_interlock);
1017 while (vp->v_numoutput != 0) 1017 while (vp->v_numoutput != 0)
1018 cv_wait(&vp->v_cv, &vp->v_interlock); 1018 cv_wait(&vp->v_cv, &vp->v_interlock);
1019 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); 1019 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
1020 mutex_exit(&vp->v_interlock); 1020 mutex_exit(&vp->v_interlock);
1021 1021
1022 if (dirty) { 1022 if (dirty) {
1023 vprint("vflushbuf: dirty", vp); 1023 vprint("vflushbuf: dirty", vp);
1024 goto loop; 1024 goto loop;
1025 } 1025 }
1026} 1026}
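Per the comment above vflushbuf(), the vnode must stay locked so no new dirty buffers can be queued while v_dirtyblkhd drains; with sync != 0 the function also waits for v_numoutput to reach zero. A minimal caller-side sketch, assuming an fsync-style caller (the vn_lock/VOP_UNLOCK bracketing is an assumption, not code from this file):

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* keep new dirtiers out */
	vflushbuf(vp, 1);	/* sync != 0: wait until writes complete */
	VOP_UNLOCK(vp, 0);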
1027 1027
1028/* 1028/*
1029 * Create a vnode for a block device. 1029 * Create a vnode for a block device.
1030 * Used for root filesystem and swap areas. 1030 * Used for root filesystem and swap areas.
1031 * Also used for memory file system special devices. 1031 * Also used for memory file system special devices.
1032 */ 1032 */
1033int 1033int
1034bdevvp(dev_t dev, vnode_t **vpp) 1034bdevvp(dev_t dev, vnode_t **vpp)
1035{ 1035{
1036 1036
1037 return (getdevvp(dev, vpp, VBLK)); 1037 return (getdevvp(dev, vpp, VBLK));
1038} 1038}
1039 1039
1040/* 1040/*
1041 * Create a vnode for a character device. 1041 * Create a vnode for a character device.
1042 * Used for kernfs and some console handling. 1042 * Used for kernfs and some console handling.
1043 */ 1043 */
1044int 1044int
1045cdevvp(dev_t dev, vnode_t **vpp) 1045cdevvp(dev_t dev, vnode_t **vpp)
1046{ 1046{
1047 1047
1048 return (getdevvp(dev, vpp, VCHR)); 1048 return (getdevvp(dev, vpp, VCHR));
1049} 1049}
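Both wrappers funnel into getdevvp() with the matching vnode type; vfs_mountroot() below uses bdevvp() exactly this way for the root disk. A hedged sketch of the pattern (error handling trimmed, dev is a placeholder device number):

	vnode_t *devvp;

	if (bdevvp(dev, &devvp) != 0)
		panic("cannot get vnode for device");	/* as mountroot does */
	/* ... use devvp; drop the reference when done ... */
	vrele(devvp);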
1050 1050
1051/* 1051/*
1052 * Associate a buffer with a vnode. There must already be a hold on 1052 * Associate a buffer with a vnode. There must already be a hold on
1053 * the vnode. 1053 * the vnode.
1054 */ 1054 */
1055void 1055void
1056bgetvp(struct vnode *vp, struct buf *bp) 1056bgetvp(struct vnode *vp, struct buf *bp)
1057{ 1057{
1058 1058
1059 KASSERT(bp->b_vp == NULL); 1059 KASSERT(bp->b_vp == NULL);
1060 KASSERT(bp->b_objlock == &buffer_lock); 1060 KASSERT(bp->b_objlock == &buffer_lock);
1061 KASSERT(mutex_owned(&vp->v_interlock)); 1061 KASSERT(mutex_owned(&vp->v_interlock));
1062 KASSERT(mutex_owned(&bufcache_lock)); 1062 KASSERT(mutex_owned(&bufcache_lock));
1063 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1063 KASSERT((bp->b_cflags & BC_BUSY) != 0);
1064 KASSERT(!cv_has_waiters(&bp->b_done)); 1064 KASSERT(!cv_has_waiters(&bp->b_done));
1065 1065
1066 vholdl(vp); 1066 vholdl(vp);
1067 bp->b_vp = vp; 1067 bp->b_vp = vp;
1068 if (vp->v_type == VBLK || vp->v_type == VCHR) 1068 if (vp->v_type == VBLK || vp->v_type == VCHR)
1069 bp->b_dev = vp->v_rdev; 1069 bp->b_dev = vp->v_rdev;
1070 else 1070 else
1071 bp->b_dev = NODEV; 1071 bp->b_dev = NODEV;
1072 1072
1073 /* 1073 /*
1074 * Insert onto list for new vnode. 1074 * Insert onto list for new vnode.
1075 */ 1075 */
1076 bufinsvn(bp, &vp->v_cleanblkhd); 1076 bufinsvn(bp, &vp->v_cleanblkhd);
1077 bp->b_objlock = &vp->v_interlock; 1077 bp->b_objlock = &vp->v_interlock;
1078} 1078}
1079 1079
1080/* 1080/*
1081 * Disassociate a buffer from a vnode. 1081 * Disassociate a buffer from a vnode.
1082 */ 1082 */
1083void 1083void
@@ -1326,1876 +1326,1877 @@ vrelel(vnode_t *vp, int flags) @@ -1326,1876 +1326,1877 @@ vrelel(vnode_t *vp, int flags)
1326{ 1326{
1327 bool recycle, defer; 1327 bool recycle, defer;
1328 int error; 1328 int error;
1329 1329
1330 KASSERT(mutex_owned(&vp->v_interlock)); 1330 KASSERT(mutex_owned(&vp->v_interlock));
1331 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1331 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1332 KASSERT(vp->v_freelisthd == NULL); 1332 KASSERT(vp->v_freelisthd == NULL);
1333 1333
1334 if (__predict_false(vp->v_op == dead_vnodeop_p && 1334 if (__predict_false(vp->v_op == dead_vnodeop_p &&
1335 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) { 1335 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
1336 vpanic(vp, "dead but not clean"); 1336 vpanic(vp, "dead but not clean");
1337 } 1337 }
1338 1338
1339 /* 1339 /*
1340 * If not the last reference, just drop the reference count 1340 * If not the last reference, just drop the reference count
1341 * and unlock. 1341 * and unlock.
1342 */ 1342 */
1343 if (vtryrele(vp)) { 1343 if (vtryrele(vp)) {
1344 vp->v_iflag |= VI_INACTREDO; 1344 vp->v_iflag |= VI_INACTREDO;
1345 mutex_exit(&vp->v_interlock); 1345 mutex_exit(&vp->v_interlock);
1346 return; 1346 return;
1347 } 1347 }
1348 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 1348 if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
1349 vpanic(vp, "vrelel: bad ref count"); 1349 vpanic(vp, "vrelel: bad ref count");
1350 } 1350 }
1351 1351
1352 KASSERT((vp->v_iflag & VI_XLOCK) == 0); 1352 KASSERT((vp->v_iflag & VI_XLOCK) == 0);
1353 1353
1354 /* 1354 /*
1355 * If not clean, deactivate the vnode, but preserve 1355 * If not clean, deactivate the vnode, but preserve
1356 * our reference across the call to VOP_INACTIVE(). 1356 * our reference across the call to VOP_INACTIVE().
1357 */ 1357 */
1358 retry: 1358 retry:
1359 if ((vp->v_iflag & VI_CLEAN) == 0) { 1359 if ((vp->v_iflag & VI_CLEAN) == 0) {
1360 recycle = false; 1360 recycle = false;
1361 vp->v_iflag |= VI_INACTNOW; 1361 vp->v_iflag |= VI_INACTNOW;
1362 1362
1363 /* 1363 /*
1364 * XXX This ugly block can be largely eliminated if 1364 * XXX This ugly block can be largely eliminated if
1365 * locking is pushed down into the file systems. 1365 * locking is pushed down into the file systems.
1366 */ 1366 */
1367 if (curlwp == uvm.pagedaemon_lwp) { 1367 if (curlwp == uvm.pagedaemon_lwp) {
1368 /* The pagedaemon can't wait around; defer. */ 1368 /* The pagedaemon can't wait around; defer. */
1369 defer = true; 1369 defer = true;
1370 } else if (curlwp == vrele_lwp) { 1370 } else if (curlwp == vrele_lwp) {
1371 /* We have to try harder. */ 1371 /* We have to try harder. */
1372 vp->v_iflag &= ~VI_INACTREDO; 1372 vp->v_iflag &= ~VI_INACTREDO;
1373 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1373 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1374 LK_RETRY); 1374 LK_RETRY);
1375 if (error != 0) { 1375 if (error != 0) {
1376 /* XXX */ 1376 /* XXX */
 1377     vpanic(vp, "vrele: unable to lock vnode"); 1377     vpanic(vp, "vrele: unable to lock vnode");
1378 } 1378 }
1379 defer = false; 1379 defer = false;
1380 } else if ((vp->v_iflag & VI_LAYER) != 0) { 1380 } else if ((vp->v_iflag & VI_LAYER) != 0) {
1381 /*  1381 /*
1382 * Acquiring the stack's lock in vclean() even 1382 * Acquiring the stack's lock in vclean() even
1383 * for an honest vput/vrele is dangerous because 1383 * for an honest vput/vrele is dangerous because
1384 * our caller may hold other vnode locks; defer. 1384 * our caller may hold other vnode locks; defer.
1385 */ 1385 */
1386 defer = true; 1386 defer = true;
1387 } else {  1387 } else {
1388 /* If we can't acquire the lock, then defer. */ 1388 /* If we can't acquire the lock, then defer. */
1389 vp->v_iflag &= ~VI_INACTREDO; 1389 vp->v_iflag &= ~VI_INACTREDO;
1390 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1390 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1391 LK_NOWAIT); 1391 LK_NOWAIT);
1392 if (error != 0) { 1392 if (error != 0) {
1393 defer = true; 1393 defer = true;
1394 mutex_enter(&vp->v_interlock); 1394 mutex_enter(&vp->v_interlock);
1395 } else { 1395 } else {
1396 defer = false; 1396 defer = false;
1397 } 1397 }
1398 } 1398 }
1399 1399
1400 if (defer) { 1400 if (defer) {
1401 /* 1401 /*
1402 * Defer reclaim to the kthread; it's not safe to 1402 * Defer reclaim to the kthread; it's not safe to
1403 * clean it here. We donate it our last reference. 1403 * clean it here. We donate it our last reference.
1404 */ 1404 */
1405 KASSERT(mutex_owned(&vp->v_interlock)); 1405 KASSERT(mutex_owned(&vp->v_interlock));
1406 KASSERT((vp->v_iflag & VI_INACTPEND) == 0); 1406 KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
1407 vp->v_iflag &= ~VI_INACTNOW; 1407 vp->v_iflag &= ~VI_INACTNOW;
1408 vp->v_iflag |= VI_INACTPEND; 1408 vp->v_iflag |= VI_INACTPEND;
1409 mutex_enter(&vrele_lock); 1409 mutex_enter(&vrele_lock);
1410 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); 1410 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
1411 if (++vrele_pending > (desiredvnodes >> 8)) 1411 if (++vrele_pending > (desiredvnodes >> 8))
1412 cv_signal(&vrele_cv);  1412 cv_signal(&vrele_cv);
1413 mutex_exit(&vrele_lock); 1413 mutex_exit(&vrele_lock);
1414 mutex_exit(&vp->v_interlock); 1414 mutex_exit(&vp->v_interlock);
1415 return; 1415 return;
1416 } 1416 }
1417 1417
1418#ifdef DIAGNOSTIC 1418#ifdef DIAGNOSTIC
1419 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1419 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1420 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 1420 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
1421 vprint("vrelel: missing VOP_CLOSE()", vp); 1421 vprint("vrelel: missing VOP_CLOSE()", vp);
1422 } 1422 }
1423#endif 1423#endif
1424 1424
1425 /* 1425 /*
1426 * The vnode can gain another reference while being 1426 * The vnode can gain another reference while being
1427 * deactivated. If VOP_INACTIVE() indicates that 1427 * deactivated. If VOP_INACTIVE() indicates that
1428 * the described file has been deleted, then recycle 1428 * the described file has been deleted, then recycle
1429 * the vnode irrespective of additional references. 1429 * the vnode irrespective of additional references.
1430 * Another thread may be waiting to re-use the on-disk 1430 * Another thread may be waiting to re-use the on-disk
1431 * inode. 1431 * inode.
1432 * 1432 *
1433 * Note that VOP_INACTIVE() will drop the vnode lock. 1433 * Note that VOP_INACTIVE() will drop the vnode lock.
1434 */ 1434 */
1435 VOP_INACTIVE(vp, &recycle); 1435 VOP_INACTIVE(vp, &recycle);
1436 mutex_enter(&vp->v_interlock); 1436 mutex_enter(&vp->v_interlock);
1437 vp->v_iflag &= ~VI_INACTNOW; 1437 vp->v_iflag &= ~VI_INACTNOW;
1438 if (!recycle) { 1438 if (!recycle) {
1439 if (vtryrele(vp)) { 1439 if (vtryrele(vp)) {
1440 mutex_exit(&vp->v_interlock); 1440 mutex_exit(&vp->v_interlock);
1441 return; 1441 return;
1442 } 1442 }
1443 1443
1444 /* 1444 /*
1445 * If we grew another reference while 1445 * If we grew another reference while
1446 * VOP_INACTIVE() was underway, retry. 1446 * VOP_INACTIVE() was underway, retry.
1447 */ 1447 */
1448 if ((vp->v_iflag & VI_INACTREDO) != 0) { 1448 if ((vp->v_iflag & VI_INACTREDO) != 0) {
1449 goto retry; 1449 goto retry;
1450 } 1450 }
1451 } 1451 }
1452 1452
1453 /* Take care of space accounting. */ 1453 /* Take care of space accounting. */
1454 if (vp->v_iflag & VI_EXECMAP) { 1454 if (vp->v_iflag & VI_EXECMAP) {
1455 atomic_add_int(&uvmexp.execpages, 1455 atomic_add_int(&uvmexp.execpages,
1456 -vp->v_uobj.uo_npages); 1456 -vp->v_uobj.uo_npages);
1457 atomic_add_int(&uvmexp.filepages, 1457 atomic_add_int(&uvmexp.filepages,
1458 vp->v_uobj.uo_npages); 1458 vp->v_uobj.uo_npages);
1459 } 1459 }
1460 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 1460 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
1461 vp->v_vflag &= ~VV_MAPPED; 1461 vp->v_vflag &= ~VV_MAPPED;
1462 1462
1463 /* 1463 /*
1464 * Recycle the vnode if the file is now unused (unlinked), 1464 * Recycle the vnode if the file is now unused (unlinked),
1465 * otherwise just free it. 1465 * otherwise just free it.
1466 */ 1466 */
1467 if (recycle) { 1467 if (recycle) {
1468 vclean(vp, DOCLOSE); 1468 vclean(vp, DOCLOSE);
1469 } 1469 }
1470 KASSERT(vp->v_usecount > 0); 1470 KASSERT(vp->v_usecount > 0);
1471 } 1471 }
1472 1472
1473 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 1473 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
1474 /* Gained another reference while being reclaimed. */ 1474 /* Gained another reference while being reclaimed. */
1475 mutex_exit(&vp->v_interlock); 1475 mutex_exit(&vp->v_interlock);
1476 return; 1476 return;
1477 } 1477 }
1478 1478
1479 if ((vp->v_iflag & VI_CLEAN) != 0) { 1479 if ((vp->v_iflag & VI_CLEAN) != 0) {
1480 /* 1480 /*
1481 * It's clean so destroy it. It isn't referenced 1481 * It's clean so destroy it. It isn't referenced
1482 * anywhere since it has been reclaimed. 1482 * anywhere since it has been reclaimed.
1483 */ 1483 */
1484 KASSERT(vp->v_holdcnt == 0); 1484 KASSERT(vp->v_holdcnt == 0);
1485 KASSERT(vp->v_writecount == 0); 1485 KASSERT(vp->v_writecount == 0);
1486 mutex_exit(&vp->v_interlock); 1486 mutex_exit(&vp->v_interlock);
1487 insmntque(vp, NULL); 1487 insmntque(vp, NULL);
1488 if (vp->v_type == VBLK || vp->v_type == VCHR) { 1488 if (vp->v_type == VBLK || vp->v_type == VCHR) {
1489 spec_node_destroy(vp); 1489 spec_node_destroy(vp);
1490 } 1490 }
1491 vnfree(vp); 1491 vnfree(vp);
1492 } else { 1492 } else {
1493 /* 1493 /*
1494 * Otherwise, put it back onto the freelist. It 1494 * Otherwise, put it back onto the freelist. It
1495 * can't be destroyed while still associated with 1495 * can't be destroyed while still associated with
1496 * a file system. 1496 * a file system.
1497 */ 1497 */
1498 mutex_enter(&vnode_free_list_lock); 1498 mutex_enter(&vnode_free_list_lock);
1499 if (vp->v_holdcnt > 0) { 1499 if (vp->v_holdcnt > 0) {
1500 vp->v_freelisthd = &vnode_hold_list; 1500 vp->v_freelisthd = &vnode_hold_list;
1501 } else { 1501 } else {
1502 vp->v_freelisthd = &vnode_free_list; 1502 vp->v_freelisthd = &vnode_free_list;
1503 } 1503 }
1504 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1504 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1505 mutex_exit(&vnode_free_list_lock); 1505 mutex_exit(&vnode_free_list_lock);
1506 mutex_exit(&vp->v_interlock); 1506 mutex_exit(&vp->v_interlock);
1507 } 1507 }
1508} 1508}
1509 1509
1510void 1510void
1511vrele(vnode_t *vp) 1511vrele(vnode_t *vp)
1512{ 1512{
1513 1513
1514 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1514 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1515 1515
1516 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 1516 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
1517 return; 1517 return;
1518 } 1518 }
1519 mutex_enter(&vp->v_interlock); 1519 mutex_enter(&vp->v_interlock);
1520 vrelel(vp, 0); 1520 vrelel(vp, 0);
1521} 1521}
1522 1522
1523static void 1523static void
1524vrele_thread(void *cookie) 1524vrele_thread(void *cookie)
1525{ 1525{
1526 vnode_t *vp; 1526 vnode_t *vp;
1527 1527
1528 for (;;) { 1528 for (;;) {
1529 mutex_enter(&vrele_lock); 1529 mutex_enter(&vrele_lock);
1530 while (TAILQ_EMPTY(&vrele_list)) { 1530 while (TAILQ_EMPTY(&vrele_list)) {
1531 vrele_gen++; 1531 vrele_gen++;
1532 cv_broadcast(&vrele_cv); 1532 cv_broadcast(&vrele_cv);
1533 cv_timedwait(&vrele_cv, &vrele_lock, hz); 1533 cv_timedwait(&vrele_cv, &vrele_lock, hz);
1534 } 1534 }
1535 vp = TAILQ_FIRST(&vrele_list); 1535 vp = TAILQ_FIRST(&vrele_list);
1536 TAILQ_REMOVE(&vrele_list, vp, v_freelist); 1536 TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1537 vrele_pending--; 1537 vrele_pending--;
1538 mutex_exit(&vrele_lock); 1538 mutex_exit(&vrele_lock);
1539 1539
1540 /* 1540 /*
1541 * If not the last reference, then ignore the vnode 1541 * If not the last reference, then ignore the vnode
1542 * and look for more work. 1542 * and look for more work.
1543 */ 1543 */
1544 mutex_enter(&vp->v_interlock); 1544 mutex_enter(&vp->v_interlock);
1545 KASSERT((vp->v_iflag & VI_INACTPEND) != 0); 1545 KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
1546 vp->v_iflag &= ~VI_INACTPEND; 1546 vp->v_iflag &= ~VI_INACTPEND;
1547 vrelel(vp, 0); 1547 vrelel(vp, 0);
1548 } 1548 }
1549} 1549}
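The handoff to vrele_thread() is batched: the cv_signal() in vrelel() fires only once vrele_pending exceeds desiredvnodes >> 8. For example, with desiredvnodes at 16384 (an illustrative value) the kthread is kicked after 64 deferred releases are queued, while the once-a-second cv_timedwait(..., hz) above mops up smaller backlogs.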
1550 1550
1551/* 1551/*
1552 * Page or buffer structure gets a reference. 1552 * Page or buffer structure gets a reference.
1553 * Called with v_interlock held. 1553 * Called with v_interlock held.
1554 */ 1554 */
1555void 1555void
1556vholdl(vnode_t *vp) 1556vholdl(vnode_t *vp)
1557{ 1557{
1558 1558
1559 KASSERT(mutex_owned(&vp->v_interlock)); 1559 KASSERT(mutex_owned(&vp->v_interlock));
1560 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1560 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1561 1561
1562 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { 1562 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1563 mutex_enter(&vnode_free_list_lock); 1563 mutex_enter(&vnode_free_list_lock);
1564 KASSERT(vp->v_freelisthd == &vnode_free_list); 1564 KASSERT(vp->v_freelisthd == &vnode_free_list);
1565 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1565 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1566 vp->v_freelisthd = &vnode_hold_list; 1566 vp->v_freelisthd = &vnode_hold_list;
1567 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1567 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1568 mutex_exit(&vnode_free_list_lock); 1568 mutex_exit(&vnode_free_list_lock);
1569 } 1569 }
1570} 1570}
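The assertion-checked pattern is: take v_interlock, vholdl(), drop the lock, and later undo the hold with holdrelel() under the same lock; this is exactly what bgetvp() above does on behalf of a buffer. A minimal sketch:

	mutex_enter(&vp->v_interlock);
	vholdl(vp);			/* pin vp for a buffer or page */
	mutex_exit(&vp->v_interlock);
	/* ... when the buffer lets go ... */
	mutex_enter(&vp->v_interlock);
	holdrelel(vp);
	mutex_exit(&vp->v_interlock);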
1571 1571
1572/* 1572/*
1573 * Page or buffer structure frees a reference. 1573 * Page or buffer structure frees a reference.
1574 * Called with v_interlock held. 1574 * Called with v_interlock held.
1575 */ 1575 */
1576void 1576void
1577holdrelel(vnode_t *vp) 1577holdrelel(vnode_t *vp)
1578{ 1578{
1579 1579
1580 KASSERT(mutex_owned(&vp->v_interlock)); 1580 KASSERT(mutex_owned(&vp->v_interlock));
1581 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1581 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1582 1582
1583 if (vp->v_holdcnt <= 0) { 1583 if (vp->v_holdcnt <= 0) {
 1584   vpanic(vp, "holdrelel: bad holdcnt"); 1584   vpanic(vp, "holdrelel: bad holdcnt");
1585 } 1585 }
1586 1586
1587 vp->v_holdcnt--; 1587 vp->v_holdcnt--;
1588 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1588 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1589 mutex_enter(&vnode_free_list_lock); 1589 mutex_enter(&vnode_free_list_lock);
1590 KASSERT(vp->v_freelisthd == &vnode_hold_list); 1590 KASSERT(vp->v_freelisthd == &vnode_hold_list);
1591 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1591 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1592 vp->v_freelisthd = &vnode_free_list; 1592 vp->v_freelisthd = &vnode_free_list;
1593 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1593 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1594 mutex_exit(&vnode_free_list_lock); 1594 mutex_exit(&vnode_free_list_lock);
1595 } 1595 }
1596} 1596}
1597 1597
1598/* 1598/*
1599 * Vnode reference, where a reference is already held by some other 1599 * Vnode reference, where a reference is already held by some other
1600 * object (for example, a file structure). 1600 * object (for example, a file structure).
1601 */ 1601 */
1602void 1602void
1603vref(vnode_t *vp) 1603vref(vnode_t *vp)
1604{ 1604{
1605 1605
1606 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1606 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1607 KASSERT(vp->v_usecount != 0); 1607 KASSERT(vp->v_usecount != 0);
1608 1608
1609 atomic_inc_uint(&vp->v_usecount); 1609 atomic_inc_uint(&vp->v_usecount);
1610} 1610}
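Because vref() is a bare atomic increment, it is legal only while the caller already guarantees v_usecount != 0, per the KASSERT. A hedged sketch of the usual stash-and-release pairing (the file structure here is illustrative):

	vref(vp);		/* caller already holds a reference */
	fp->f_data = vp;	/* hand one to a longer-lived object */
	/* ... on that object's teardown ... */
	vrele((vnode_t *)fp->f_data);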
1611 1611
1612/* 1612/*
1613 * Remove any vnodes in the vnode table belonging to mount point mp. 1613 * Remove any vnodes in the vnode table belonging to mount point mp.
1614 * 1614 *
1615 * If FORCECLOSE is not specified, there should not be any active ones, 1615 * If FORCECLOSE is not specified, there should not be any active ones,
1616 * return error if any are found (nb: this is a user error, not a 1616 * return error if any are found (nb: this is a user error, not a
1617 * system error). If FORCECLOSE is specified, detach any active vnodes 1617 * system error). If FORCECLOSE is specified, detach any active vnodes
1618 * that are found. 1618 * that are found.
1619 * 1619 *
1620 * If WRITECLOSE is set, only flush out regular file vnodes open for 1620 * If WRITECLOSE is set, only flush out regular file vnodes open for
1621 * writing. 1621 * writing.
1622 * 1622 *
1623 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 1623 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1624 */ 1624 */
1625#ifdef DEBUG 1625#ifdef DEBUG
1626int busyprt = 0; /* print out busy vnodes */ 1626int busyprt = 0; /* print out busy vnodes */
1627struct ctldebug debug1 = { "busyprt", &busyprt }; 1627struct ctldebug debug1 = { "busyprt", &busyprt };
1628#endif 1628#endif
1629 1629
1630static vnode_t * 1630static vnode_t *
1631vflushnext(vnode_t *mvp, int *when) 1631vflushnext(vnode_t *mvp, int *when)
1632{ 1632{
1633 1633
1634 if (hardclock_ticks > *when) { 1634 if (hardclock_ticks > *when) {
1635 mutex_exit(&mntvnode_lock); 1635 mutex_exit(&mntvnode_lock);
1636 yield(); 1636 yield();
1637 mutex_enter(&mntvnode_lock); 1637 mutex_enter(&mntvnode_lock);
1638 *when = hardclock_ticks + hz / 10; 1638 *when = hardclock_ticks + hz / 10;
1639 } 1639 }
1640 1640
1641 return vunmark(mvp); 1641 return vunmark(mvp);
1642} 1642}
1643 1643
1644int 1644int
1645vflush(struct mount *mp, vnode_t *skipvp, int flags) 1645vflush(struct mount *mp, vnode_t *skipvp, int flags)
1646{ 1646{
1647 vnode_t *vp, *mvp; 1647 vnode_t *vp, *mvp;
1648 int busy = 0, when = 0, gen; 1648 int busy = 0, when = 0, gen;
1649 1649
1650 /* 1650 /*
1651 * First, flush out any vnode references from vrele_list. 1651 * First, flush out any vnode references from vrele_list.
1652 */ 1652 */
1653 mutex_enter(&vrele_lock); 1653 mutex_enter(&vrele_lock);
1654 gen = vrele_gen; 1654 gen = vrele_gen;
1655 while (vrele_pending && gen == vrele_gen) { 1655 while (vrele_pending && gen == vrele_gen) {
1656 cv_broadcast(&vrele_cv); 1656 cv_broadcast(&vrele_cv);
1657 cv_wait(&vrele_cv, &vrele_lock); 1657 cv_wait(&vrele_cv, &vrele_lock);
1658 } 1658 }
1659 mutex_exit(&vrele_lock); 1659 mutex_exit(&vrele_lock);
1660 1660
1661 /* Allocate a marker vnode. */ 1661 /* Allocate a marker vnode. */
1662 if ((mvp = vnalloc(mp)) == NULL) 1662 if ((mvp = vnalloc(mp)) == NULL)
1663 return (ENOMEM); 1663 return (ENOMEM);
1664 1664
1665 /* 1665 /*
 1666 * NOTE: TAILQ_FOREACH is not used here, since vgone() 1666 * NOTE: TAILQ_FOREACH is not used here, since vgone()
 1667 * and vclean() are called within the loop. 1667 * and vclean() are called within the loop.
1668 */ 1668 */
1669 mutex_enter(&mntvnode_lock); 1669 mutex_enter(&mntvnode_lock);
1670 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL; 1670 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1671 vp = vflushnext(mvp, &when)) { 1671 vp = vflushnext(mvp, &when)) {
1672 vmark(mvp, vp); 1672 vmark(mvp, vp);
1673 if (vp->v_mount != mp || vismarker(vp)) 1673 if (vp->v_mount != mp || vismarker(vp))
1674 continue; 1674 continue;
1675 /* 1675 /*
1676 * Skip over a selected vnode. 1676 * Skip over a selected vnode.
1677 */ 1677 */
1678 if (vp == skipvp) 1678 if (vp == skipvp)
1679 continue; 1679 continue;
1680 mutex_enter(&vp->v_interlock); 1680 mutex_enter(&vp->v_interlock);
1681 /* 1681 /*
1682 * Ignore clean but still referenced vnodes. 1682 * Ignore clean but still referenced vnodes.
1683 */ 1683 */
1684 if ((vp->v_iflag & VI_CLEAN) != 0) { 1684 if ((vp->v_iflag & VI_CLEAN) != 0) {
1685 mutex_exit(&vp->v_interlock); 1685 mutex_exit(&vp->v_interlock);
1686 continue; 1686 continue;
1687 } 1687 }
1688 /* 1688 /*
 1689 * Skip over vnodes marked VV_SYSTEM. 1689 * Skip over vnodes marked VV_SYSTEM.
1690 */ 1690 */
1691 if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { 1691 if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1692 mutex_exit(&vp->v_interlock); 1692 mutex_exit(&vp->v_interlock);
1693 continue; 1693 continue;
1694 } 1694 }
1695 /* 1695 /*
1696 * If WRITECLOSE is set, only flush out regular file 1696 * If WRITECLOSE is set, only flush out regular file
1697 * vnodes open for writing. 1697 * vnodes open for writing.
1698 */ 1698 */
1699 if ((flags & WRITECLOSE) && 1699 if ((flags & WRITECLOSE) &&
1700 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1700 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1701 mutex_exit(&vp->v_interlock); 1701 mutex_exit(&vp->v_interlock);
1702 continue; 1702 continue;
1703 } 1703 }
1704 /* 1704 /*
1705 * With v_usecount == 0, all we need to do is clear 1705 * With v_usecount == 0, all we need to do is clear
1706 * out the vnode data structures and we are done. 1706 * out the vnode data structures and we are done.
1707 */ 1707 */
1708 if (vp->v_usecount == 0) { 1708 if (vp->v_usecount == 0) {
1709 mutex_exit(&mntvnode_lock); 1709 mutex_exit(&mntvnode_lock);
1710 vremfree(vp); 1710 vremfree(vp);
1711 vp->v_usecount = 1; 1711 vp->v_usecount = 1;
1712 vclean(vp, DOCLOSE); 1712 vclean(vp, DOCLOSE);
1713 vrelel(vp, 0); 1713 vrelel(vp, 0);
1714 mutex_enter(&mntvnode_lock); 1714 mutex_enter(&mntvnode_lock);
1715 continue; 1715 continue;
1716 } 1716 }
1717 /* 1717 /*
1718 * If FORCECLOSE is set, forcibly close the vnode. 1718 * If FORCECLOSE is set, forcibly close the vnode.
1719 * For block or character devices, revert to an 1719 * For block or character devices, revert to an
1720 * anonymous device. For all other files, just 1720 * anonymous device. For all other files, just
1721 * kill them. 1721 * kill them.
1722 */ 1722 */
1723 if (flags & FORCECLOSE) { 1723 if (flags & FORCECLOSE) {
1724 mutex_exit(&mntvnode_lock); 1724 mutex_exit(&mntvnode_lock);
1725 atomic_inc_uint(&vp->v_usecount); 1725 atomic_inc_uint(&vp->v_usecount);
1726 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1726 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1727 vclean(vp, DOCLOSE); 1727 vclean(vp, DOCLOSE);
1728 vrelel(vp, 0); 1728 vrelel(vp, 0);
1729 } else { 1729 } else {
1730 vclean(vp, 0); 1730 vclean(vp, 0);
1731 vp->v_op = spec_vnodeop_p; /* XXXSMP */ 1731 vp->v_op = spec_vnodeop_p; /* XXXSMP */
1732 mutex_exit(&vp->v_interlock); 1732 mutex_exit(&vp->v_interlock);
1733 /* 1733 /*
1734 * The vnode isn't clean, but still resides 1734 * The vnode isn't clean, but still resides
1735 * on the mount list. Remove it. XXX This 1735 * on the mount list. Remove it. XXX This
1736 * is a bit dodgy. 1736 * is a bit dodgy.
1737 */ 1737 */
1738 insmntque(vp, NULL); 1738 insmntque(vp, NULL);
1739 vrele(vp); 1739 vrele(vp);
1740 } 1740 }
1741 mutex_enter(&mntvnode_lock); 1741 mutex_enter(&mntvnode_lock);
1742 continue; 1742 continue;
1743 } 1743 }
1744#ifdef DEBUG 1744#ifdef DEBUG
1745 if (busyprt) 1745 if (busyprt)
1746 vprint("vflush: busy vnode", vp); 1746 vprint("vflush: busy vnode", vp);
1747#endif 1747#endif
1748 mutex_exit(&vp->v_interlock); 1748 mutex_exit(&vp->v_interlock);
1749 busy++; 1749 busy++;
1750 } 1750 }
1751 mutex_exit(&mntvnode_lock); 1751 mutex_exit(&mntvnode_lock);
1752 vnfree(mvp); 1752 vnfree(mvp);
1753 if (busy) 1753 if (busy)
1754 return (EBUSY); 1754 return (EBUSY);
1755 return (0); 1755 return (0);
1756} 1756}
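A hedged sketch of the typical unmount-path call, mirroring FFS-style unmount code (an assumption, not taken from this file); EBUSY propagates back to the user when active vnodes remain and FORCECLOSE was not given:

	int flags = SKIPSYSTEM;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	error = vflush(mp, NULLVP, flags);
	if (error != 0)
		return error;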
1757 1757
1758/* 1758/*
1759 * Disassociate the underlying file system from a vnode. 1759 * Disassociate the underlying file system from a vnode.
1760 * 1760 *
1761 * Must be called with the interlock held, and will return with it held. 1761 * Must be called with the interlock held, and will return with it held.
1762 */ 1762 */
1763void 1763void
1764vclean(vnode_t *vp, int flags) 1764vclean(vnode_t *vp, int flags)
1765{ 1765{
1766 lwp_t *l = curlwp; 1766 lwp_t *l = curlwp;
1767 bool recycle, active; 1767 bool recycle, active;
1768 int error; 1768 int error;
1769 1769
1770 KASSERT(mutex_owned(&vp->v_interlock)); 1770 KASSERT(mutex_owned(&vp->v_interlock));
1771 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1771 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1772 KASSERT(vp->v_usecount != 0); 1772 KASSERT(vp->v_usecount != 0);
1773 1773
 1774 /* If cleaning is already in progress, wait until done and return. */ 1774 /* If cleaning is already in progress, wait until done and return. */
1775 if (vp->v_iflag & VI_XLOCK) { 1775 if (vp->v_iflag & VI_XLOCK) {
1776 vwait(vp, VI_XLOCK); 1776 vwait(vp, VI_XLOCK);
1777 return; 1777 return;
1778 } 1778 }
1779 1779
1780 /* If already clean, nothing to do. */ 1780 /* If already clean, nothing to do. */
1781 if ((vp->v_iflag & VI_CLEAN) != 0) { 1781 if ((vp->v_iflag & VI_CLEAN) != 0) {
1782 return; 1782 return;
1783 } 1783 }
1784 1784
1785 /* 1785 /*
1786 * Prevent the vnode from being recycled or brought into use 1786 * Prevent the vnode from being recycled or brought into use
1787 * while we clean it out. 1787 * while we clean it out.
1788 */ 1788 */
1789 vp->v_iflag |= VI_XLOCK; 1789 vp->v_iflag |= VI_XLOCK;
1790 if (vp->v_iflag & VI_EXECMAP) { 1790 if (vp->v_iflag & VI_EXECMAP) {
1791 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 1791 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1792 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 1792 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1793 } 1793 }
1794 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1794 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1795 active = (vp->v_usecount > 1); 1795 active = (vp->v_usecount > 1);
1796 1796
1797 /* XXXAD should not lock vnode under layer */ 1797 /* XXXAD should not lock vnode under layer */
1798 VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); 1798 VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1799 1799
1800 /* 1800 /*
1801 * Clean out any cached data associated with the vnode. 1801 * Clean out any cached data associated with the vnode.
1802 * If purging an active vnode, it must be closed and 1802 * If purging an active vnode, it must be closed and
 1803 * deactivated before being reclaimed. Note that 1803 * deactivated before being reclaimed. Note that
 1804 * VOP_INACTIVE() will unlock the vnode. 1804 * VOP_INACTIVE() will unlock the vnode.
1805 */ 1805 */
1806 if (flags & DOCLOSE) { 1806 if (flags & DOCLOSE) {
1807 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1807 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1808 if (error != 0) { 1808 if (error != 0) {
1809 /* XXX, fix vn_start_write's grab of mp and use that. */ 1809 /* XXX, fix vn_start_write's grab of mp and use that. */
1810 1810
1811 if (wapbl_vphaswapbl(vp)) 1811 if (wapbl_vphaswapbl(vp))
1812 WAPBL_DISCARD(wapbl_vptomp(vp)); 1812 WAPBL_DISCARD(wapbl_vptomp(vp));
1813 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1813 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1814 } 1814 }
1815 KASSERT(error == 0); 1815 KASSERT(error == 0);
1816 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1816 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1817 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1817 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1818 spec_node_revoke(vp); 1818 spec_node_revoke(vp);
1819 } 1819 }
1820 } 1820 }
1821 if (active) { 1821 if (active) {
1822 VOP_INACTIVE(vp, &recycle); 1822 VOP_INACTIVE(vp, &recycle);
1823 } else { 1823 } else {
1824 /* 1824 /*
1825 * Any other processes trying to obtain this lock must first 1825 * Any other processes trying to obtain this lock must first
1826 * wait for VI_XLOCK to clear, then call the new lock operation. 1826 * wait for VI_XLOCK to clear, then call the new lock operation.
1827 */ 1827 */
1828 VOP_UNLOCK(vp, 0); 1828 VOP_UNLOCK(vp, 0);
1829 } 1829 }
1830 1830
1831 /* Disassociate the underlying file system from the vnode. */ 1831 /* Disassociate the underlying file system from the vnode. */
1832 if (VOP_RECLAIM(vp)) { 1832 if (VOP_RECLAIM(vp)) {
1833 vpanic(vp, "vclean: cannot reclaim"); 1833 vpanic(vp, "vclean: cannot reclaim");
1834 } 1834 }
1835 1835
1836 KASSERT(vp->v_uobj.uo_npages == 0); 1836 KASSERT(vp->v_uobj.uo_npages == 0);
1837 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1837 if (vp->v_type == VREG && vp->v_ractx != NULL) {
1838 uvm_ra_freectx(vp->v_ractx); 1838 uvm_ra_freectx(vp->v_ractx);
1839 vp->v_ractx = NULL; 1839 vp->v_ractx = NULL;
1840 } 1840 }
1841 cache_purge(vp); 1841 cache_purge(vp);
1842 1842
1843 /* Done with purge, notify sleepers of the grim news. */ 1843 /* Done with purge, notify sleepers of the grim news. */
1844 mutex_enter(&vp->v_interlock); 1844 mutex_enter(&vp->v_interlock);
1845 vp->v_op = dead_vnodeop_p; 1845 vp->v_op = dead_vnodeop_p;
1846 vp->v_tag = VT_NON; 1846 vp->v_tag = VT_NON;
1847 vp->v_vnlock = &vp->v_lock; 1847 vp->v_vnlock = &vp->v_lock;
1848 KNOTE(&vp->v_klist, NOTE_REVOKE); 1848 KNOTE(&vp->v_klist, NOTE_REVOKE);
1849 vp->v_iflag &= ~(VI_XLOCK | VI_FREEING); 1849 vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1850 vp->v_vflag &= ~VV_LOCKSWORK; 1850 vp->v_vflag &= ~VV_LOCKSWORK;
1851 if ((flags & DOCLOSE) != 0) { 1851 if ((flags & DOCLOSE) != 0) {
1852 vp->v_iflag |= VI_CLEAN; 1852 vp->v_iflag |= VI_CLEAN;
1853 } 1853 }
1854 cv_broadcast(&vp->v_cv); 1854 cv_broadcast(&vp->v_cv);
1855 1855
1856 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1856 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1857} 1857}
1858 1858
1859/* 1859/*
1860 * Recycle an unused vnode to the front of the free list. 1860 * Recycle an unused vnode to the front of the free list.
1861 * Release the passed interlock if the vnode will be recycled. 1861 * Release the passed interlock if the vnode will be recycled.
1862 */ 1862 */
1863int 1863int
1864vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l) 1864vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1865{ 1865{
1866 1866
1867 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1867 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1868 1868
1869 mutex_enter(&vp->v_interlock); 1869 mutex_enter(&vp->v_interlock);
1870 if (vp->v_usecount != 0) { 1870 if (vp->v_usecount != 0) {
1871 mutex_exit(&vp->v_interlock); 1871 mutex_exit(&vp->v_interlock);
1872 return (0); 1872 return (0);
1873 } 1873 }
1874 if (inter_lkp) 1874 if (inter_lkp)
1875 mutex_exit(inter_lkp); 1875 mutex_exit(inter_lkp);
1876 vremfree(vp); 1876 vremfree(vp);
1877 vp->v_usecount = 1; 1877 vp->v_usecount = 1;
1878 vclean(vp, DOCLOSE); 1878 vclean(vp, DOCLOSE);
1879 vrelel(vp, 0); 1879 vrelel(vp, 0);
1880 return (1); 1880 return (1);
1881} 1881}
1882 1882
1883/* 1883/*
1884 * Eliminate all activity associated with a vnode in preparation for 1884 * Eliminate all activity associated with a vnode in preparation for
1885 * reuse. Drops a reference from the vnode. 1885 * reuse. Drops a reference from the vnode.
1886 */ 1886 */
1887void 1887void
1888vgone(vnode_t *vp) 1888vgone(vnode_t *vp)
1889{ 1889{
1890 1890
1891 mutex_enter(&vp->v_interlock); 1891 mutex_enter(&vp->v_interlock);
1892 vclean(vp, DOCLOSE); 1892 vclean(vp, DOCLOSE);
1893 vrelel(vp, 0); 1893 vrelel(vp, 0);
1894} 1894}
1895 1895
1896/* 1896/*
1897 * Lookup a vnode by device number. 1897 * Lookup a vnode by device number.
1898 */ 1898 */
1899int 1899int
1900vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) 1900vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1901{ 1901{
1902 vnode_t *vp; 1902 vnode_t *vp;
1903 int rc = 0; 1903 int rc = 0;
1904 1904
1905 mutex_enter(&device_lock); 1905 mutex_enter(&device_lock);
1906 for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1906 for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1907 if (dev != vp->v_rdev || type != vp->v_type) 1907 if (dev != vp->v_rdev || type != vp->v_type)
1908 continue; 1908 continue;
1909 *vpp = vp; 1909 *vpp = vp;
1910 rc = 1; 1910 rc = 1;
1911 break; 1911 break;
1912 } 1912 }
1913 mutex_exit(&device_lock); 1913 mutex_exit(&device_lock);
1914 return (rc); 1914 return (rc);
1915} 1915}
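Note that vfinddev() stores the vnode without taking a new reference; once device_lock is dropped the result is unprotected, so a caller that keeps it must vget() it before use. A minimal sketch:

	vnode_t *vp;

	if (vfinddev(dev, VBLK, &vp)) {
		/* a VBLK vnode exists for dev, but is unreferenced here */
	}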
1916 1916
1917/* 1917/*
1918 * Revoke all the vnodes corresponding to the specified minor number 1918 * Revoke all the vnodes corresponding to the specified minor number
1919 * range (endpoints inclusive) of the specified major. 1919 * range (endpoints inclusive) of the specified major.
1920 */ 1920 */
1921void 1921void
1922vdevgone(int maj, int minl, int minh, enum vtype type) 1922vdevgone(int maj, int minl, int minh, enum vtype type)
1923{ 1923{
1924 vnode_t *vp, **vpp; 1924 vnode_t *vp, **vpp;
1925 dev_t dev; 1925 dev_t dev;
1926 int mn; 1926 int mn;
1927 1927
1928 vp = NULL; /* XXX gcc */ 1928 vp = NULL; /* XXX gcc */
1929 1929
1930 mutex_enter(&device_lock); 1930 mutex_enter(&device_lock);
1931 for (mn = minl; mn <= minh; mn++) { 1931 for (mn = minl; mn <= minh; mn++) {
1932 dev = makedev(maj, mn); 1932 dev = makedev(maj, mn);
1933 vpp = &specfs_hash[SPECHASH(dev)]; 1933 vpp = &specfs_hash[SPECHASH(dev)];
1934 for (vp = *vpp; vp != NULL;) { 1934 for (vp = *vpp; vp != NULL;) {
1935 mutex_enter(&vp->v_interlock); 1935 mutex_enter(&vp->v_interlock);
1936 if ((vp->v_iflag & VI_CLEAN) != 0 || 1936 if ((vp->v_iflag & VI_CLEAN) != 0 ||
1937 dev != vp->v_rdev || type != vp->v_type) { 1937 dev != vp->v_rdev || type != vp->v_type) {
1938 mutex_exit(&vp->v_interlock); 1938 mutex_exit(&vp->v_interlock);
1939 vp = vp->v_specnext; 1939 vp = vp->v_specnext;
1940 continue; 1940 continue;
1941 } 1941 }
1942 mutex_exit(&device_lock); 1942 mutex_exit(&device_lock);
1943 if (vget(vp, LK_INTERLOCK) == 0) { 1943 if (vget(vp, LK_INTERLOCK) == 0) {
1944 VOP_REVOKE(vp, REVOKEALL); 1944 VOP_REVOKE(vp, REVOKEALL);
1945 vrele(vp); 1945 vrele(vp);
1946 } 1946 }
1947 mutex_enter(&device_lock); 1947 mutex_enter(&device_lock);
1948 vp = *vpp; 1948 vp = *vpp;
1949 } 1949 }
1950 } 1950 }
1951 mutex_exit(&device_lock); 1951 mutex_exit(&device_lock);
1952} 1952}
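The usual caller is a driver detach routine, revoking every vnode for the instance's minors before the softc disappears. A hedged sketch (xx_bdevsw/xx_cdevsw and the 0..7 minor range are hypothetical):

	devmajor_t bmaj = bdevsw_lookup_major(&xx_bdevsw);
	devmajor_t cmaj = cdevsw_lookup_major(&xx_cdevsw);

	vdevgone(bmaj, 0, 7, VBLK);	/* block aliases */
	vdevgone(cmaj, 0, 7, VCHR);	/* character aliases */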
1953 1953
1954/* 1954/*
1955 * Calculate the total number of references to a special device. 1955 * Calculate the total number of references to a special device.
1956 */ 1956 */
1957int 1957int
1958vcount(vnode_t *vp) 1958vcount(vnode_t *vp)
1959{ 1959{
1960 int count; 1960 int count;
1961 1961
1962 mutex_enter(&device_lock); 1962 mutex_enter(&device_lock);
1963 mutex_enter(&vp->v_interlock); 1963 mutex_enter(&vp->v_interlock);
1964 if (vp->v_specnode == NULL) { 1964 if (vp->v_specnode == NULL) {
1965 count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0); 1965 count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1966 mutex_exit(&vp->v_interlock); 1966 mutex_exit(&vp->v_interlock);
1967 mutex_exit(&device_lock); 1967 mutex_exit(&device_lock);
1968 return (count); 1968 return (count);
1969 } 1969 }
1970 mutex_exit(&vp->v_interlock); 1970 mutex_exit(&vp->v_interlock);
1971 count = vp->v_specnode->sn_dev->sd_opencnt; 1971 count = vp->v_specnode->sn_dev->sd_opencnt;
1972 mutex_exit(&device_lock); 1972 mutex_exit(&device_lock);
1973 return (count); 1973 return (count);
1974} 1974}
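As the code shows, for a device vnode the answer is sn_dev->sd_opencnt, i.e. opens across all aliases of the device, which is why close paths consult it rather than v_usecount. A sketch of that style of last-close test (illustrative, not the actual specfs code):

	if (vcount(vp) > 1)
		return 0;	/* the device is still open elsewhere */
	/* ... final-close teardown ... */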
1975 1975
1976/* 1976/*
1977 * Eliminate all activity associated with the requested vnode 1977 * Eliminate all activity associated with the requested vnode
1978 * and with all vnodes aliased to the requested vnode. 1978 * and with all vnodes aliased to the requested vnode.
1979 */ 1979 */
1980void 1980void
1981vrevoke(vnode_t *vp) 1981vrevoke(vnode_t *vp)
1982{ 1982{
1983 vnode_t *vq, **vpp; 1983 vnode_t *vq, **vpp;
1984 enum vtype type; 1984 enum vtype type;
1985 dev_t dev; 1985 dev_t dev;
1986 1986
1987 KASSERT(vp->v_usecount > 0); 1987 KASSERT(vp->v_usecount > 0);
1988 1988
1989 mutex_enter(&vp->v_interlock); 1989 mutex_enter(&vp->v_interlock);
1990 if ((vp->v_iflag & VI_CLEAN) != 0) { 1990 if ((vp->v_iflag & VI_CLEAN) != 0) {
1991 mutex_exit(&vp->v_interlock); 1991 mutex_exit(&vp->v_interlock);
1992 return; 1992 return;
1993 } else { 1993 } else {
1994 dev = vp->v_rdev; 1994 dev = vp->v_rdev;
1995 type = vp->v_type; 1995 type = vp->v_type;
1996 mutex_exit(&vp->v_interlock); 1996 mutex_exit(&vp->v_interlock);
1997 } 1997 }
1998 1998
1999 vpp = &specfs_hash[SPECHASH(dev)]; 1999 vpp = &specfs_hash[SPECHASH(dev)];
2000 mutex_enter(&device_lock); 2000 mutex_enter(&device_lock);
2001 for (vq = *vpp; vq != NULL;) { 2001 for (vq = *vpp; vq != NULL;) {
2002 /* If clean or being cleaned, then ignore it. */ 2002 /* If clean or being cleaned, then ignore it. */
2003 mutex_enter(&vq->v_interlock); 2003 mutex_enter(&vq->v_interlock);
2004 if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 || 2004 if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
2005 vq->v_rdev != dev || vq->v_type != type) { 2005 vq->v_rdev != dev || vq->v_type != type) {
2006 mutex_exit(&vq->v_interlock); 2006 mutex_exit(&vq->v_interlock);
2007 vq = vq->v_specnext; 2007 vq = vq->v_specnext;
2008 continue; 2008 continue;
2009 } 2009 }
2010 mutex_exit(&device_lock); 2010 mutex_exit(&device_lock);
2011 if (vq->v_usecount == 0) { 2011 if (vq->v_usecount == 0) {
2012 vremfree(vq); 2012 vremfree(vq);
2013 vq->v_usecount = 1; 2013 vq->v_usecount = 1;
2014 } else { 2014 } else {
2015 atomic_inc_uint(&vq->v_usecount); 2015 atomic_inc_uint(&vq->v_usecount);
2016 } 2016 }
2017 vclean(vq, DOCLOSE); 2017 vclean(vq, DOCLOSE);
2018 vrelel(vq, 0); 2018 vrelel(vq, 0);
2019 mutex_enter(&device_lock); 2019 mutex_enter(&device_lock);
2020 vq = *vpp; 2020 vq = *vpp;
2021 } 2021 }
2022 mutex_exit(&device_lock); 2022 mutex_exit(&device_lock);
2023} 2023}
2024 2024
2025/* 2025/*
2026 * sysctl helper routine to return list of supported fstypes 2026 * sysctl helper routine to return list of supported fstypes
2027 */ 2027 */
2028int 2028int
2029sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 2029sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2030{ 2030{
2031 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 2031 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
2032 char *where = oldp; 2032 char *where = oldp;
2033 struct vfsops *v; 2033 struct vfsops *v;
2034 size_t needed, left, slen; 2034 size_t needed, left, slen;
2035 int error, first; 2035 int error, first;
2036 2036
2037 if (newp != NULL) 2037 if (newp != NULL)
2038 return (EPERM); 2038 return (EPERM);
2039 if (namelen != 0) 2039 if (namelen != 0)
2040 return (EINVAL); 2040 return (EINVAL);
2041 2041
2042 first = 1; 2042 first = 1;
2043 error = 0; 2043 error = 0;
2044 needed = 0; 2044 needed = 0;
2045 left = *oldlenp; 2045 left = *oldlenp;
2046 2046
2047 sysctl_unlock(); 2047 sysctl_unlock();
2048 mutex_enter(&vfs_list_lock); 2048 mutex_enter(&vfs_list_lock);
2049 LIST_FOREACH(v, &vfs_list, vfs_list) { 2049 LIST_FOREACH(v, &vfs_list, vfs_list) {
2050 if (where == NULL) 2050 if (where == NULL)
2051 needed += strlen(v->vfs_name) + 1; 2051 needed += strlen(v->vfs_name) + 1;
2052 else { 2052 else {
2053 memset(bf, 0, sizeof(bf)); 2053 memset(bf, 0, sizeof(bf));
2054 if (first) { 2054 if (first) {
2055 strncpy(bf, v->vfs_name, sizeof(bf)); 2055 strncpy(bf, v->vfs_name, sizeof(bf));
2056 first = 0; 2056 first = 0;
2057 } else { 2057 } else {
2058 bf[0] = ' '; 2058 bf[0] = ' ';
2059 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 2059 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
2060 } 2060 }
2061 bf[sizeof(bf)-1] = '\0'; 2061 bf[sizeof(bf)-1] = '\0';
2062 slen = strlen(bf); 2062 slen = strlen(bf);
2063 if (left < slen + 1) 2063 if (left < slen + 1)
2064 break; 2064 break;
2065 v->vfs_refcount++; 2065 v->vfs_refcount++;
2066 mutex_exit(&vfs_list_lock); 2066 mutex_exit(&vfs_list_lock);
2067 /* +1 to copy out the trailing NUL byte */ 2067 /* +1 to copy out the trailing NUL byte */
2068 error = copyout(bf, where, slen + 1); 2068 error = copyout(bf, where, slen + 1);
2069 mutex_enter(&vfs_list_lock); 2069 mutex_enter(&vfs_list_lock);
2070 v->vfs_refcount--; 2070 v->vfs_refcount--;
2071 if (error) 2071 if (error)
2072 break; 2072 break;
2073 where += slen; 2073 where += slen;
2074 needed += slen; 2074 needed += slen;
2075 left -= slen; 2075 left -= slen;
2076 } 2076 }
2077 } 2077 }
2078 mutex_exit(&vfs_list_lock); 2078 mutex_exit(&vfs_list_lock);
2079 sysctl_relock(); 2079 sysctl_relock();
2080 *oldlenp = needed; 2080 *oldlenp = needed;
2081 return (error); 2081 return (error);
2082} 2082}
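From userland the handler reads back as one space-separated string; a hedged example, assuming the node is attached as vfs.generic.fstypes (the attachment point is not shown in this file):

	#include <sys/param.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		char buf[1024];
		size_t len = sizeof(buf);

		if (sysctlbyname("vfs.generic.fstypes", buf, &len,
		    NULL, 0) == -1)
			return 1;
		printf("%s\n", buf);	/* e.g. "ffs nfs msdos ..." */
		return 0;
	}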
2083 2083
2084 2084
2085int kinfo_vdebug = 1; 2085int kinfo_vdebug = 1;
2086int kinfo_vgetfailed; 2086int kinfo_vgetfailed;
2087#define KINFO_VNODESLOP 10 2087#define KINFO_VNODESLOP 10
2088/* 2088/*
2089 * Dump vnode list (via sysctl). 2089 * Dump vnode list (via sysctl).
 2090 * Copy out the address of each vnode followed by the vnode itself. 2090 * Copy out the address of each vnode followed by the vnode itself.
2091 */ 2091 */
2092/* ARGSUSED */ 2092/* ARGSUSED */
2093int 2093int
2094sysctl_kern_vnode(SYSCTLFN_ARGS) 2094sysctl_kern_vnode(SYSCTLFN_ARGS)
2095{ 2095{
2096 char *where = oldp; 2096 char *where = oldp;
2097 size_t *sizep = oldlenp; 2097 size_t *sizep = oldlenp;
2098 struct mount *mp, *nmp; 2098 struct mount *mp, *nmp;
2099 vnode_t *vp, *mvp, vbuf; 2099 vnode_t *vp, *mvp, vbuf;
2100 char *bp = where, *savebp; 2100 char *bp = where, *savebp;
2101 char *ewhere; 2101 char *ewhere;
2102 int error; 2102 int error;
2103 2103
2104 if (namelen != 0) 2104 if (namelen != 0)
2105 return (EOPNOTSUPP); 2105 return (EOPNOTSUPP);
2106 if (newp != NULL) 2106 if (newp != NULL)
2107 return (EPERM); 2107 return (EPERM);
2108 2108
2109#define VPTRSZ sizeof(vnode_t *) 2109#define VPTRSZ sizeof(vnode_t *)
2110#define VNODESZ sizeof(vnode_t) 2110#define VNODESZ sizeof(vnode_t)
2111 if (where == NULL) { 2111 if (where == NULL) {
2112 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2112 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2113 return (0); 2113 return (0);
2114 } 2114 }
2115 ewhere = where + *sizep; 2115 ewhere = where + *sizep;
2116 2116
2117 sysctl_unlock(); 2117 sysctl_unlock();
2118 mutex_enter(&mountlist_lock); 2118 mutex_enter(&mountlist_lock);
2119 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2119 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2120 mp = nmp) { 2120 mp = nmp) {
2121 if (vfs_busy(mp, &nmp)) { 2121 if (vfs_busy(mp, &nmp)) {
2122 continue; 2122 continue;
2123 } 2123 }
2124 savebp = bp; 2124 savebp = bp;
2125 /* Allocate a marker vnode. */ 2125 /* Allocate a marker vnode. */
2126 if ((mvp = vnalloc(mp)) == NULL) { 2126 if ((mvp = vnalloc(mp)) == NULL) {
2127 sysctl_relock(); 2127 sysctl_relock();
2128 return (ENOMEM); 2128 return (ENOMEM);
2129 } 2129 }
2130 mutex_enter(&mntvnode_lock); 2130 mutex_enter(&mntvnode_lock);
2131 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { 2131 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
2132 vmark(mvp, vp); 2132 vmark(mvp, vp);
2133 /* 2133 /*
2134 * Check that the vp is still associated with 2134 * Check that the vp is still associated with
2135 * this filesystem. RACE: could have been 2135 * this filesystem. RACE: could have been
2136 * recycled onto the same filesystem. 2136 * recycled onto the same filesystem.
2137 */ 2137 */
2138 if (vp->v_mount != mp || vismarker(vp)) 2138 if (vp->v_mount != mp || vismarker(vp))
2139 continue; 2139 continue;
2140 if (bp + VPTRSZ + VNODESZ > ewhere) { 2140 if (bp + VPTRSZ + VNODESZ > ewhere) {
2141 (void)vunmark(mvp); 2141 (void)vunmark(mvp);
2142 mutex_exit(&mntvnode_lock); 2142 mutex_exit(&mntvnode_lock);
2143 vnfree(mvp); 2143 vnfree(mvp);
2144 sysctl_relock(); 2144 sysctl_relock();
2145 *sizep = bp - where; 2145 *sizep = bp - where;
2146 return (ENOMEM); 2146 return (ENOMEM);
2147 } 2147 }
2148 memcpy(&vbuf, vp, VNODESZ); 2148 memcpy(&vbuf, vp, VNODESZ);
2149 mutex_exit(&mntvnode_lock); 2149 mutex_exit(&mntvnode_lock);
2150 if ((error = copyout(vp, bp, VPTRSZ)) || 2150 if ((error = copyout(vp, bp, VPTRSZ)) ||
2151 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) { 2151 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
2152 mutex_enter(&mntvnode_lock); 2152 mutex_enter(&mntvnode_lock);
2153 (void)vunmark(mvp); 2153 (void)vunmark(mvp);
2154 mutex_exit(&mntvnode_lock); 2154 mutex_exit(&mntvnode_lock);
2155 vnfree(mvp); 2155 vnfree(mvp);
2156 sysctl_relock(); 2156 sysctl_relock();
2157 return (error); 2157 return (error);
2158 } 2158 }
2159 bp += VPTRSZ + VNODESZ; 2159 bp += VPTRSZ + VNODESZ;
2160 mutex_enter(&mntvnode_lock); 2160 mutex_enter(&mntvnode_lock);
2161 } 2161 }
2162 mutex_exit(&mntvnode_lock); 2162 mutex_exit(&mntvnode_lock);
2163 vnfree(mvp); 2163 vnfree(mvp);
2164 vfs_unbusy(mp, false, &nmp); 2164 vfs_unbusy(mp, false, &nmp);
2165 } 2165 }
2166 mutex_exit(&mountlist_lock); 2166 mutex_exit(&mountlist_lock);
2167 sysctl_relock(); 2167 sysctl_relock();
2168 2168
2169 *sizep = bp - where; 2169 *sizep = bp - where;
2170 return (0); 2170 return (0);
2171} 2171}
2172 2172
2173/* 2173/*
2174 * Remove clean vnodes from a mountpoint's vnode list. 2174 * Remove clean vnodes from a mountpoint's vnode list.
2175 */ 2175 */
2176void 2176void
2177vfs_scrubvnlist(struct mount *mp) 2177vfs_scrubvnlist(struct mount *mp)
2178{ 2178{
2179 vnode_t *vp, *nvp; 2179 vnode_t *vp, *nvp;
2180 2180
2181 retry: 2181 retry:
2182 mutex_enter(&mntvnode_lock); 2182 mutex_enter(&mntvnode_lock);
2183 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 2183 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
2184 nvp = TAILQ_NEXT(vp, v_mntvnodes); 2184 nvp = TAILQ_NEXT(vp, v_mntvnodes);
2185 mutex_enter(&vp->v_interlock); 2185 mutex_enter(&vp->v_interlock);
2186 if ((vp->v_iflag & VI_CLEAN) != 0) { 2186 if ((vp->v_iflag & VI_CLEAN) != 0) {
2187 TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); 2187 TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
2188 vp->v_mount = NULL; 2188 vp->v_mount = NULL;
2189 mutex_exit(&mntvnode_lock); 2189 mutex_exit(&mntvnode_lock);
2190 mutex_exit(&vp->v_interlock); 2190 mutex_exit(&vp->v_interlock);
2191 vfs_destroy(mp); 2191 vfs_destroy(mp);
2192 goto retry; 2192 goto retry;
2193 } 2193 }
2194 mutex_exit(&vp->v_interlock); 2194 mutex_exit(&vp->v_interlock);
2195 } 2195 }
2196 mutex_exit(&mntvnode_lock); 2196 mutex_exit(&mntvnode_lock);
2197} 2197}
2198 2198
2199/* 2199/*
2200 * Check to see if a filesystem is mounted on a block device. 2200 * Check to see if a filesystem is mounted on a block device.
2201 */ 2201 */
2202int 2202int
2203vfs_mountedon(vnode_t *vp) 2203vfs_mountedon(vnode_t *vp)
2204{ 2204{
2205 vnode_t *vq; 2205 vnode_t *vq;
2206 int error = 0; 2206 int error = 0;
2207 2207
2208 if (vp->v_type != VBLK) 2208 if (vp->v_type != VBLK)
2209 return ENOTBLK; 2209 return ENOTBLK;
2210 if (vp->v_specmountpoint != NULL) 2210 if (vp->v_specmountpoint != NULL)
2211 return (EBUSY); 2211 return (EBUSY);
2212 mutex_enter(&device_lock); 2212 mutex_enter(&device_lock);
2213 for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL; 2213 for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
2214 vq = vq->v_specnext) { 2214 vq = vq->v_specnext) {
2215 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 2215 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
2216 continue; 2216 continue;
2217 if (vq->v_specmountpoint != NULL) { 2217 if (vq->v_specmountpoint != NULL) {
2218 error = EBUSY; 2218 error = EBUSY;
2219 break; 2219 break;
2220 } 2220 }
2221 } 2221 }
2222 mutex_exit(&device_lock); 2222 mutex_exit(&device_lock);
2223 return (error); 2223 return (error);
2224} 2224}
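File systems call this before mounting on a block device; per the code above the result is ENOTBLK for a non-VBLK vnode, EBUSY if the device (or any alias of it) already backs a mount, and 0 otherwise. A minimal mount-path sketch:

	/* devvp: the VBLK vnode named in the mount request */
	error = vfs_mountedon(devvp);
	if (error != 0)
		return error;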
2225 2225
2226/* 2226/*
2227 * Unmount all file systems. 2227 * Unmount all file systems.
2228 * We traverse the list in reverse order under the assumption that doing so 2228 * We traverse the list in reverse order under the assumption that doing so
2229 * will avoid needing to worry about dependencies. 2229 * will avoid needing to worry about dependencies.
2230 */ 2230 */
2231void 2231void
2232vfs_unmountall(struct lwp *l) 2232vfs_unmountall(struct lwp *l)
2233{ 2233{
2234 struct mount *mp, *nmp; 2234 struct mount *mp, *nmp;
2235 int allerror, error; 2235 int allerror, error;
2236 2236
2237 printf("unmounting file systems..."); 2237 printf("unmounting file systems...");
2238 for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist); 2238 for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist);
2239 !CIRCLEQ_EMPTY(&mountlist); 2239 !CIRCLEQ_EMPTY(&mountlist);
2240 mp = nmp) { 2240 mp = nmp) {
2241 nmp = CIRCLEQ_PREV(mp, mnt_list); 2241 nmp = CIRCLEQ_PREV(mp, mnt_list);
2242#ifdef DEBUG 2242#ifdef DEBUG
2243 printf("\nunmounting %s (%s)...", 2243 printf("\nunmounting %s (%s)...",
2244 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 2244 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2245#endif 2245#endif
2246 atomic_inc_uint(&mp->mnt_refcnt); 2246 atomic_inc_uint(&mp->mnt_refcnt);
2247 if ((error = dounmount(mp, MNT_FORCE, l)) != 0) { 2247 if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
2248 printf("unmount of %s failed with error %d\n", 2248 printf("unmount of %s failed with error %d\n",
2249 mp->mnt_stat.f_mntonname, error); 2249 mp->mnt_stat.f_mntonname, error);
2250 allerror = 1; 2250 allerror = 1;
2251 } 2251 }
2252 } 2252 }
2253 printf(" done\n"); 2253 printf(" done\n");
2254 if (allerror) 2254 if (allerror)
2255 printf("WARNING: some file systems would not unmount\n"); 2255 printf("WARNING: some file systems would not unmount\n");
2256} 2256}
2257 2257
2258/* 2258/*
2259 * Sync and unmount file systems before shutting down. 2259 * Sync and unmount file systems before shutting down.
2260 */ 2260 */
2261void 2261void
2262vfs_shutdown(void) 2262vfs_shutdown(void)
2263{ 2263{
2264 struct lwp *l; 2264 struct lwp *l;
2265 2265
2266 /* XXX we're certainly not running in lwp0's context! */ 2266 /* XXX we're certainly not running in lwp0's context! */
2267 l = curlwp; 2267 l = curlwp;
2268 if (l == NULL) 2268 if (l == NULL)
2269 l = &lwp0; 2269 l = &lwp0;
2270 2270
2271 printf("syncing disks... "); 2271 printf("syncing disks... ");
2272 2272
2273 /* remove user processes from run queue */ 2273 /* remove user processes from run queue */
2274 suspendsched(); 2274 suspendsched();
2275 (void) spl0(); 2275 (void) spl0();
2276 2276
2277 /* avoid coming back this way again if we panic. */ 2277 /* avoid coming back this way again if we panic. */
2278 doing_shutdown = 1; 2278 doing_shutdown = 1;
2279 2279
2280 sys_sync(l, NULL, NULL); 2280 sys_sync(l, NULL, NULL);
2281 2281
2282 /* Wait for sync to finish. */ 2282 /* Wait for sync to finish. */
2283 if (buf_syncwait() != 0) { 2283 if (buf_syncwait() != 0) {
2284#if defined(DDB) && defined(DEBUG_HALT_BUSY) 2284#if defined(DDB) && defined(DEBUG_HALT_BUSY)
2285 Debugger(); 2285 Debugger();
2286#endif 2286#endif
2287 printf("giving up\n"); 2287 printf("giving up\n");
2288 return; 2288 return;
2289 } else 2289 } else
2290 printf("done\n"); 2290 printf("done\n");
2291 2291
2292 /* 2292 /*
2293 * If we've panic'd, don't make the situation potentially 2293 * If we've panic'd, don't make the situation potentially
2294 * worse by unmounting the file systems. 2294 * worse by unmounting the file systems.
2295 */ 2295 */
2296 if (panicstr != NULL) 2296 if (panicstr != NULL)
2297 return; 2297 return;
2298 2298
2299 /* Release inodes held by texts before update. */ 2299 /* Release inodes held by texts before update. */
2300#ifdef notdef 2300#ifdef notdef
2301 vnshutdown(); 2301 vnshutdown();
2302#endif 2302#endif
2303 /* Unmount file systems. */ 2303 /* Unmount file systems. */
2304 vfs_unmountall(l); 2304 vfs_unmountall(l);
2305} 2305}
2306 2306
2307/* 2307/*
2308 * Mount the root file system. If the operator didn't specify a 2308 * Mount the root file system. If the operator didn't specify a
2309 * file system to use, try all possible file systems until one 2309 * file system to use, try all possible file systems until one
2310 * succeeds. 2310 * succeeds.
2311 */ 2311 */
2312int 2312int
2313vfs_mountroot(void) 2313vfs_mountroot(void)
2314{ 2314{
2315 struct vfsops *v; 2315 struct vfsops *v;
2316 int error = ENODEV; 2316 int error = ENODEV;
2317 2317
2318 if (root_device == NULL) 2318 if (root_device == NULL)
2319 panic("vfs_mountroot: root device unknown"); 2319 panic("vfs_mountroot: root device unknown");
2320 2320
2321 switch (device_class(root_device)) { 2321 switch (device_class(root_device)) {
2322 case DV_IFNET: 2322 case DV_IFNET:
2323 if (rootdev != NODEV) 2323 if (rootdev != NODEV)
2324 panic("vfs_mountroot: rootdev set for DV_IFNET " 2324 panic("vfs_mountroot: rootdev set for DV_IFNET "
2325 "(0x%llx -> %llu,%llu)", rootdev, 2325 "(0x%llx -> %llu,%llu)",
 2326 (unsigned long long)rootdev,
2326 (unsigned long long)major(rootdev), 2327 (unsigned long long)major(rootdev),
2327 (unsigned long long)minor(rootdev)); 2328 (unsigned long long)minor(rootdev));
2328 break; 2329 break;
2329 2330
2330 case DV_DISK: 2331 case DV_DISK:
2331 if (rootdev == NODEV) 2332 if (rootdev == NODEV)
2332 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2333 panic("vfs_mountroot: rootdev not set for DV_DISK");
2333 if (bdevvp(rootdev, &rootvp)) 2334 if (bdevvp(rootdev, &rootvp))
2334 panic("vfs_mountroot: can't get vnode for rootdev"); 2335 panic("vfs_mountroot: can't get vnode for rootdev");
2335 error = VOP_OPEN(rootvp, FREAD, FSCRED); 2336 error = VOP_OPEN(rootvp, FREAD, FSCRED);
2336 if (error) { 2337 if (error) {
2337 printf("vfs_mountroot: can't open root device\n"); 2338 printf("vfs_mountroot: can't open root device\n");
2338 return (error); 2339 return (error);
2339 } 2340 }
2340 break; 2341 break;
2341 2342
2342 default: 2343 default:
2343 printf("%s: inappropriate for root file system\n", 2344 printf("%s: inappropriate for root file system\n",
2344 device_xname(root_device)); 2345 device_xname(root_device));
2345 return (ENODEV); 2346 return (ENODEV);
2346 } 2347 }
2347 2348
2348 /* 2349 /*
 2349 * If the user specified a root fs type, use it. Make sure the 2350 * If the user specified a root fs type, use it. Make sure the
 2350 * specified type exists and has a mount_root(). 2351 * specified type exists and has a mount_root().
2351 */ 2352 */
2352 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 2353 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
2353 v = vfs_getopsbyname(rootfstype); 2354 v = vfs_getopsbyname(rootfstype);
2354 error = EFTYPE; 2355 error = EFTYPE;
2355 if (v != NULL) { 2356 if (v != NULL) {
2356 if (v->vfs_mountroot != NULL) { 2357 if (v->vfs_mountroot != NULL) {
2357 error = (v->vfs_mountroot)(); 2358 error = (v->vfs_mountroot)();
2358 } 2359 }
2359 v->vfs_refcount--; 2360 v->vfs_refcount--;
2360 } 2361 }
2361 goto done; 2362 goto done;
2362 } 2363 }
2363 2364
2364 /* 2365 /*
2365 * Try each file system currently configured into the kernel. 2366 * Try each file system currently configured into the kernel.
2366 */ 2367 */
2367 mutex_enter(&vfs_list_lock); 2368 mutex_enter(&vfs_list_lock);
2368 LIST_FOREACH(v, &vfs_list, vfs_list) { 2369 LIST_FOREACH(v, &vfs_list, vfs_list) {
2369 if (v->vfs_mountroot == NULL) 2370 if (v->vfs_mountroot == NULL)
2370 continue; 2371 continue;
2371#ifdef DEBUG 2372#ifdef DEBUG
2372 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 2373 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2373#endif 2374#endif
2374 v->vfs_refcount++; 2375 v->vfs_refcount++;
2375 mutex_exit(&vfs_list_lock); 2376 mutex_exit(&vfs_list_lock);
2376 error = (*v->vfs_mountroot)(); 2377 error = (*v->vfs_mountroot)();
2377 mutex_enter(&vfs_list_lock); 2378 mutex_enter(&vfs_list_lock);
2378 v->vfs_refcount--; 2379 v->vfs_refcount--;
2379 if (!error) { 2380 if (!error) {
2380 aprint_normal("root file system type: %s\n", 2381 aprint_normal("root file system type: %s\n",
2381 v->vfs_name); 2382 v->vfs_name);
2382 break; 2383 break;
2383 } 2384 }
2384 } 2385 }
2385 mutex_exit(&vfs_list_lock); 2386 mutex_exit(&vfs_list_lock);
2386 2387
2387 if (v == NULL) { 2388 if (v == NULL) {
2388 printf("no file system for %s", device_xname(root_device)); 2389 printf("no file system for %s", device_xname(root_device));
2389 if (device_class(root_device) == DV_DISK) 2390 if (device_class(root_device) == DV_DISK)
2390 printf(" (dev 0x%llx)", rootdev); 2391 printf(" (dev 0x%llx)", (unsigned long long)rootdev);
2391 printf("\n"); 2392 printf("\n");
2392 error = EFTYPE; 2393 error = EFTYPE;
2393 } 2394 }
2394 2395
2395done: 2396done:
2396 if (error && device_class(root_device) == DV_DISK) { 2397 if (error && device_class(root_device) == DV_DISK) {
2397 VOP_CLOSE(rootvp, FREAD, FSCRED); 2398 VOP_CLOSE(rootvp, FREAD, FSCRED);
2398 vrele(rootvp); 2399 vrele(rootvp);
2399 } 2400 }
2400 return (error); 2401 return (error);
2401} 2402}
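The loop above is the entire fallback policy: hold vfs_list_lock only while walking the list, bump vfs_refcount so the entry cannot vanish, drop the lock across the possibly-sleeping mountroot hook, and stop at the first file system that mounts. For illustration only, a minimal user-space sketch of that try-each-in-turn pattern; struct fakevfs, try_ffs, and try_cd9660 are hypothetical stand-ins for struct vfsops and its vfs_mountroot hook, not kernel interfaces:

#include <stddef.h>
#include <stdio.h>
#include <errno.h>

struct fakevfs {                            /* stand-in for struct vfsops */
	const char *name;
	int (*mountroot)(void);
};

static int try_ffs(void)    { return EIO; } /* pretend ffs fails */
static int try_cd9660(void) { return 0; }   /* pretend cd9660 succeeds */

int
main(void)
{
	struct fakevfs list[] = {
		{ "ffs",    try_ffs },
		{ "cd9660", try_cd9660 },
		{ "nfs",    NULL },         /* no mountroot hook: skipped */
	};
	int error = ENODEV;

	for (size_t i = 0; i < sizeof(list) / sizeof(list[0]); i++) {
		if (list[i].mountroot == NULL)
			continue;
		error = (*list[i].mountroot)();
		if (error == 0) {
			printf("root file system type: %s\n", list[i].name);
			break;
		}
	}
	return error;
}

As in the kernel loop, if every hook fails the sketch is left holding the last error it saw, or ENODEV when no hook ran at all.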
2402 2403
2403/* 2404/*
2404 * Get a new unique fsid 2405 * Get a new unique fsid
2405 */ 2406 */
2406void 2407void
2407vfs_getnewfsid(struct mount *mp) 2408vfs_getnewfsid(struct mount *mp)
2408{ 2409{
2409 static u_short xxxfs_mntid; 2410 static u_short xxxfs_mntid;
2410 fsid_t tfsid; 2411 fsid_t tfsid;
2411 int mtype; 2412 int mtype;
2412 2413
2413 mutex_enter(&mntid_lock); 2414 mutex_enter(&mntid_lock);
2414 mtype = makefstype(mp->mnt_op->vfs_name); 2415 mtype = makefstype(mp->mnt_op->vfs_name);
2415 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 2416 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
2416 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 2417 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
2417 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2418 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2418 if (xxxfs_mntid == 0) 2419 if (xxxfs_mntid == 0)
2419 ++xxxfs_mntid; 2420 ++xxxfs_mntid;
2420 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 2421 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
2421 tfsid.__fsid_val[1] = mtype; 2422 tfsid.__fsid_val[1] = mtype;
2422 if (!CIRCLEQ_EMPTY(&mountlist)) { 2423 if (!CIRCLEQ_EMPTY(&mountlist)) {
2423 while (vfs_getvfs(&tfsid)) { 2424 while (vfs_getvfs(&tfsid)) {
2424 tfsid.__fsid_val[0]++; 2425 tfsid.__fsid_val[0]++;
2425 xxxfs_mntid++; 2426 xxxfs_mntid++;
2426 } 2427 }
2427 } 2428 }
2428 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 2429 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2429 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2430 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2430 mutex_exit(&mntid_lock); 2431 mutex_exit(&mntid_lock);
2431} 2432}
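vfs_getnewfsid() packs the file system type into one half of a dev_t-shaped value and a rolling per-mount counter into the other, then linearly probes past any candidate already claimed on the mount list (vfs_getvfs() is the membership test). A self-contained sketch of that probe, where PACK() is a hypothetical stand-in for makedev() and the used[] array plays the role of the mount list:

#include <stddef.h>
#include <stdio.h>

#define PACK(maj, min)	(((maj) << 16) | ((min) & 0xffff))  /* fake makedev() */

int
main(void)
{
	unsigned used[] = { PACK(7, 1), PACK(7, 2) };  /* fsids already mounted */
	unsigned mntid = 1;
	unsigned fsid = PACK(7, mntid);
	int taken;

	do {
		taken = 0;
		for (size_t i = 0; i < sizeof(used) / sizeof(used[0]); i++)
			if (used[i] == fsid)
				taken = 1;
		if (taken) {                   /* collision: advance and retry */
			fsid++;
			mntid++;
		}
	} while (taken);
	printf("new fsid 0x%x, mntid %u\n", fsid, mntid);  /* 0x70003, 3 */
	return 0;
}

The mntid_lock around the real version matters because both the static counter and the mount list are shared state.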
2432 2433
2433/* 2434/*
2434 * Make a 'unique' number from a mount type name. 2435 * Make a 'unique' number from a mount type name.
2435 */ 2436 */
2436long 2437long
2437makefstype(const char *type) 2438makefstype(const char *type)
2438{ 2439{
2439 long rv; 2440 long rv;
2440 2441
2441 for (rv = 0; *type; type++) { 2442 for (rv = 0; *type; type++) {
2442 rv <<= 2; 2443 rv <<= 2;
2443 rv ^= *type; 2444 rv ^= *type;
2444 } 2445 }
2445 return rv; 2446 return rv;
2446} 2447}
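makefstype() is a shift-and-xor fold of the name's bytes: the same name always maps to the same long, and distinct short names rarely collide, which is all the 'unique' in the comment promises. Folding "ffs" by hand ('f' is 0x66, 's' is 0x73) gives 0x66, then (0x66 << 2) ^ 0x66 = 0x1fe, then (0x1fe << 2) ^ 0x73 = 0x78b. A standalone check of that arithmetic:

#include <stdio.h>

static long
fold(const char *type)                    /* same loop as makefstype() */
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

int
main(void)
{
	printf("makefstype(\"ffs\") = 0x%lx\n", fold("ffs"));  /* 0x78b */
	return 0;
}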
2447 2448
2448/* 2449/*
2449 * Set vnode attributes to VNOVAL 2450 * Set vnode attributes to VNOVAL
2450 */ 2451 */
2451void 2452void
2452vattr_null(struct vattr *vap) 2453vattr_null(struct vattr *vap)
2453{ 2454{
2454 2455
2455 vap->va_type = VNON; 2456 vap->va_type = VNON;
2456 2457
2457 /* 2458 /*
 2458 * Assign individually so that it is safe even if the size and 2459 * Assign individually so that it is safe even if the size and
 2459 * sign of each member vary. 2460 * sign of each member vary.
2460 */ 2461 */
2461 vap->va_mode = VNOVAL; 2462 vap->va_mode = VNOVAL;
2462 vap->va_nlink = VNOVAL; 2463 vap->va_nlink = VNOVAL;
2463 vap->va_uid = VNOVAL; 2464 vap->va_uid = VNOVAL;
2464 vap->va_gid = VNOVAL; 2465 vap->va_gid = VNOVAL;
2465 vap->va_fsid = VNOVAL; 2466 vap->va_fsid = VNOVAL;
2466 vap->va_fileid = VNOVAL; 2467 vap->va_fileid = VNOVAL;
2467 vap->va_size = VNOVAL; 2468 vap->va_size = VNOVAL;
2468 vap->va_blocksize = VNOVAL; 2469 vap->va_blocksize = VNOVAL;
2469 vap->va_atime.tv_sec = 2470 vap->va_atime.tv_sec =
2470 vap->va_mtime.tv_sec = 2471 vap->va_mtime.tv_sec =
2471 vap->va_ctime.tv_sec = 2472 vap->va_ctime.tv_sec =
2472 vap->va_birthtime.tv_sec = VNOVAL; 2473 vap->va_birthtime.tv_sec = VNOVAL;
2473 vap->va_atime.tv_nsec = 2474 vap->va_atime.tv_nsec =
2474 vap->va_mtime.tv_nsec = 2475 vap->va_mtime.tv_nsec =
2475 vap->va_ctime.tv_nsec = 2476 vap->va_ctime.tv_nsec =
2476 vap->va_birthtime.tv_nsec = VNOVAL; 2477 vap->va_birthtime.tv_nsec = VNOVAL;
2477 vap->va_gen = VNOVAL; 2478 vap->va_gen = VNOVAL;
2478 vap->va_flags = VNOVAL; 2479 vap->va_flags = VNOVAL;
2479 vap->va_rdev = VNOVAL; 2480 vap->va_rdev = VNOVAL;
2480 vap->va_bytes = VNOVAL; 2481 vap->va_bytes = VNOVAL;
2481 vap->va_vaflags = 0; 2482 vap->va_vaflags = 0;
2482} 2483}
2483 2484
2484#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 2485#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
2485#define ARRAY_PRINT(idx, arr) \ 2486#define ARRAY_PRINT(idx, arr) \
2486 ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 2487 ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
2487 2488
2488const char * const vnode_tags[] = { VNODE_TAGS }; 2489const char * const vnode_tags[] = { VNODE_TAGS };
2489const char * const vnode_types[] = { VNODE_TYPES }; 2490const char * const vnode_types[] = { VNODE_TYPES };
2490const char vnode_flagbits[] = VNODE_FLAGBITS; 2491const char vnode_flagbits[] = VNODE_FLAGBITS;
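ARRAY_PRINT() bounds-checks an index before using it to select a name, falling back to "UNKNOWN"; note that because the guard is (idx) > 0 rather than (idx) >= 0, index 0 also prints as "UNKNOWN". A self-contained demonstration with a hypothetical three-entry tag table standing in for VNODE_TAGS:

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#define ARRAY_PRINT(idx, arr) \
    ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

int
main(void)
{
	static const char * const tags[] = { "NON", "UFS", "NFS" };

	/* in range, out of range, and the idx == 0 corner case */
	printf("%s %s %s\n", ARRAY_PRINT(1, tags), ARRAY_PRINT(7, tags),
	    ARRAY_PRINT(0, tags));        /* UFS UNKNOWN UNKNOWN */
	return 0;
}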
2491 2492
2492/* 2493/*
2493 * Print out a description of a vnode. 2494 * Print out a description of a vnode.
2494 */ 2495 */
2495void 2496void
2496vprint(const char *label, struct vnode *vp) 2497vprint(const char *label, struct vnode *vp)
2497{ 2498{
2498 struct vnlock *vl; 2499 struct vnlock *vl;
2499 char bf[96]; 2500 char bf[96];
2500 int flag; 2501 int flag;
2501 2502
2502 vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock); 2503 vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
2503 flag = vp->v_iflag | vp->v_vflag | vp->v_uflag; 2504 flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
2504 snprintb(bf, sizeof(bf), vnode_flagbits, flag); 2505 snprintb(bf, sizeof(bf), vnode_flagbits, flag);
2505 2506
2506 if (label != NULL) 2507 if (label != NULL)
2507 printf("%s: ", label); 2508 printf("%s: ", label);
2508 printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " 2509 printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
2509 "usecount %d, writecount %d, holdcount %d\n" 2510 "usecount %d, writecount %d, holdcount %d\n"
2510 "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n", 2511 "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
2511 vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 2512 vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
2512 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 2513 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
2513 vp->v_usecount, vp->v_writecount, vp->v_holdcnt, 2514 vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
2514 vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt); 2515 vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
2515 if (vp->v_data != NULL) { 2516 if (vp->v_data != NULL) {
2516 printf("\t"); 2517 printf("\t");
2517 VOP_PRINT(vp); 2518 VOP_PRINT(vp);
2518 } 2519 }
2519} 2520}
2520 2521
2521#ifdef DEBUG 2522#ifdef DEBUG
2522/* 2523/*
2523 * List all of the locked vnodes in the system. 2524 * List all of the locked vnodes in the system.
2524 * Called when debugging the kernel. 2525 * Called when debugging the kernel.
2525 */ 2526 */
2526void 2527void
2527printlockedvnodes(void) 2528printlockedvnodes(void)
2528{ 2529{
2529 struct mount *mp, *nmp; 2530 struct mount *mp, *nmp;
2530 struct vnode *vp; 2531 struct vnode *vp;
2531 2532
2532 printf("Locked vnodes\n"); 2533 printf("Locked vnodes\n");
2533 mutex_enter(&mountlist_lock); 2534 mutex_enter(&mountlist_lock);
2534 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2535 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2535 mp = nmp) { 2536 mp = nmp) {
2536 if (vfs_busy(mp, &nmp)) { 2537 if (vfs_busy(mp, &nmp)) {
2537 continue; 2538 continue;
2538 } 2539 }
2539 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2540 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2540 if (VOP_ISLOCKED(vp)) 2541 if (VOP_ISLOCKED(vp))
2541 vprint(NULL, vp); 2542 vprint(NULL, vp);
2542 } 2543 }
2543 mutex_enter(&mountlist_lock); 2544 mutex_enter(&mountlist_lock);
2544 vfs_unbusy(mp, false, &nmp); 2545 vfs_unbusy(mp, false, &nmp);
2545 } 2546 }
2546 mutex_exit(&mountlist_lock); 2547 mutex_exit(&mountlist_lock);
2547} 2548}
2548#endif 2549#endif
2549 2550
2550/* 2551/*
2551 * Do the usual access checking. 2552 * Do the usual access checking.
 2552 * file_mode, uid, and gid are from the vnode in question, 2553 * file_mode, uid, and gid are from the vnode in question,
 2553 * while acc_mode and cred are from the VOP_ACCESS parameter list. 2554 * while acc_mode and cred are from the VOP_ACCESS parameter list.
2554 */ 2555 */
2555int 2556int
2556vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2557vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2557 mode_t acc_mode, kauth_cred_t cred) 2558 mode_t acc_mode, kauth_cred_t cred)
2558{ 2559{
2559 mode_t mask; 2560 mode_t mask;
2560 int error, ismember; 2561 int error, ismember;
2561 2562
2562 /* 2563 /*
2563 * Super-user always gets read/write access, but execute access depends 2564 * Super-user always gets read/write access, but execute access depends
2564 * on at least one execute bit being set. 2565 * on at least one execute bit being set.
2565 */ 2566 */
2566 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) { 2567 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
2567 if ((acc_mode & VEXEC) && type != VDIR && 2568 if ((acc_mode & VEXEC) && type != VDIR &&
2568 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2569 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2569 return (EACCES); 2570 return (EACCES);
2570 return (0); 2571 return (0);
2571 } 2572 }
2572 2573
2573 mask = 0; 2574 mask = 0;
2574 2575
2575 /* Otherwise, check the owner. */ 2576 /* Otherwise, check the owner. */
2576 if (kauth_cred_geteuid(cred) == uid) { 2577 if (kauth_cred_geteuid(cred) == uid) {
2577 if (acc_mode & VEXEC) 2578 if (acc_mode & VEXEC)
2578 mask |= S_IXUSR; 2579 mask |= S_IXUSR;
2579 if (acc_mode & VREAD) 2580 if (acc_mode & VREAD)
2580 mask |= S_IRUSR; 2581 mask |= S_IRUSR;
2581 if (acc_mode & VWRITE) 2582 if (acc_mode & VWRITE)
2582 mask |= S_IWUSR; 2583 mask |= S_IWUSR;
2583 return ((file_mode & mask) == mask ? 0 : EACCES); 2584 return ((file_mode & mask) == mask ? 0 : EACCES);
2584 } 2585 }
2585 2586
2586 /* Otherwise, check the groups. */ 2587 /* Otherwise, check the groups. */
2587 error = kauth_cred_ismember_gid(cred, gid, &ismember); 2588 error = kauth_cred_ismember_gid(cred, gid, &ismember);
2588 if (error) 2589 if (error)
2589 return (error); 2590 return (error);
2590 if (kauth_cred_getegid(cred) == gid || ismember) { 2591 if (kauth_cred_getegid(cred) == gid || ismember) {
2591 if (acc_mode & VEXEC) 2592 if (acc_mode & VEXEC)
2592 mask |= S_IXGRP; 2593 mask |= S_IXGRP;
2593 if (acc_mode & VREAD) 2594 if (acc_mode & VREAD)
2594 mask |= S_IRGRP; 2595 mask |= S_IRGRP;
2595 if (acc_mode & VWRITE) 2596 if (acc_mode & VWRITE)
2596 mask |= S_IWGRP; 2597 mask |= S_IWGRP;
2597 return ((file_mode & mask) == mask ? 0 : EACCES); 2598 return ((file_mode & mask) == mask ? 0 : EACCES);
2598 } 2599 }
2599 2600
2600 /* Otherwise, check everyone else. */ 2601 /* Otherwise, check everyone else. */
2601 if (acc_mode & VEXEC) 2602 if (acc_mode & VEXEC)
2602 mask |= S_IXOTH; 2603 mask |= S_IXOTH;
2603 if (acc_mode & VREAD) 2604 if (acc_mode & VREAD)
2604 mask |= S_IROTH; 2605 mask |= S_IROTH;
2605 if (acc_mode & VWRITE) 2606 if (acc_mode & VWRITE)
2606 mask |= S_IWOTH; 2607 mask |= S_IWOTH;
2607 return ((file_mode & mask) == mask ? 0 : EACCES); 2608 return ((file_mode & mask) == mask ? 0 : EACCES);
2608} 2609}
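The owner, group, and other branches of vaccess() differ only in which rwx bits are collected into mask, so the owner case is representative: build the required bit set from the requested access, then grant only if every required bit is present in file_mode. A user-space sketch of just that branch; WANT_R, WANT_W, and WANT_X are hypothetical stand-ins for VREAD/VWRITE/VEXEC:

#include <stdio.h>
#include <errno.h>
#include <sys/stat.h>

#define WANT_R 4                          /* hypothetical VREAD stand-in */
#define WANT_W 2                          /* hypothetical VWRITE stand-in */
#define WANT_X 1                          /* hypothetical VEXEC stand-in */

static int
owner_access(mode_t file_mode, int acc)   /* the S_I?USR branch of vaccess() */
{
	mode_t mask = 0;

	if (acc & WANT_X)
		mask |= S_IXUSR;
	if (acc & WANT_R)
		mask |= S_IRUSR;
	if (acc & WANT_W)
		mask |= S_IWUSR;
	return (file_mode & mask) == mask ? 0 : EACCES;
}

int
main(void)
{
	printf("0640 owner r/w: %d\n", owner_access(0640, WANT_R | WANT_W));
	printf("0640 owner x:   %d (EACCES is %d)\n",
	    owner_access(0640, WANT_X), EACCES);
	return 0;
}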
2609 2610
2610/* 2611/*
2611 * Given a file system name, look up the vfsops for that 2612 * Given a file system name, look up the vfsops for that
 2612 * file system, or return NULL if the file system isn't present 2613 * file system, or return NULL if the file system isn't present
2613 * in the kernel. 2614 * in the kernel.
2614 */ 2615 */
2615struct vfsops * 2616struct vfsops *
2616vfs_getopsbyname(const char *name) 2617vfs_getopsbyname(const char *name)
2617{ 2618{
2618 struct vfsops *v; 2619 struct vfsops *v;
2619 2620
2620 mutex_enter(&vfs_list_lock); 2621 mutex_enter(&vfs_list_lock);
2621 LIST_FOREACH(v, &vfs_list, vfs_list) { 2622 LIST_FOREACH(v, &vfs_list, vfs_list) {
2622 if (strcmp(v->vfs_name, name) == 0) 2623 if (strcmp(v->vfs_name, name) == 0)
2623 break; 2624 break;
2624 } 2625 }
2625 if (v != NULL) 2626 if (v != NULL)
2626 v->vfs_refcount++; 2627 v->vfs_refcount++;
2627 mutex_exit(&vfs_list_lock); 2628 mutex_exit(&vfs_list_lock);
2628 2629
2629 return (v); 2630 return (v);
2630} 2631}
2631 2632
2632void 2633void
2633copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) 2634copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2634{ 2635{
2635 const struct statvfs *mbp; 2636 const struct statvfs *mbp;
2636 2637
2637 if (sbp == (mbp = &mp->mnt_stat)) 2638 if (sbp == (mbp = &mp->mnt_stat))
2638 return; 2639 return;
2639 2640
2640 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); 2641 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2641 sbp->f_fsid = mbp->f_fsid; 2642 sbp->f_fsid = mbp->f_fsid;
2642 sbp->f_owner = mbp->f_owner; 2643 sbp->f_owner = mbp->f_owner;
2643 sbp->f_flag = mbp->f_flag; 2644 sbp->f_flag = mbp->f_flag;
2644 sbp->f_syncwrites = mbp->f_syncwrites; 2645 sbp->f_syncwrites = mbp->f_syncwrites;
2645 sbp->f_asyncwrites = mbp->f_asyncwrites; 2646 sbp->f_asyncwrites = mbp->f_asyncwrites;
2646 sbp->f_syncreads = mbp->f_syncreads; 2647 sbp->f_syncreads = mbp->f_syncreads;
2647 sbp->f_asyncreads = mbp->f_asyncreads; 2648 sbp->f_asyncreads = mbp->f_asyncreads;
2648 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); 2649 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2649 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 2650 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2650 sizeof(sbp->f_fstypename)); 2651 sizeof(sbp->f_fstypename));
2651 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 2652 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2652 sizeof(sbp->f_mntonname)); 2653 sizeof(sbp->f_mntonname));
2653 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 2654 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2654 sizeof(sbp->f_mntfromname)); 2655 sizeof(sbp->f_mntfromname));
2655 sbp->f_namemax = mbp->f_namemax; 2656 sbp->f_namemax = mbp->f_namemax;
2656} 2657}
2657 2658
2658int 2659int
2659set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 2660set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
2660 const char *vfsname, struct mount *mp, struct lwp *l) 2661 const char *vfsname, struct mount *mp, struct lwp *l)
2661{ 2662{
2662 int error; 2663 int error;
2663 size_t size; 2664 size_t size;
2664 struct statvfs *sfs = &mp->mnt_stat; 2665 struct statvfs *sfs = &mp->mnt_stat;
2665 int (*fun)(const void *, void *, size_t, size_t *); 2666 int (*fun)(const void *, void *, size_t, size_t *);
2666 2667
2667 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, 2668 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
2668 sizeof(mp->mnt_stat.f_fstypename)); 2669 sizeof(mp->mnt_stat.f_fstypename));
2669 2670
2670 if (onp) { 2671 if (onp) {
2671 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 2672 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
2672 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr; 2673 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
2673 if (cwdi->cwdi_rdir != NULL) { 2674 if (cwdi->cwdi_rdir != NULL) {
2674 size_t len; 2675 size_t len;
2675 char *bp; 2676 char *bp;
2676 char *path = PNBUF_GET(); 2677 char *path = PNBUF_GET();
2677 2678
2678 bp = path + MAXPATHLEN; 2679 bp = path + MAXPATHLEN;
2679 *--bp = '\0'; 2680 *--bp = '\0';
2680 rw_enter(&cwdi->cwdi_lock, RW_READER); 2681 rw_enter(&cwdi->cwdi_lock, RW_READER);
2681 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, 2682 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
2682 path, MAXPATHLEN / 2, 0, l); 2683 path, MAXPATHLEN / 2, 0, l);
2683 rw_exit(&cwdi->cwdi_lock); 2684 rw_exit(&cwdi->cwdi_lock);
2684 if (error) { 2685 if (error) {
2685 PNBUF_PUT(path); 2686 PNBUF_PUT(path);
2686 return error; 2687 return error;
2687 } 2688 }
2688 2689
2689 len = strlen(bp); 2690 len = strlen(bp);
2690 if (len > sizeof(sfs->f_mntonname) - 1) 2691 if (len > sizeof(sfs->f_mntonname) - 1)
2691 len = sizeof(sfs->f_mntonname) - 1; 2692 len = sizeof(sfs->f_mntonname) - 1;
2692 (void)strncpy(sfs->f_mntonname, bp, len); 2693 (void)strncpy(sfs->f_mntonname, bp, len);
2693 PNBUF_PUT(path); 2694 PNBUF_PUT(path);
2694 2695
2695 if (len < sizeof(sfs->f_mntonname) - 1) { 2696 if (len < sizeof(sfs->f_mntonname) - 1) {
2696 error = (*fun)(onp, &sfs->f_mntonname[len], 2697 error = (*fun)(onp, &sfs->f_mntonname[len],
2697 sizeof(sfs->f_mntonname) - len - 1, &size); 2698 sizeof(sfs->f_mntonname) - len - 1, &size);
2698 if (error) 2699 if (error)
2699 return error; 2700 return error;
2700 size += len; 2701 size += len;
2701 } else { 2702 } else {
2702 size = len; 2703 size = len;
2703 } 2704 }
2704 } else { 2705 } else {
2705 error = (*fun)(onp, &sfs->f_mntonname, 2706 error = (*fun)(onp, &sfs->f_mntonname,
2706 sizeof(sfs->f_mntonname) - 1, &size); 2707 sizeof(sfs->f_mntonname) - 1, &size);
2707 if (error) 2708 if (error)
2708 return error; 2709 return error;
2709 } 2710 }
2710 (void)memset(sfs->f_mntonname + size, 0, 2711 (void)memset(sfs->f_mntonname + size, 0,
2711 sizeof(sfs->f_mntonname) - size); 2712 sizeof(sfs->f_mntonname) - size);
2712 } 2713 }
2713 2714
2714 if (fromp) { 2715 if (fromp) {
2715 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr; 2716 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
2716 error = (*fun)(fromp, sfs->f_mntfromname, 2717 error = (*fun)(fromp, sfs->f_mntfromname,
2717 sizeof(sfs->f_mntfromname) - 1, &size); 2718 sizeof(sfs->f_mntfromname) - 1, &size);
2718 if (error) 2719 if (error)
2719 return error; 2720 return error;
2720 (void)memset(sfs->f_mntfromname + size, 0, 2721 (void)memset(sfs->f_mntfromname + size, 0,
2721 sizeof(sfs->f_mntfromname) - size); 2722 sizeof(sfs->f_mntfromname) - size);
2722 } 2723 }
2723 return 0; 2724 return 0;
2724} 2725}
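When the caller is chrooted (cwdi_rdir != NULL), the function prepends the path from the real root down to the chroot, obtained via getcwd_common(), to the caller-visible mount point, truncating into the fixed-size f_mntonname and zero-filling the tail. A sketch of just that string assembly, using a deliberately tiny hypothetical buffer so the truncation arithmetic is visible:

#include <stdio.h>
#include <string.h>

#define NAMELEN 16                /* tiny stand-in for sizeof(f_mntonname) */

int
main(void)
{
	char name[NAMELEN];
	const char *prefix = "/jail"; /* real root to chroot, per getcwd_common() */
	const char *onp = "/usr";     /* mount point as seen inside the chroot */
	size_t len, size;

	(void)memset(name, 0, sizeof(name)); /* keep strlen safe if onp truncates */
	len = strlen(prefix);
	if (len > sizeof(name) - 1)          /* clamp the prefix itself */
		len = sizeof(name) - 1;
	(void)strncpy(name, prefix, len);
	if (len < sizeof(name) - 1) {        /* room left: append the mount point */
		(void)strncpy(name + len, onp, sizeof(name) - len - 1);
		size = len + strlen(name + len);
	} else {
		size = len;
	}
	(void)memset(name + size, 0, sizeof(name) - size);
	printf("f_mntonname = \"%s\"\n", name);  /* "/jail/usr" */
	return 0;
}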
2725 2726
2726void 2727void
2727vfs_timestamp(struct timespec *ts) 2728vfs_timestamp(struct timespec *ts)
2728{ 2729{
2729 2730
2730 nanotime(ts); 2731 nanotime(ts);
2731} 2732}
2732 2733
2733time_t rootfstime; /* recorded root fs time, if known */ 2734time_t rootfstime; /* recorded root fs time, if known */
2734void 2735void
2735setrootfstime(time_t t) 2736setrootfstime(time_t t)
2736{ 2737{
2737 rootfstime = t; 2738 rootfstime = t;
2738} 2739}
2739 2740
2740/* 2741/*
2741 * Sham lock manager for vnodes. This is a temporary measure. 2742 * Sham lock manager for vnodes. This is a temporary measure.
2742 */ 2743 */
2743int 2744int
2744vlockmgr(struct vnlock *vl, int flags) 2745vlockmgr(struct vnlock *vl, int flags)
2745{ 2746{
2746 2747
2747 KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0); 2748 KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
2748 2749
2749 switch (flags & LK_TYPE_MASK) { 2750 switch (flags & LK_TYPE_MASK) {
2750 case LK_SHARED: 2751 case LK_SHARED:
2751 if (rw_tryenter(&vl->vl_lock, RW_READER)) { 2752 if (rw_tryenter(&vl->vl_lock, RW_READER)) {
2752 return 0; 2753 return 0;
2753 } 2754 }
2754 if ((flags & LK_NOWAIT) != 0) { 2755 if ((flags & LK_NOWAIT) != 0) {
2755 return EBUSY; 2756 return EBUSY;
2756 } 2757 }
2757 rw_enter(&vl->vl_lock, RW_READER); 2758 rw_enter(&vl->vl_lock, RW_READER);
2758 return 0; 2759 return 0;
2759 2760
2760 case LK_EXCLUSIVE: 2761 case LK_EXCLUSIVE:
2761 if (rw_tryenter(&vl->vl_lock, RW_WRITER)) { 2762 if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
2762 return 0; 2763 return 0;
2763 } 2764 }
2764 if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) && 2765 if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
2765 rw_write_held(&vl->vl_lock)) { 2766 rw_write_held(&vl->vl_lock)) {
2766 vl->vl_recursecnt++; 2767 vl->vl_recursecnt++;
2767 return 0; 2768 return 0;
2768 } 2769 }
2769 if ((flags & LK_NOWAIT) != 0) { 2770 if ((flags & LK_NOWAIT) != 0) {
2770 return EBUSY; 2771 return EBUSY;
2771 } 2772 }
2772 rw_enter(&vl->vl_lock, RW_WRITER); 2773 rw_enter(&vl->vl_lock, RW_WRITER);
2773 return 0; 2774 return 0;
2774 2775
2775 case LK_RELEASE: 2776 case LK_RELEASE:
2776 if (vl->vl_recursecnt != 0) { 2777 if (vl->vl_recursecnt != 0) {
2777 KASSERT(rw_write_held(&vl->vl_lock)); 2778 KASSERT(rw_write_held(&vl->vl_lock));
2778 vl->vl_recursecnt--; 2779 vl->vl_recursecnt--;
2779 return 0; 2780 return 0;
2780 } 2781 }
2781 rw_exit(&vl->vl_lock); 2782 rw_exit(&vl->vl_lock);
2782 return 0; 2783 return 0;
2783 2784
2784 default: 2785 default:
2785 panic("vlockmgr: flags %x", flags); 2786 panic("vlockmgr: flags %x", flags);
2786 } 2787 }
2787} 2788}
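The sham manager maps LK_SHARED, LK_EXCLUSIVE, and LK_RELEASE onto a reader/writer lock, turning LK_NOWAIT into a try-acquire and layering a recursion count on top for writers. The same shared/exclusive/try mapping expressed with POSIX rwlocks (recursion omitted; lock_op is a hypothetical helper, compile with -lpthread):

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

static pthread_rwlock_t vl = PTHREAD_RWLOCK_INITIALIZER;

static int
lock_op(int exclusive, int nowait)   /* LK_SHARED/LK_EXCLUSIVE [+LK_NOWAIT] */
{
	if (nowait)
		return exclusive ? pthread_rwlock_trywrlock(&vl)
		    : pthread_rwlock_tryrdlock(&vl);
	return exclusive ? pthread_rwlock_wrlock(&vl)
	    : pthread_rwlock_rdlock(&vl);
}

int
main(void)
{
	(void)lock_op(0, 0);              /* LK_SHARED: reader in */
	printf("exclusive with nowait: %d (EBUSY is %d)\n",
	    lock_op(1, 1), EBUSY);        /* fails, like LK_NOWAIT */
	pthread_rwlock_unlock(&vl);       /* LK_RELEASE */
	return 0;
}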
2788 2789
2789int 2790int
2790vlockstatus(struct vnlock *vl) 2791vlockstatus(struct vnlock *vl)
2791{ 2792{
2792 2793
2793 if (rw_write_held(&vl->vl_lock)) { 2794 if (rw_write_held(&vl->vl_lock)) {
2794 return LK_EXCLUSIVE; 2795 return LK_EXCLUSIVE;
2795 } 2796 }
2796 if (rw_read_held(&vl->vl_lock)) { 2797 if (rw_read_held(&vl->vl_lock)) {
2797 return LK_SHARED; 2798 return LK_SHARED;
2798 } 2799 }
2799 return 0; 2800 return 0;
2800} 2801}
2801 2802
2802/* 2803/*
2803 * mount_specific_key_create -- 2804 * mount_specific_key_create --
2804 * Create a key for subsystem mount-specific data. 2805 * Create a key for subsystem mount-specific data.
2805 */ 2806 */
2806int 2807int
2807mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 2808mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
2808{ 2809{
2809 2810
2810 return (specificdata_key_create(mount_specificdata_domain, keyp, dtor)); 2811 return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
2811} 2812}
2812 2813
2813/* 2814/*
2814 * mount_specific_key_delete -- 2815 * mount_specific_key_delete --
2815 * Delete a key for subsystem mount-specific data. 2816 * Delete a key for subsystem mount-specific data.
2816 */ 2817 */
2817void 2818void
2818mount_specific_key_delete(specificdata_key_t key) 2819mount_specific_key_delete(specificdata_key_t key)
2819{ 2820{
2820 2821
2821 specificdata_key_delete(mount_specificdata_domain, key); 2822 specificdata_key_delete(mount_specificdata_domain, key);
2822} 2823}
2823 2824
2824/* 2825/*
2825 * mount_initspecific -- 2826 * mount_initspecific --
2826 * Initialize a mount's specificdata container. 2827 * Initialize a mount's specificdata container.
2827 */ 2828 */
2828void 2829void
2829mount_initspecific(struct mount *mp) 2830mount_initspecific(struct mount *mp)
2830{ 2831{
2831 int error; 2832 int error;
2832 2833
2833 error = specificdata_init(mount_specificdata_domain, 2834 error = specificdata_init(mount_specificdata_domain,
2834 &mp->mnt_specdataref); 2835 &mp->mnt_specdataref);
2835 KASSERT(error == 0); 2836 KASSERT(error == 0);
2836} 2837}
2837 2838
2838/* 2839/*
2839 * mount_finispecific -- 2840 * mount_finispecific --
2840 * Finalize a mount's specificdata container. 2841 * Finalize a mount's specificdata container.
2841 */ 2842 */
2842void 2843void
2843mount_finispecific(struct mount *mp) 2844mount_finispecific(struct mount *mp)
2844{ 2845{
2845 2846
2846 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 2847 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
2847} 2848}
2848 2849
2849/* 2850/*
2850 * mount_getspecific -- 2851 * mount_getspecific --
2851 * Return mount-specific data corresponding to the specified key. 2852 * Return mount-specific data corresponding to the specified key.
2852 */ 2853 */
2853void * 2854void *
2854mount_getspecific(struct mount *mp, specificdata_key_t key) 2855mount_getspecific(struct mount *mp, specificdata_key_t key)
2855{ 2856{
2856 2857
2857 return (specificdata_getspecific(mount_specificdata_domain, 2858 return (specificdata_getspecific(mount_specificdata_domain,
2858 &mp->mnt_specdataref, key)); 2859 &mp->mnt_specdataref, key));
2859} 2860}
2860 2861
2861/* 2862/*
2862 * mount_setspecific -- 2863 * mount_setspecific --
2863 * Set mount-specific data corresponding to the specified key. 2864 * Set mount-specific data corresponding to the specified key.
2864 */ 2865 */
2865void 2866void
2866mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 2867mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
2867{ 2868{
2868 2869
2869 specificdata_setspecific(mount_specificdata_domain, 2870 specificdata_setspecific(mount_specificdata_domain,
2870 &mp->mnt_specdataref, key, data); 2871 &mp->mnt_specdataref, key, data);
2871} 2872}
2872 2873
2873int 2874int
2874VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c) 2875VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
2875{ 2876{
2876 int error; 2877 int error;
2877 2878
2878 KERNEL_LOCK(1, NULL); 2879 KERNEL_LOCK(1, NULL);
2879 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c); 2880 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
2880 KERNEL_UNLOCK_ONE(NULL); 2881 KERNEL_UNLOCK_ONE(NULL);
2881 2882
2882 return error; 2883 return error;
2883} 2884}
2884  2885
2885int 2886int
2886VFS_START(struct mount *mp, int a) 2887VFS_START(struct mount *mp, int a)
2887{ 2888{
2888 int error; 2889 int error;
2889 2890
2890 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2891 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2891 KERNEL_LOCK(1, NULL); 2892 KERNEL_LOCK(1, NULL);
2892 } 2893 }
2893 error = (*(mp->mnt_op->vfs_start))(mp, a); 2894 error = (*(mp->mnt_op->vfs_start))(mp, a);
2894 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2895 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2895 KERNEL_UNLOCK_ONE(NULL); 2896 KERNEL_UNLOCK_ONE(NULL);
2896 } 2897 }
2897 2898
2898 return error; 2899 return error;
2899} 2900}
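VFS_START() above sets the shape for the wrappers that follow: take the big kernel lock around the file system hook unless the mount is flagged MP-safe, so unconverted file systems stay serialized while converted ones run without the giant lock. A user-space reduction of that bridging idiom; IMNT_MPSAFE here is a hypothetical flag bit and 'giant' a pthread mutex standing in for the kernel lock:

#include <pthread.h>
#include <stdio.h>

#define IMNT_MPSAFE 0x01                  /* hypothetical flag bit */

static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;

struct fakemount {
	int iflag;
	int (*op)(struct fakemount *);
};

static int
call_op(struct fakemount *mp)             /* shape of the VFS_*() wrappers */
{
	int error;

	if ((mp->iflag & IMNT_MPSAFE) == 0)
		pthread_mutex_lock(&giant);
	error = (*mp->op)(mp);
	if ((mp->iflag & IMNT_MPSAFE) == 0)
		pthread_mutex_unlock(&giant);
	return error;
}

static int hook(struct fakemount *mp) { (void)mp; return 0; }

int
main(void)
{
	struct fakemount mp = { 0, hook };    /* iflag 0: not MP-safe, serialized */

	printf("op returned %d\n", call_op(&mp));
	return 0;
}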
2900  2901
2901int 2902int
2902VFS_UNMOUNT(struct mount *mp, int a) 2903VFS_UNMOUNT(struct mount *mp, int a)
2903{ 2904{
2904 int error; 2905 int error;
2905 2906
2906 KERNEL_LOCK(1, NULL); 2907 KERNEL_LOCK(1, NULL);
2907 error = (*(mp->mnt_op->vfs_unmount))(mp, a); 2908 error = (*(mp->mnt_op->vfs_unmount))(mp, a);
2908 KERNEL_UNLOCK_ONE(NULL); 2909 KERNEL_UNLOCK_ONE(NULL);
2909 2910
2910 return error; 2911 return error;
2911} 2912}
2912 2913
2913int 2914int
2914VFS_ROOT(struct mount *mp, struct vnode **a) 2915VFS_ROOT(struct mount *mp, struct vnode **a)
2915{ 2916{
2916 int error; 2917 int error;
2917 2918
2918 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2919 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2919 KERNEL_LOCK(1, NULL); 2920 KERNEL_LOCK(1, NULL);
2920 } 2921 }
2921 error = (*(mp->mnt_op->vfs_root))(mp, a); 2922 error = (*(mp->mnt_op->vfs_root))(mp, a);
2922 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2923 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2923 KERNEL_UNLOCK_ONE(NULL); 2924 KERNEL_UNLOCK_ONE(NULL);
2924 } 2925 }
2925 2926
2926 return error; 2927 return error;
2927} 2928}
2928 2929
2929int 2930int
2930VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c) 2931VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
2931{ 2932{
2932 int error; 2933 int error;
2933 2934
2934 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2935 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2935 KERNEL_LOCK(1, NULL); 2936 KERNEL_LOCK(1, NULL);
2936 } 2937 }
2937 error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c); 2938 error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
2938 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2939 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2939 KERNEL_UNLOCK_ONE(NULL); 2940 KERNEL_UNLOCK_ONE(NULL);
2940 } 2941 }
2941 2942
2942 return error; 2943 return error;
2943} 2944}
2944 2945
2945int 2946int
2946VFS_STATVFS(struct mount *mp, struct statvfs *a) 2947VFS_STATVFS(struct mount *mp, struct statvfs *a)
2947{ 2948{
2948 int error; 2949 int error;
2949 2950
2950 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2951 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2951 KERNEL_LOCK(1, NULL); 2952 KERNEL_LOCK(1, NULL);
2952 } 2953 }
2953 error = (*(mp->mnt_op->vfs_statvfs))(mp, a); 2954 error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
2954 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2955 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2955 KERNEL_UNLOCK_ONE(NULL); 2956 KERNEL_UNLOCK_ONE(NULL);
2956 } 2957 }
2957 2958
2958 return error; 2959 return error;
2959} 2960}
2960 2961
2961int 2962int
2962VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b) 2963VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
2963{ 2964{
2964 int error; 2965 int error;
2965 2966
2966 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2967 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2967 KERNEL_LOCK(1, NULL); 2968 KERNEL_LOCK(1, NULL);
2968 } 2969 }
2969 error = (*(mp->mnt_op->vfs_sync))(mp, a, b); 2970 error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
2970 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2971 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2971 KERNEL_UNLOCK_ONE(NULL); 2972 KERNEL_UNLOCK_ONE(NULL);
2972 } 2973 }
2973 2974
2974 return error; 2975 return error;
2975} 2976}
2976 2977
2977int 2978int
2978VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b) 2979VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
2979{ 2980{
2980 int error; 2981 int error;
2981 2982
2982 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2983 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2983 KERNEL_LOCK(1, NULL); 2984 KERNEL_LOCK(1, NULL);
2984 } 2985 }
2985 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b); 2986 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
2986 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 2987 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2987 KERNEL_UNLOCK_ONE(NULL); 2988 KERNEL_UNLOCK_ONE(NULL);
2988 } 2989 }
2989 2990
2990 return error; 2991 return error;
2991} 2992}
2992 2993
2993int 2994int
2994VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b) 2995VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
2995{ 2996{
2996 int error; 2997 int error;
2997 2998
2998 if ((vp->v_vflag & VV_MPSAFE) == 0) { 2999 if ((vp->v_vflag & VV_MPSAFE) == 0) {
2999 KERNEL_LOCK(1, NULL); 3000 KERNEL_LOCK(1, NULL);
3000 } 3001 }
3001 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b); 3002 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
3002 if ((vp->v_vflag & VV_MPSAFE) == 0) { 3003 if ((vp->v_vflag & VV_MPSAFE) == 0) {
3003 KERNEL_UNLOCK_ONE(NULL); 3004 KERNEL_UNLOCK_ONE(NULL);
3004 } 3005 }
3005 3006
3006 return error; 3007 return error;
3007} 3008}
3008 3009
3009int 3010int
3010VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b) 3011VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
3011{ 3012{
3012 int error; 3013 int error;
3013 3014
3014 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3015 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3015 KERNEL_LOCK(1, NULL); 3016 KERNEL_LOCK(1, NULL);
3016 } 3017 }
3017 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b); 3018 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
3018 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3019 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3019 KERNEL_UNLOCK_ONE(NULL); 3020 KERNEL_UNLOCK_ONE(NULL);
3020 } 3021 }
3021 3022
3022 return error; 3023 return error;
3023} 3024}
3024 3025
3025int 3026int
3026VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d) 3027VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
3027{ 3028{
3028 int error; 3029 int error;
3029 3030
3030 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */ 3031 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
3031 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d); 3032 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
3032 KERNEL_UNLOCK_ONE(NULL); /* XXX */ 3033 KERNEL_UNLOCK_ONE(NULL); /* XXX */
3033 3034
3034 return error; 3035 return error;
3035} 3036}
3036 3037
3037int 3038int
3038VFS_SUSPENDCTL(struct mount *mp, int a) 3039VFS_SUSPENDCTL(struct mount *mp, int a)
3039{ 3040{
3040 int error; 3041 int error;
3041 3042
3042 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3043 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3043 KERNEL_LOCK(1, NULL); 3044 KERNEL_LOCK(1, NULL);
3044 } 3045 }
3045 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a); 3046 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
3046 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3047 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3047 KERNEL_UNLOCK_ONE(NULL); 3048 KERNEL_UNLOCK_ONE(NULL);
3048 } 3049 }
3049 3050
3050 return error; 3051 return error;
3051} 3052}
3052 3053
3053#ifdef DDB 3054#ifdef DDB
3054static const char buf_flagbits[] = BUF_FLAGBITS; 3055static const char buf_flagbits[] = BUF_FLAGBITS;
3055 3056
3056void 3057void
3057vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) 3058vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
3058{ 3059{
3059 char bf[1024]; 3060 char bf[1024];
3060 3061
3061 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%" 3062 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
3062 PRIx64 " dev 0x%x\n", 3063 PRIx64 " dev 0x%x\n",
3063 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); 3064 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
3064 3065
3065 snprintb(bf, sizeof(bf), 3066 snprintb(bf, sizeof(bf),
3066 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags); 3067 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
3067 (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); 3068 (*pr)(" error %d flags 0x%s\n", bp->b_error, bf);
3068 3069
3069 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 3070 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3070 bp->b_bufsize, bp->b_bcount, bp->b_resid); 3071 bp->b_bufsize, bp->b_bcount, bp->b_resid);
3071 (*pr)(" data %p saveaddr %p dep %p\n", 3072 (*pr)(" data %p saveaddr %p dep %p\n",
3072 bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); 3073 bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
3073 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); 3074 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
3074} 3075}
3075 3076
3076 3077
3077void 3078void
3078vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...)) 3079vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
3079{ 3080{
3080 char bf[256]; 3081 char bf[256];
3081 3082
3082 uvm_object_printit(&vp->v_uobj, full, pr); 3083 uvm_object_printit(&vp->v_uobj, full, pr);
3083 snprintb(bf, sizeof(bf), 3084 snprintb(bf, sizeof(bf),
3084 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag); 3085 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
3085 (*pr)("\nVNODE flags %s\n", bf); 3086 (*pr)("\nVNODE flags %s\n", bf);
3086 (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n", 3087 (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
3087 vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize); 3088 vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
3088 3089
3089 (*pr)("data %p writecount %ld holdcnt %ld\n", 3090 (*pr)("data %p writecount %ld holdcnt %ld\n",
3090 vp->v_data, vp->v_writecount, vp->v_holdcnt); 3091 vp->v_data, vp->v_writecount, vp->v_holdcnt);
3091 3092
3092 (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", 3093 (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
3093 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 3094 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
3094 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 3095 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
3095 vp->v_mount, vp->v_mountedhere); 3096 vp->v_mount, vp->v_mountedhere);
3096 3097
3097 (*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock); 3098 (*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);
3098 3099
3099 if (full) { 3100 if (full) {
3100 struct buf *bp; 3101 struct buf *bp;
3101 3102
3102 (*pr)("clean bufs:\n"); 3103 (*pr)("clean bufs:\n");
3103 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 3104 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3104 (*pr)(" bp %p\n", bp); 3105 (*pr)(" bp %p\n", bp);
3105 vfs_buf_print(bp, full, pr); 3106 vfs_buf_print(bp, full, pr);
3106 } 3107 }
3107 3108
3108 (*pr)("dirty bufs:\n"); 3109 (*pr)("dirty bufs:\n");
3109 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 3110 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3110 (*pr)(" bp %p\n", bp); 3111 (*pr)(" bp %p\n", bp);
3111 vfs_buf_print(bp, full, pr); 3112 vfs_buf_print(bp, full, pr);
3112 } 3113 }
3113 } 3114 }
3114} 3115}
3115 3116
3116void 3117void
3117vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...)) 3118vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
3118{ 3119{
3119 char sbuf[256]; 3120 char sbuf[256];
3120 3121
3121 (*pr)("vnodecovered = %p syncer = %p data = %p\n", 3122 (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3122 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data); 3123 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3123 3124
3124 (*pr)("fs_bshift %d dev_bshift = %d\n", 3125 (*pr)("fs_bshift %d dev_bshift = %d\n",
3125 mp->mnt_fs_bshift,mp->mnt_dev_bshift); 3126 mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3126 3127
3127 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag); 3128 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
3128 (*pr)("flag = %s\n", sbuf); 3129 (*pr)("flag = %s\n", sbuf);
3129 3130
3130 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag); 3131 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
3131 (*pr)("iflag = %s\n", sbuf); 3132 (*pr)("iflag = %s\n", sbuf);
3132 3133
3133 (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt, 3134 (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
3134 &mp->mnt_unmounting, &mp->mnt_updating); 3135 &mp->mnt_unmounting, &mp->mnt_updating);
3135 3136
3136 (*pr)("statvfs cache:\n"); 3137 (*pr)("statvfs cache:\n");
3137 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 3138 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3138 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 3139 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3139 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 3140 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3140 3141
3141 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks); 3142 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3142 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree); 3143 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3143 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail); 3144 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3144 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd); 3145 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3145 3146
3146 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files); 3147 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3147 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree); 3148 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3148 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail); 3149 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3149 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd); 3150 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3150 3151
3151 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 3152 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3152 mp->mnt_stat.f_fsidx.__fsid_val[0], 3153 mp->mnt_stat.f_fsidx.__fsid_val[0],
3153 mp->mnt_stat.f_fsidx.__fsid_val[1]); 3154 mp->mnt_stat.f_fsidx.__fsid_val[1]);
3154 3155
3155 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 3156 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3156 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 3157 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3157 3158
3158 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag); 3159 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
3159 3160
3160 (*pr)("\tflag = %s\n",sbuf); 3161 (*pr)("\tflag = %s\n",sbuf);
3161 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites); 3162 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3162 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites); 3163 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3163 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads); 3164 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3164 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads); 3165 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3165 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 3166 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3166 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 3167 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3167 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 3168 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3168 3169
3169 { 3170 {
3170 int cnt = 0; 3171 int cnt = 0;
3171 struct vnode *vp; 3172 struct vnode *vp;
3172 (*pr)("locked vnodes ="); 3173 (*pr)("locked vnodes =");
3173 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3174 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3174 if (VOP_ISLOCKED(vp)) { 3175 if (VOP_ISLOCKED(vp)) {
3175 if ((++cnt % 6) == 0) { 3176 if ((++cnt % 6) == 0) {
3176 (*pr)(" %p,\n\t", vp); 3177 (*pr)(" %p,\n\t", vp);
3177 } else { 3178 } else {
3178 (*pr)(" %p,", vp); 3179 (*pr)(" %p,", vp);
3179 } 3180 }
3180 } 3181 }
3181 } 3182 }
3182 (*pr)("\n"); 3183 (*pr)("\n");
3183 } 3184 }
3184 3185
3185 if (full) { 3186 if (full) {
3186 int cnt = 0; 3187 int cnt = 0;
3187 struct vnode *vp; 3188 struct vnode *vp;
3188 (*pr)("all vnodes ="); 3189 (*pr)("all vnodes =");
3189 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3190 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3190 if (!TAILQ_NEXT(vp, v_mntvnodes)) { 3191 if (!TAILQ_NEXT(vp, v_mntvnodes)) {
3191 (*pr)(" %p", vp); 3192 (*pr)(" %p", vp);
3192 } else if ((++cnt % 6) == 0) { 3193 } else if ((++cnt % 6) == 0) {
3193 (*pr)(" %p,\n\t", vp); 3194 (*pr)(" %p,\n\t", vp);
3194 } else { 3195 } else {
3195 (*pr)(" %p,", vp); 3196 (*pr)(" %p,", vp);
3196 } 3197 }
3197 } 3198 }
 3198 (*pr)("\n"); 3199 (*pr)("\n");
3199 } 3200 }
3200} 3201}
3201#endif /* DDB */ 3202#endif /* DDB */