| @@ -1,1291 +1,1292 @@ | | | @@ -1,1291 +1,1292 @@ |
1 | /* $NetBSD: vfs_subr.c,v 1.356 2008/09/07 13:09:36 tron Exp $ */ | | 1 | /* $NetBSD: vfs_subr.c,v 1.357 2008/09/24 09:33:40 ad Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. | | 9 | * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. | | 15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright | | 16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the | | 17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. | | 18 | * documentation and/or other materials provided with the distribution. |
19 | * | | 19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. | | 30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ | | 31 | */ |
32 | | | 32 | |
33 | /* | | 33 | /* |
34 | * Copyright (c) 1989, 1993 | | 34 | * Copyright (c) 1989, 1993 |
35 | * The Regents of the University of California. All rights reserved. | | 35 | * The Regents of the University of California. All rights reserved. |
36 | * (c) UNIX System Laboratories, Inc. | | 36 | * (c) UNIX System Laboratories, Inc. |
37 | * All or some portions of this file are derived from material licensed | | 37 | * All or some portions of this file are derived from material licensed |
38 | * to the University of California by American Telephone and Telegraph | | 38 | * to the University of California by American Telephone and Telegraph |
39 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | | 39 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
40 | * the permission of UNIX System Laboratories, Inc. | | 40 | * the permission of UNIX System Laboratories, Inc. |
41 | * | | 41 | * |
42 | * Redistribution and use in source and binary forms, with or without | | 42 | * Redistribution and use in source and binary forms, with or without |
43 | * modification, are permitted provided that the following conditions | | 43 | * modification, are permitted provided that the following conditions |
44 | * are met: | | 44 | * are met: |
45 | * 1. Redistributions of source code must retain the above copyright | | 45 | * 1. Redistributions of source code must retain the above copyright |
46 | * notice, this list of conditions and the following disclaimer. | | 46 | * notice, this list of conditions and the following disclaimer. |
47 | * 2. Redistributions in binary form must reproduce the above copyright | | 47 | * 2. Redistributions in binary form must reproduce the above copyright |
48 | * notice, this list of conditions and the following disclaimer in the | | 48 | * notice, this list of conditions and the following disclaimer in the |
49 | * documentation and/or other materials provided with the distribution. | | 49 | * documentation and/or other materials provided with the distribution. |
50 | * 3. Neither the name of the University nor the names of its contributors | | 50 | * 3. Neither the name of the University nor the names of its contributors |
51 | * may be used to endorse or promote products derived from this software | | 51 | * may be used to endorse or promote products derived from this software |
52 | * without specific prior written permission. | | 52 | * without specific prior written permission. |
53 | * | | 53 | * |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
64 | * SUCH DAMAGE. | | 64 | * SUCH DAMAGE. |
65 | * | | 65 | * |
66 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 | | 66 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 |
67 | */ | | 67 | */ |
68 | | | 68 | |
69 | /* | | 69 | /* |
70 | * Note on v_usecount and locking: | | 70 | * Note on v_usecount and locking: |
71 | * | | 71 | * |
72 | * At nearly all points it is known that v_usecount could be zero, the | | 72 | * At nearly all points it is known that v_usecount could be zero, the |
73 | * vnode interlock will be held. | | 73 | * vnode interlock will be held. |
74 | * | | 74 | * |
75 | * To change v_usecount away from zero, the interlock must be held. To | | 75 | * To change v_usecount away from zero, the interlock must be held. To |
76 | * change from a non-zero value to zero, again the interlock must be | | 76 | * change from a non-zero value to zero, again the interlock must be |
77 | * held. | | 77 | * held. |
78 | * | | 78 | * |
79 | * Changing the usecount from a non-zero value to a non-zero value can | | 79 | * Changing the usecount from a non-zero value to a non-zero value can |
80 | * safely be done using atomic operations, without the interlock held. | | 80 | * safely be done using atomic operations, without the interlock held. |
81 | */ | | 81 | */ |
82 | | | 82 | |
83 | #include <sys/cdefs.h> | | 83 | #include <sys/cdefs.h> |
84 | __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.356 2008/09/07 13:09:36 tron Exp $"); | | 84 | __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.357 2008/09/24 09:33:40 ad Exp $"); |
85 | | | 85 | |
86 | #include "opt_ddb.h" | | 86 | #include "opt_ddb.h" |
87 | #include "opt_compat_netbsd.h" | | 87 | #include "opt_compat_netbsd.h" |
88 | #include "opt_compat_43.h" | | 88 | #include "opt_compat_43.h" |
89 | | | 89 | |
90 | #include <sys/param.h> | | 90 | #include <sys/param.h> |
91 | #include <sys/systm.h> | | 91 | #include <sys/systm.h> |
92 | #include <sys/proc.h> | | 92 | #include <sys/proc.h> |
93 | #include <sys/kernel.h> | | 93 | #include <sys/kernel.h> |
94 | #include <sys/mount.h> | | 94 | #include <sys/mount.h> |
95 | #include <sys/fcntl.h> | | 95 | #include <sys/fcntl.h> |
96 | #include <sys/vnode.h> | | 96 | #include <sys/vnode.h> |
97 | #include <sys/stat.h> | | 97 | #include <sys/stat.h> |
98 | #include <sys/namei.h> | | 98 | #include <sys/namei.h> |
99 | #include <sys/ucred.h> | | 99 | #include <sys/ucred.h> |
100 | #include <sys/buf.h> | | 100 | #include <sys/buf.h> |
101 | #include <sys/errno.h> | | 101 | #include <sys/errno.h> |
102 | #include <sys/malloc.h> | | 102 | #include <sys/malloc.h> |
103 | #include <sys/syscallargs.h> | | 103 | #include <sys/syscallargs.h> |
104 | #include <sys/device.h> | | 104 | #include <sys/device.h> |
105 | #include <sys/filedesc.h> | | 105 | #include <sys/filedesc.h> |
106 | #include <sys/kauth.h> | | 106 | #include <sys/kauth.h> |
107 | #include <sys/atomic.h> | | 107 | #include <sys/atomic.h> |
108 | #include <sys/kthread.h> | | 108 | #include <sys/kthread.h> |
109 | #include <sys/wapbl.h> | | 109 | #include <sys/wapbl.h> |
110 | | | 110 | |
111 | #include <miscfs/specfs/specdev.h> | | 111 | #include <miscfs/specfs/specdev.h> |
112 | #include <miscfs/syncfs/syncfs.h> | | 112 | #include <miscfs/syncfs/syncfs.h> |
113 | | | 113 | |
114 | #include <uvm/uvm.h> | | 114 | #include <uvm/uvm.h> |
115 | #include <uvm/uvm_readahead.h> | | 115 | #include <uvm/uvm_readahead.h> |
116 | #include <uvm/uvm_ddb.h> | | 116 | #include <uvm/uvm_ddb.h> |
117 | | | 117 | |
118 | #include <sys/sysctl.h> | | 118 | #include <sys/sysctl.h> |
119 | | | 119 | |
120 | const enum vtype iftovt_tab[16] = { | | 120 | const enum vtype iftovt_tab[16] = { |
121 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | | 121 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, |
122 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, | | 122 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, |
123 | }; | | 123 | }; |
124 | const int vttoif_tab[9] = { | | 124 | const int vttoif_tab[9] = { |
125 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | | 125 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, |
126 | S_IFSOCK, S_IFIFO, S_IFMT, | | 126 | S_IFSOCK, S_IFIFO, S_IFMT, |
127 | }; | | 127 | }; |
128 | | | 128 | |
129 | /* | | 129 | /* |
130 | * Insq/Remq for the vnode usage lists. | | 130 | * Insq/Remq for the vnode usage lists. |
131 | */ | | 131 | */ |
132 | #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) | | 132 | #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) |
133 | #define bufremvn(bp) { \ | | 133 | #define bufremvn(bp) { \ |
134 | LIST_REMOVE(bp, b_vnbufs); \ | | 134 | LIST_REMOVE(bp, b_vnbufs); \ |
135 | (bp)->b_vnbufs.le_next = NOLIST; \ | | 135 | (bp)->b_vnbufs.le_next = NOLIST; \ |
136 | } | | 136 | } |
137 | | | 137 | |
138 | int doforce = 1; /* 1 => permit forcible unmounting */ | | 138 | int doforce = 1; /* 1 => permit forcible unmounting */ |
139 | int prtactive = 0; /* 1 => print out reclaim of active vnodes */ | | 139 | int prtactive = 0; /* 1 => print out reclaim of active vnodes */ |
140 | | | 140 | |
141 | extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ | | 141 | extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ |
142 | extern int vfs_magiclinks; /* 1 => expand "magic" symlinks */ | | 142 | extern int vfs_magiclinks; /* 1 => expand "magic" symlinks */ |
143 | | | 143 | |
144 | static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); | | 144 | static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); |
145 | static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); | | 145 | static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); |
146 | static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list); | | 146 | static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list); |
147 | | | 147 | |
148 | struct mntlist mountlist = /* mounted filesystem list */ | | 148 | struct mntlist mountlist = /* mounted filesystem list */ |
149 | CIRCLEQ_HEAD_INITIALIZER(mountlist); | | 149 | CIRCLEQ_HEAD_INITIALIZER(mountlist); |
150 | | | 150 | |
151 | u_int numvnodes; | | 151 | u_int numvnodes; |
152 | static specificdata_domain_t mount_specificdata_domain; | | 152 | static specificdata_domain_t mount_specificdata_domain; |
153 | | | 153 | |
154 | static int vrele_pending; | | 154 | static int vrele_pending; |
155 | static int vrele_gen; | | 155 | static int vrele_gen; |
156 | static kmutex_t vrele_lock; | | 156 | static kmutex_t vrele_lock; |
157 | static kcondvar_t vrele_cv; | | 157 | static kcondvar_t vrele_cv; |
158 | static lwp_t *vrele_lwp; | | 158 | static lwp_t *vrele_lwp; |
159 | | | 159 | |
160 | kmutex_t mountlist_lock; | | 160 | kmutex_t mountlist_lock; |
161 | kmutex_t mntid_lock; | | 161 | kmutex_t mntid_lock; |
162 | kmutex_t mntvnode_lock; | | 162 | kmutex_t mntvnode_lock; |
163 | kmutex_t vnode_free_list_lock; | | 163 | kmutex_t vnode_free_list_lock; |
164 | kmutex_t vfs_list_lock; | | 164 | kmutex_t vfs_list_lock; |
165 | | | 165 | |
166 | static pool_cache_t vnode_cache; | | 166 | static pool_cache_t vnode_cache; |
167 | | | 167 | |
168 | MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); | | 168 | MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); |
169 | | | 169 | |
170 | /* | | 170 | /* |
171 | * These define the root filesystem and device. | | 171 | * These define the root filesystem and device. |
172 | */ | | 172 | */ |
173 | struct vnode *rootvnode; | | 173 | struct vnode *rootvnode; |
174 | struct device *root_device; /* root device */ | | 174 | struct device *root_device; /* root device */ |
175 | | | 175 | |
176 | /* | | 176 | /* |
177 | * Local declarations. | | 177 | * Local declarations. |
178 | */ | | 178 | */ |
179 | | | 179 | |
180 | static void vrele_thread(void *); | | 180 | static void vrele_thread(void *); |
181 | static void insmntque(vnode_t *, struct mount *); | | 181 | static void insmntque(vnode_t *, struct mount *); |
182 | static int getdevvp(dev_t, vnode_t **, enum vtype); | | 182 | static int getdevvp(dev_t, vnode_t **, enum vtype); |
183 | static vnode_t *getcleanvnode(void);; | | 183 | static vnode_t *getcleanvnode(void);; |
184 | void vpanic(vnode_t *, const char *); | | 184 | void vpanic(vnode_t *, const char *); |
185 | | | 185 | |
186 | #ifdef DEBUG | | 186 | #ifdef DEBUG |
187 | void printlockedvnodes(void); | | 187 | void printlockedvnodes(void); |
188 | #endif | | 188 | #endif |
189 | | | 189 | |
190 | #ifdef DIAGNOSTIC | | 190 | #ifdef DIAGNOSTIC |
191 | void | | 191 | void |
192 | vpanic(vnode_t *vp, const char *msg) | | 192 | vpanic(vnode_t *vp, const char *msg) |
193 | { | | 193 | { |
194 | | | 194 | |
195 | vprint(NULL, vp); | | 195 | vprint(NULL, vp); |
196 | panic("%s\n", msg); | | 196 | panic("%s\n", msg); |
197 | } | | 197 | } |
198 | #else | | 198 | #else |
199 | #define vpanic(vp, msg) /* nothing */ | | 199 | #define vpanic(vp, msg) /* nothing */ |
200 | #endif | | 200 | #endif |
201 | | | 201 | |
202 | void | | 202 | void |
203 | vn_init1(void) | | 203 | vn_init1(void) |
204 | { | | 204 | { |
205 | | | 205 | |
206 | vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl", | | 206 | vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl", |
207 | NULL, IPL_NONE, NULL, NULL, NULL); | | 207 | NULL, IPL_NONE, NULL, NULL, NULL); |
208 | KASSERT(vnode_cache != NULL); | | 208 | KASSERT(vnode_cache != NULL); |
209 | | | 209 | |
210 | /* Create deferred release thread. */ | | 210 | /* Create deferred release thread. */ |
211 | mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); | | 211 | mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); |
212 | cv_init(&vrele_cv, "vrele"); | | 212 | cv_init(&vrele_cv, "vrele"); |
213 | if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, | | 213 | if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, |
214 | NULL, &vrele_lwp, "vrele")) | | 214 | NULL, &vrele_lwp, "vrele")) |
215 | panic("fork vrele"); | | 215 | panic("fork vrele"); |
216 | } | | 216 | } |
217 | | | 217 | |
218 | /* | | 218 | /* |
219 | * Initialize the vnode management data structures. | | 219 | * Initialize the vnode management data structures. |
220 | */ | | 220 | */ |
221 | void | | 221 | void |
222 | vntblinit(void) | | 222 | vntblinit(void) |
223 | { | | 223 | { |
224 | | | 224 | |
225 | mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); | | 225 | mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); |
226 | mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); | | 226 | mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); |
227 | mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); | | 227 | mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); |
228 | mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); | | 228 | mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); |
229 | mutex_init(&specfs_lock, MUTEX_DEFAULT, IPL_NONE); | | 229 | mutex_init(&specfs_lock, MUTEX_DEFAULT, IPL_NONE); |
230 | mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); | | 230 | mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); |
231 | | | 231 | |
232 | mount_specificdata_domain = specificdata_domain_create(); | | 232 | mount_specificdata_domain = specificdata_domain_create(); |
233 | | | 233 | |
234 | /* Initialize the filesystem syncer. */ | | 234 | /* Initialize the filesystem syncer. */ |
235 | vn_initialize_syncerd(); | | 235 | vn_initialize_syncerd(); |
236 | vn_init1(); | | 236 | vn_init1(); |
237 | } | | 237 | } |
238 | | | 238 | |
239 | int | | 239 | int |
240 | vfs_drainvnodes(long target, struct lwp *l) | | 240 | vfs_drainvnodes(long target, struct lwp *l) |
241 | { | | 241 | { |
242 | | | 242 | |
243 | while (numvnodes > target) { | | 243 | while (numvnodes > target) { |
244 | vnode_t *vp; | | 244 | vnode_t *vp; |
245 | | | 245 | |
246 | mutex_enter(&vnode_free_list_lock); | | 246 | mutex_enter(&vnode_free_list_lock); |
247 | vp = getcleanvnode(); | | 247 | vp = getcleanvnode(); |
248 | if (vp == NULL) | | 248 | if (vp == NULL) |
249 | return EBUSY; /* give up */ | | 249 | return EBUSY; /* give up */ |
250 | ungetnewvnode(vp); | | 250 | ungetnewvnode(vp); |
251 | } | | 251 | } |
252 | | | 252 | |
253 | return 0; | | 253 | return 0; |
254 | } | | 254 | } |
255 | | | 255 | |
256 | /* | | 256 | /* |
257 | * Lookup a mount point by filesystem identifier. | | 257 | * Lookup a mount point by filesystem identifier. |
258 | * | | 258 | * |
259 | * XXX Needs to add a reference to the mount point. | | 259 | * XXX Needs to add a reference to the mount point. |
260 | */ | | 260 | */ |
261 | struct mount * | | 261 | struct mount * |
262 | vfs_getvfs(fsid_t *fsid) | | 262 | vfs_getvfs(fsid_t *fsid) |
263 | { | | 263 | { |
264 | struct mount *mp; | | 264 | struct mount *mp; |
265 | | | 265 | |
266 | mutex_enter(&mountlist_lock); | | 266 | mutex_enter(&mountlist_lock); |
267 | CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { | | 267 | CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { |
268 | if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && | | 268 | if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && |
269 | mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { | | 269 | mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { |
270 | mutex_exit(&mountlist_lock); | | 270 | mutex_exit(&mountlist_lock); |
271 | return (mp); | | 271 | return (mp); |
272 | } | | 272 | } |
273 | } | | 273 | } |
274 | mutex_exit(&mountlist_lock); | | 274 | mutex_exit(&mountlist_lock); |
275 | return ((struct mount *)0); | | 275 | return ((struct mount *)0); |
276 | } | | 276 | } |
277 | | | 277 | |
278 | /* | | 278 | /* |
279 | * Drop a reference to a mount structure, freeing if the last reference. | | 279 | * Drop a reference to a mount structure, freeing if the last reference. |
280 | */ | | 280 | */ |
281 | void | | 281 | void |
282 | vfs_destroy(struct mount *mp) | | 282 | vfs_destroy(struct mount *mp) |
283 | { | | 283 | { |
284 | | | 284 | |
285 | if (__predict_true(atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { | | 285 | if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { |
286 | return; | | 286 | return; |
287 | } | | 287 | } |
288 | | | 288 | |
289 | /* | | 289 | /* |
290 | * Nothing else has visibility of the mount: we can now | | 290 | * Nothing else has visibility of the mount: we can now |
291 | * free the data structures. | | 291 | * free the data structures. |
292 | */ | | 292 | */ |
| | | 293 | KASSERT(mp->mnt_refcnt == 0); |
293 | specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); | | 294 | specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); |
294 | rw_destroy(&mp->mnt_unmounting); | | 295 | rw_destroy(&mp->mnt_unmounting); |
295 | mutex_destroy(&mp->mnt_updating); | | 296 | mutex_destroy(&mp->mnt_updating); |
296 | mutex_destroy(&mp->mnt_renamelock); | | 297 | mutex_destroy(&mp->mnt_renamelock); |
297 | if (mp->mnt_op != NULL) { | | 298 | if (mp->mnt_op != NULL) { |
298 | vfs_delref(mp->mnt_op); | | 299 | vfs_delref(mp->mnt_op); |
299 | } | | 300 | } |
300 | kmem_free(mp, sizeof(*mp)); | | 301 | kmem_free(mp, sizeof(*mp)); |
301 | } | | 302 | } |
302 | | | 303 | |
303 | /* | | 304 | /* |
304 | * grab a vnode from freelist and clean it. | | 305 | * grab a vnode from freelist and clean it. |
305 | */ | | 306 | */ |
306 | vnode_t * | | 307 | vnode_t * |
307 | getcleanvnode(void) | | 308 | getcleanvnode(void) |
308 | { | | 309 | { |
309 | vnode_t *vp; | | 310 | vnode_t *vp; |
310 | vnodelst_t *listhd; | | 311 | vnodelst_t *listhd; |
311 | | | 312 | |
312 | KASSERT(mutex_owned(&vnode_free_list_lock)); | | 313 | KASSERT(mutex_owned(&vnode_free_list_lock)); |
313 | | | 314 | |
314 | retry: | | 315 | retry: |
315 | listhd = &vnode_free_list; | | 316 | listhd = &vnode_free_list; |
316 | try_nextlist: | | 317 | try_nextlist: |
317 | TAILQ_FOREACH(vp, listhd, v_freelist) { | | 318 | TAILQ_FOREACH(vp, listhd, v_freelist) { |
318 | /* | | 319 | /* |
319 | * It's safe to test v_usecount and v_iflag | | 320 | * It's safe to test v_usecount and v_iflag |
320 | * without holding the interlock here, since | | 321 | * without holding the interlock here, since |
321 | * these vnodes should never appear on the | | 322 | * these vnodes should never appear on the |
322 | * lists. | | 323 | * lists. |
323 | */ | | 324 | */ |
324 | if (vp->v_usecount != 0) { | | 325 | if (vp->v_usecount != 0) { |
325 | vpanic(vp, "free vnode isn't"); | | 326 | vpanic(vp, "free vnode isn't"); |
326 | } | | 327 | } |
327 | if ((vp->v_iflag & VI_CLEAN) != 0) { | | 328 | if ((vp->v_iflag & VI_CLEAN) != 0) { |
328 | vpanic(vp, "clean vnode on freelist"); | | 329 | vpanic(vp, "clean vnode on freelist"); |
329 | } | | 330 | } |
330 | if (vp->v_freelisthd != listhd) { | | 331 | if (vp->v_freelisthd != listhd) { |
331 | printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); | | 332 | printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); |
332 | vpanic(vp, "list head mismatch"); | | 333 | vpanic(vp, "list head mismatch"); |
333 | } | | 334 | } |
334 | if (!mutex_tryenter(&vp->v_interlock)) | | 335 | if (!mutex_tryenter(&vp->v_interlock)) |
335 | continue; | | 336 | continue; |
336 | /* | | 337 | /* |
337 | * Our lwp might hold the underlying vnode | | 338 | * Our lwp might hold the underlying vnode |
338 | * locked, so don't try to reclaim a VI_LAYER | | 339 | * locked, so don't try to reclaim a VI_LAYER |
339 | * node if it's locked. | | 340 | * node if it's locked. |
340 | */ | | 341 | */ |
341 | if ((vp->v_iflag & VI_XLOCK) == 0 && | | 342 | if ((vp->v_iflag & VI_XLOCK) == 0 && |
342 | ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { | | 343 | ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { |
343 | break; | | 344 | break; |
344 | } | | 345 | } |
345 | mutex_exit(&vp->v_interlock); | | 346 | mutex_exit(&vp->v_interlock); |
346 | } | | 347 | } |
347 | | | 348 | |
348 | if (vp == NULL) { | | 349 | if (vp == NULL) { |
349 | if (listhd == &vnode_free_list) { | | 350 | if (listhd == &vnode_free_list) { |
350 | listhd = &vnode_hold_list; | | 351 | listhd = &vnode_hold_list; |
351 | goto try_nextlist; | | 352 | goto try_nextlist; |
352 | } | | 353 | } |
353 | mutex_exit(&vnode_free_list_lock); | | 354 | mutex_exit(&vnode_free_list_lock); |
354 | return NULL; | | 355 | return NULL; |
355 | } | | 356 | } |
356 | | | 357 | |
357 | /* Remove it from the freelist. */ | | 358 | /* Remove it from the freelist. */ |
358 | TAILQ_REMOVE(listhd, vp, v_freelist); | | 359 | TAILQ_REMOVE(listhd, vp, v_freelist); |
359 | vp->v_freelisthd = NULL; | | 360 | vp->v_freelisthd = NULL; |
360 | mutex_exit(&vnode_free_list_lock); | | 361 | mutex_exit(&vnode_free_list_lock); |
361 | | | 362 | |
362 | /* | | 363 | /* |
363 | * The vnode is still associated with a file system, so we must | | 364 | * The vnode is still associated with a file system, so we must |
364 | * clean it out before reusing it. We need to add a reference | | 365 | * clean it out before reusing it. We need to add a reference |
365 | * before doing this. If the vnode gains another reference while | | 366 | * before doing this. If the vnode gains another reference while |
366 | * being cleaned out then we lose - retry. | | 367 | * being cleaned out then we lose - retry. |
367 | */ | | 368 | */ |
368 | atomic_inc_uint(&vp->v_usecount); | | 369 | atomic_inc_uint(&vp->v_usecount); |
369 | vclean(vp, DOCLOSE); | | 370 | vclean(vp, DOCLOSE); |
370 | if (vp->v_usecount == 1) { | | 371 | if (vp->v_usecount == 1) { |
371 | /* We're about to dirty it. */ | | 372 | /* We're about to dirty it. */ |
372 | vp->v_iflag &= ~VI_CLEAN; | | 373 | vp->v_iflag &= ~VI_CLEAN; |
373 | mutex_exit(&vp->v_interlock); | | 374 | mutex_exit(&vp->v_interlock); |
374 | if (vp->v_type == VBLK || vp->v_type == VCHR) { | | 375 | if (vp->v_type == VBLK || vp->v_type == VCHR) { |
375 | spec_node_destroy(vp); | | 376 | spec_node_destroy(vp); |
376 | } | | 377 | } |
377 | vp->v_type = VNON; | | 378 | vp->v_type = VNON; |
378 | } else { | | 379 | } else { |
379 | /* | | 380 | /* |
380 | * Don't return to freelist - the holder of the last | | 381 | * Don't return to freelist - the holder of the last |
381 | * reference will destroy it. | | 382 | * reference will destroy it. |
382 | */ | | 383 | */ |
383 | vrelel(vp, 0); /* releases vp->v_interlock */ | | 384 | vrelel(vp, 0); /* releases vp->v_interlock */ |
384 | mutex_enter(&vnode_free_list_lock); | | 385 | mutex_enter(&vnode_free_list_lock); |
385 | goto retry; | | 386 | goto retry; |
386 | } | | 387 | } |
387 | | | 388 | |
388 | if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || | | 389 | if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || |
389 | !TAILQ_EMPTY(&vp->v_uobj.memq)) { | | 390 | !TAILQ_EMPTY(&vp->v_uobj.memq)) { |
390 | vpanic(vp, "cleaned vnode isn't"); | | 391 | vpanic(vp, "cleaned vnode isn't"); |
391 | } | | 392 | } |
392 | if (vp->v_numoutput != 0) { | | 393 | if (vp->v_numoutput != 0) { |
393 | vpanic(vp, "clean vnode has pending I/O's"); | | 394 | vpanic(vp, "clean vnode has pending I/O's"); |
394 | } | | 395 | } |
395 | if ((vp->v_iflag & VI_ONWORKLST) != 0) { | | 396 | if ((vp->v_iflag & VI_ONWORKLST) != 0) { |
396 | vpanic(vp, "clean vnode on syncer list"); | | 397 | vpanic(vp, "clean vnode on syncer list"); |
397 | } | | 398 | } |
398 | | | 399 | |
399 | return vp; | | 400 | return vp; |
400 | } | | 401 | } |
401 | | | 402 | |
402 | /* | | 403 | /* |
403 | * Mark a mount point as busy, and gain a new reference to it. Used to | | 404 | * Mark a mount point as busy, and gain a new reference to it. Used to |
404 | * prevent the file system from being unmounted during critical sections. | | 405 | * prevent the file system from being unmounted during critical sections. |
405 | * | | 406 | * |
406 | * => The caller must hold a pre-existing reference to the mount. | | 407 | * => The caller must hold a pre-existing reference to the mount. |
407 | * => Will fail if the file system is being unmounted, or is unmounted. | | 408 | * => Will fail if the file system is being unmounted, or is unmounted. |
408 | */ | | 409 | */ |
409 | int | | 410 | int |
410 | vfs_busy(struct mount *mp, struct mount **nextp) | | 411 | vfs_busy(struct mount *mp, struct mount **nextp) |
411 | { | | 412 | { |
412 | | | 413 | |
413 | KASSERT(mp->mnt_refcnt > 0); | | 414 | KASSERT(mp->mnt_refcnt > 0); |
414 | | | 415 | |
415 | if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) { | | 416 | if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) { |
416 | if (nextp != NULL) { | | 417 | if (nextp != NULL) { |
417 | KASSERT(mutex_owned(&mountlist_lock)); | | 418 | KASSERT(mutex_owned(&mountlist_lock)); |
418 | *nextp = CIRCLEQ_NEXT(mp, mnt_list); | | 419 | *nextp = CIRCLEQ_NEXT(mp, mnt_list); |
419 | } | | 420 | } |
420 | return EBUSY; | | 421 | return EBUSY; |
421 | } | | 422 | } |
422 | if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { | | 423 | if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { |
423 | rw_exit(&mp->mnt_unmounting); | | 424 | rw_exit(&mp->mnt_unmounting); |
424 | if (nextp != NULL) { | | 425 | if (nextp != NULL) { |
425 | KASSERT(mutex_owned(&mountlist_lock)); | | 426 | KASSERT(mutex_owned(&mountlist_lock)); |
426 | *nextp = CIRCLEQ_NEXT(mp, mnt_list); | | 427 | *nextp = CIRCLEQ_NEXT(mp, mnt_list); |
427 | } | | 428 | } |
428 | return ENOENT; | | 429 | return ENOENT; |
429 | } | | 430 | } |
430 | if (nextp != NULL) { | | 431 | if (nextp != NULL) { |
431 | mutex_exit(&mountlist_lock); | | 432 | mutex_exit(&mountlist_lock); |
432 | } | | 433 | } |
433 | atomic_inc_uint(&mp->mnt_refcnt); | | 434 | atomic_inc_uint(&mp->mnt_refcnt); |
434 | return 0; | | 435 | return 0; |
435 | } | | 436 | } |
436 | | | 437 | |
437 | /* | | 438 | /* |
438 | * Unbusy a busy filesystem. | | 439 | * Unbusy a busy filesystem. |
439 | * | | 440 | * |
440 | * => If keepref is true, preserve reference added by vfs_busy(). | | 441 | * => If keepref is true, preserve reference added by vfs_busy(). |
441 | * => If nextp != NULL, acquire mountlist_lock. | | 442 | * => If nextp != NULL, acquire mountlist_lock. |
442 | */ | | 443 | */ |
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	/*
	 * When resuming a mount list iteration, acquire mountlist_lock
	 * before releasing mnt_unmounting so the next entry is fetched
	 * under the list lock.
	 */
	if (nextp != NULL) {
		mutex_enter(&mountlist_lock);
	}
	rw_exit(&mp->mnt_unmounting);
	if (!keepref) {
		/* Drop the reference added by vfs_busy(). */
		vfs_destroy(mp);
	}
	if (nextp != NULL) {
		/* Return the next mount; mountlist_lock stays held. */
		KASSERT(mutex_owned(&mountlist_lock));
		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
	}
}
461 | | | 462 | |
462 | /* | | 463 | /* |
463 | * Lookup a filesystem type, and if found allocate and initialize | | 464 | * Lookup a filesystem type, and if found allocate and initialize |
464 | * a mount structure for it. | | 465 | * a mount structure for it. |
465 | * | | 466 | * |
466 | * Devname is usually updated by mount(8) after booting. | | 467 | * Devname is usually updated by mount(8) after booting. |
467 | */ | | 468 | */ |
468 | int | | 469 | int |
469 | vfs_rootmountalloc(const char *fstypename, const char *devname, | | 470 | vfs_rootmountalloc(const char *fstypename, const char *devname, |
470 | struct mount **mpp) | | 471 | struct mount **mpp) |
471 | { | | 472 | { |
472 | struct vfsops *vfsp = NULL; | | 473 | struct vfsops *vfsp = NULL; |
473 | struct mount *mp; | | 474 | struct mount *mp; |
474 | | | 475 | |
475 | mutex_enter(&vfs_list_lock); | | 476 | mutex_enter(&vfs_list_lock); |
476 | LIST_FOREACH(vfsp, &vfs_list, vfs_list) | | 477 | LIST_FOREACH(vfsp, &vfs_list, vfs_list) |
477 | if (!strncmp(vfsp->vfs_name, fstypename, | | 478 | if (!strncmp(vfsp->vfs_name, fstypename, |
478 | sizeof(mp->mnt_stat.f_fstypename))) | | 479 | sizeof(mp->mnt_stat.f_fstypename))) |
479 | break; | | 480 | break; |
480 | if (vfsp == NULL) { | | 481 | if (vfsp == NULL) { |
481 | mutex_exit(&vfs_list_lock); | | 482 | mutex_exit(&vfs_list_lock); |
482 | return (ENODEV); | | 483 | return (ENODEV); |
483 | } | | 484 | } |
484 | vfsp->vfs_refcount++; | | 485 | vfsp->vfs_refcount++; |
485 | mutex_exit(&vfs_list_lock); | | 486 | mutex_exit(&vfs_list_lock); |
486 | | | 487 | |
487 | mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); | | 488 | mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); |
488 | if (mp == NULL) | | 489 | if (mp == NULL) |
489 | return ENOMEM; | | 490 | return ENOMEM; |
490 | mp->mnt_refcnt = 1; | | 491 | mp->mnt_refcnt = 1; |
491 | rw_init(&mp->mnt_unmounting); | | 492 | rw_init(&mp->mnt_unmounting); |
492 | mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); | | 493 | mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); |
493 | mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); | | 494 | mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); |
494 | (void)vfs_busy(mp, NULL); | | 495 | (void)vfs_busy(mp, NULL); |
495 | TAILQ_INIT(&mp->mnt_vnodelist); | | 496 | TAILQ_INIT(&mp->mnt_vnodelist); |
496 | mp->mnt_op = vfsp; | | 497 | mp->mnt_op = vfsp; |
497 | mp->mnt_flag = MNT_RDONLY; | | 498 | mp->mnt_flag = MNT_RDONLY; |
498 | mp->mnt_vnodecovered = NULL; | | 499 | mp->mnt_vnodecovered = NULL; |
499 | (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, | | 500 | (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, |
500 | sizeof(mp->mnt_stat.f_fstypename)); | | 501 | sizeof(mp->mnt_stat.f_fstypename)); |
501 | mp->mnt_stat.f_mntonname[0] = '/'; | | 502 | mp->mnt_stat.f_mntonname[0] = '/'; |
502 | mp->mnt_stat.f_mntonname[1] = '\0'; | | 503 | mp->mnt_stat.f_mntonname[1] = '\0'; |
503 | mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = | | 504 | mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = |
504 | '\0'; | | 505 | '\0'; |
505 | (void)copystr(devname, mp->mnt_stat.f_mntfromname, | | 506 | (void)copystr(devname, mp->mnt_stat.f_mntfromname, |
506 | sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); | | 507 | sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); |
507 | mount_initspecific(mp); | | 508 | mount_initspecific(mp); |
508 | *mpp = mp; | | 509 | *mpp = mp; |
509 | return (0); | | 510 | return (0); |
510 | } | | 511 | } |
511 | | | 512 | |
512 | /* | | 513 | /* |
513 | * Routines having to do with the management of the vnode table. | | 514 | * Routines having to do with the management of the vnode table. |
514 | */ | | 515 | */ |
515 | extern int (**dead_vnodeop_p)(void *); | | 516 | extern int (**dead_vnodeop_p)(void *); |
516 | | | 517 | |
517 | /* | | 518 | /* |
518 | * Return the next vnode from the free list. | | 519 | * Return the next vnode from the free list. |
519 | */ | | 520 | */ |
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    vnode_t **vpp)
{
	struct uvm_object *uobj;
	static int toggle;
	vnode_t *vp;
	int error = 0, tryalloc;

try_again:
	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we're creating a
		 * vnode. If unmount is in progress, this will
		 * fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	mutex_enter(&vnode_free_list_lock);

	toggle ^= 1;
	/* Past 2x the target, never prefer fresh allocation. */
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc) {
		/*
		 * Bump numvnodes optimistically before dropping the
		 * lock; undo the bump if the allocation fails.
		 */
		numvnodes++;
		mutex_exit(&vnode_free_list_lock);
		if ((vp = vnalloc(NULL)) == NULL) {
			mutex_enter(&vnode_free_list_lock);
			numvnodes--;
		} else
			vp->v_usecount = 1;
	}

	if (vp == NULL) {
		/* Fall back to recycling an existing vnode. */
		vp = getcleanvnode();
		if (vp == NULL) {
			if (mp != NULL) {
				vfs_unbusy(mp, false, NULL);
			}
			if (tryalloc) {
				/*
				 * Allocation failed and nothing could be
				 * recycled: wait a bit and start over.
				 */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				kpause("newvn", false, hz, NULL);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		/* Reset state left over from the recycled vnode. */
		vp->v_iflag = 0;
		vp->v_vflag = 0;
		vp->v_uflag = 0;
		vp->v_socket = NULL;
	}

	/* Fresh or recycled, the vnode must be clean and referenced once. */
	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));

	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	vp->v_tag = tag;
	vp->v_op = vops;
	/* Hook the vnode onto the mount's vnode list. */
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (mp != NULL) {
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		/* Keep the mount reference: it is donated to the vnode. */
		vfs_unbusy(mp, true, NULL);
	}

	return (0);
}
632 | | | 633 | |
633 | /* | | 634 | /* |
634 | * This is really just the reverse of getnewvnode(). Needed for | | 635 | * This is really just the reverse of getnewvnode(). Needed for |
635 | * VFS_VGET functions who may need to push back a vnode in case | | 636 | * VFS_VGET functions who may need to push back a vnode in case |
636 | * of a locking race. | | 637 | * of a locking race. |
637 | */ | | 638 | */ |
638 | void | | 639 | void |
639 | ungetnewvnode(vnode_t *vp) | | 640 | ungetnewvnode(vnode_t *vp) |
640 | { | | 641 | { |
641 | | | 642 | |
642 | KASSERT(vp->v_usecount == 1); | | 643 | KASSERT(vp->v_usecount == 1); |
643 | KASSERT(vp->v_data == NULL); | | 644 | KASSERT(vp->v_data == NULL); |
644 | KASSERT(vp->v_freelisthd == NULL); | | 645 | KASSERT(vp->v_freelisthd == NULL); |
645 | | | 646 | |
646 | mutex_enter(&vp->v_interlock); | | 647 | mutex_enter(&vp->v_interlock); |
647 | vp->v_iflag |= VI_CLEAN; | | 648 | vp->v_iflag |= VI_CLEAN; |
648 | vrelel(vp, 0); | | 649 | vrelel(vp, 0); |
649 | } | | 650 | } |
650 | | | 651 | |
651 | /* | | 652 | /* |
652 | * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a | | 653 | * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a |
653 | * marker vnode and we are prepared to wait for the allocation. | | 654 | * marker vnode and we are prepared to wait for the allocation. |
654 | */ | | 655 | */ |
655 | vnode_t * | | 656 | vnode_t * |
656 | vnalloc(struct mount *mp) | | 657 | vnalloc(struct mount *mp) |
657 | { | | 658 | { |
658 | vnode_t *vp; | | 659 | vnode_t *vp; |
659 | | | 660 | |
660 | vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); | | 661 | vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); |
661 | if (vp == NULL) { | | 662 | if (vp == NULL) { |
662 | return NULL; | | 663 | return NULL; |
663 | } | | 664 | } |
664 | | | 665 | |
665 | memset(vp, 0, sizeof(*vp)); | | 666 | memset(vp, 0, sizeof(*vp)); |
666 | UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); | | 667 | UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); |
667 | cv_init(&vp->v_cv, "vnode"); | | 668 | cv_init(&vp->v_cv, "vnode"); |
668 | /* | | 669 | /* |
669 | * done by memset() above. | | 670 | * done by memset() above. |
670 | * LIST_INIT(&vp->v_nclist); | | 671 | * LIST_INIT(&vp->v_nclist); |
671 | * LIST_INIT(&vp->v_dnclist); | | 672 | * LIST_INIT(&vp->v_dnclist); |
672 | */ | | 673 | */ |
673 | | | 674 | |
674 | if (mp != NULL) { | | 675 | if (mp != NULL) { |
675 | vp->v_mount = mp; | | 676 | vp->v_mount = mp; |
676 | vp->v_type = VBAD; | | 677 | vp->v_type = VBAD; |
677 | vp->v_iflag = VI_MARKER; | | 678 | vp->v_iflag = VI_MARKER; |
678 | } else { | | 679 | } else { |
679 | rw_init(&vp->v_lock.vl_lock); | | 680 | rw_init(&vp->v_lock.vl_lock); |
680 | } | | 681 | } |
681 | | | 682 | |
682 | return vp; | | 683 | return vp; |
683 | } | | 684 | } |
684 | | | 685 | |
685 | /* | | 686 | /* |
686 | * Free an unused, unreferenced vnode. | | 687 | * Free an unused, unreferenced vnode. |
687 | */ | | 688 | */ |
688 | void | | 689 | void |
689 | vnfree(vnode_t *vp) | | 690 | vnfree(vnode_t *vp) |
690 | { | | 691 | { |
691 | | | 692 | |
692 | KASSERT(vp->v_usecount == 0); | | 693 | KASSERT(vp->v_usecount == 0); |
693 | | | 694 | |
694 | if ((vp->v_iflag & VI_MARKER) == 0) { | | 695 | if ((vp->v_iflag & VI_MARKER) == 0) { |
695 | rw_destroy(&vp->v_lock.vl_lock); | | 696 | rw_destroy(&vp->v_lock.vl_lock); |
696 | mutex_enter(&vnode_free_list_lock); | | 697 | mutex_enter(&vnode_free_list_lock); |
697 | numvnodes--; | | 698 | numvnodes--; |
698 | mutex_exit(&vnode_free_list_lock); | | 699 | mutex_exit(&vnode_free_list_lock); |
699 | } | | 700 | } |
700 | | | 701 | |
701 | UVM_OBJ_DESTROY(&vp->v_uobj); | | 702 | UVM_OBJ_DESTROY(&vp->v_uobj); |
702 | cv_destroy(&vp->v_cv); | | 703 | cv_destroy(&vp->v_cv); |
703 | pool_cache_put(vnode_cache, vp); | | 704 | pool_cache_put(vnode_cache, vp); |
704 | } | | 705 | } |
705 | | | 706 | |
706 | /* | | 707 | /* |
707 | * Remove a vnode from its freelist. | | 708 | * Remove a vnode from its freelist. |
708 | */ | | 709 | */ |
709 | static inline void | | 710 | static inline void |
710 | vremfree(vnode_t *vp) | | 711 | vremfree(vnode_t *vp) |
711 | { | | 712 | { |
712 | | | 713 | |
713 | KASSERT(mutex_owned(&vp->v_interlock)); | | 714 | KASSERT(mutex_owned(&vp->v_interlock)); |
714 | KASSERT(vp->v_usecount == 0); | | 715 | KASSERT(vp->v_usecount == 0); |
715 | | | 716 | |
716 | /* | | 717 | /* |
717 | * Note that the reference count must not change until | | 718 | * Note that the reference count must not change until |
718 | * the vnode is removed. | | 719 | * the vnode is removed. |
719 | */ | | 720 | */ |
720 | mutex_enter(&vnode_free_list_lock); | | 721 | mutex_enter(&vnode_free_list_lock); |
721 | if (vp->v_holdcnt > 0) { | | 722 | if (vp->v_holdcnt > 0) { |
722 | KASSERT(vp->v_freelisthd == &vnode_hold_list); | | 723 | KASSERT(vp->v_freelisthd == &vnode_hold_list); |
723 | } else { | | 724 | } else { |
724 | KASSERT(vp->v_freelisthd == &vnode_free_list); | | 725 | KASSERT(vp->v_freelisthd == &vnode_free_list); |
725 | } | | 726 | } |
726 | TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); | | 727 | TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); |
727 | vp->v_freelisthd = NULL; | | 728 | vp->v_freelisthd = NULL; |
728 | mutex_exit(&vnode_free_list_lock); | | 729 | mutex_exit(&vnode_free_list_lock); |
729 | } | | 730 | } |
730 | | | 731 | |
731 | /* | | 732 | /* |
732 | * Move a vnode from one mount queue to another. | | 733 | * Move a vnode from one mount queue to another. |
733 | */ | | 734 | */ |
734 | static void | | 735 | static void |
735 | insmntque(vnode_t *vp, struct mount *mp) | | 736 | insmntque(vnode_t *vp, struct mount *mp) |
736 | { | | 737 | { |
737 | struct mount *omp; | | 738 | struct mount *omp; |
738 | | | 739 | |
739 | #ifdef DIAGNOSTIC | | 740 | #ifdef DIAGNOSTIC |
740 | if ((mp != NULL) && | | 741 | if ((mp != NULL) && |
741 | (mp->mnt_iflag & IMNT_UNMOUNT) && | | 742 | (mp->mnt_iflag & IMNT_UNMOUNT) && |
742 | !(mp->mnt_flag & MNT_SOFTDEP) && | | 743 | !(mp->mnt_flag & MNT_SOFTDEP) && |
743 | vp->v_tag != VT_VFS) { | | 744 | vp->v_tag != VT_VFS) { |
744 | panic("insmntque into dying filesystem"); | | 745 | panic("insmntque into dying filesystem"); |
745 | } | | 746 | } |
746 | #endif | | 747 | #endif |
747 | | | 748 | |
748 | mutex_enter(&mntvnode_lock); | | 749 | mutex_enter(&mntvnode_lock); |
749 | /* | | 750 | /* |
750 | * Delete from old mount point vnode list, if on one. | | 751 | * Delete from old mount point vnode list, if on one. |
751 | */ | | 752 | */ |
752 | if ((omp = vp->v_mount) != NULL) | | 753 | if ((omp = vp->v_mount) != NULL) |
753 | TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); | | 754 | TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); |
754 | /* | | 755 | /* |
755 | * Insert into list of vnodes for the new mount point, if | | 756 | * Insert into list of vnodes for the new mount point, if |
756 | * available. The caller must take a reference on the mount | | 757 | * available. The caller must take a reference on the mount |
757 | * structure and donate to the vnode. | | 758 | * structure and donate to the vnode. |
758 | */ | | 759 | */ |
759 | if ((vp->v_mount = mp) != NULL) | | 760 | if ((vp->v_mount = mp) != NULL) |
760 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | | 761 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); |
761 | mutex_exit(&mntvnode_lock); | | 762 | mutex_exit(&mntvnode_lock); |
762 | | | 763 | |
763 | if (omp != NULL) { | | 764 | if (omp != NULL) { |
764 | /* Release reference to old mount. */ | | 765 | /* Release reference to old mount. */ |
765 | vfs_destroy(omp); | | 766 | vfs_destroy(omp); |
766 | } | | 767 | } |
767 | } | | 768 | } |
768 | | | 769 | |
769 | /* | | 770 | /* |
770 | * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or | | 771 | * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or |
771 | * recycled. | | 772 | * recycled. |
772 | */ | | 773 | */ |
773 | void | | 774 | void |
774 | vwait(vnode_t *vp, int flags) | | 775 | vwait(vnode_t *vp, int flags) |
775 | { | | 776 | { |
776 | | | 777 | |
777 | KASSERT(mutex_owned(&vp->v_interlock)); | | 778 | KASSERT(mutex_owned(&vp->v_interlock)); |
778 | KASSERT(vp->v_usecount != 0); | | 779 | KASSERT(vp->v_usecount != 0); |
779 | | | 780 | |
780 | while ((vp->v_iflag & flags) != 0) | | 781 | while ((vp->v_iflag & flags) != 0) |
781 | cv_wait(&vp->v_cv, &vp->v_interlock); | | 782 | cv_wait(&vp->v_cv, &vp->v_interlock); |
782 | } | | 783 | } |
783 | | | 784 | |
784 | /* | | 785 | /* |
785 | * Insert a marker vnode into a mount's vnode list, after the | | 786 | * Insert a marker vnode into a mount's vnode list, after the |
786 | * specified vnode. mntvnode_lock must be held. | | 787 | * specified vnode. mntvnode_lock must be held. |
787 | */ | | 788 | */ |
788 | void | | 789 | void |
789 | vmark(vnode_t *mvp, vnode_t *vp) | | 790 | vmark(vnode_t *mvp, vnode_t *vp) |
790 | { | | 791 | { |
791 | struct mount *mp; | | 792 | struct mount *mp; |
792 | | | 793 | |
793 | mp = mvp->v_mount; | | 794 | mp = mvp->v_mount; |
794 | | | 795 | |
795 | KASSERT(mutex_owned(&mntvnode_lock)); | | 796 | KASSERT(mutex_owned(&mntvnode_lock)); |
796 | KASSERT((mvp->v_iflag & VI_MARKER) != 0); | | 797 | KASSERT((mvp->v_iflag & VI_MARKER) != 0); |
797 | KASSERT(vp->v_mount == mp); | | 798 | KASSERT(vp->v_mount == mp); |
798 | | | 799 | |
799 | TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); | | 800 | TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); |
800 | } | | 801 | } |
801 | | | 802 | |
802 | /* | | 803 | /* |
803 | * Remove a marker vnode from a mount's vnode list, and return | | 804 | * Remove a marker vnode from a mount's vnode list, and return |
804 | * a pointer to the next vnode in the list. mntvnode_lock must | | 805 | * a pointer to the next vnode in the list. mntvnode_lock must |
805 | * be held. | | 806 | * be held. |
806 | */ | | 807 | */ |
807 | vnode_t * | | 808 | vnode_t * |
808 | vunmark(vnode_t *mvp) | | 809 | vunmark(vnode_t *mvp) |
809 | { | | 810 | { |
810 | vnode_t *vp; | | 811 | vnode_t *vp; |
811 | struct mount *mp; | | 812 | struct mount *mp; |
812 | | | 813 | |
813 | mp = mvp->v_mount; | | 814 | mp = mvp->v_mount; |
814 | | | 815 | |
815 | KASSERT(mutex_owned(&mntvnode_lock)); | | 816 | KASSERT(mutex_owned(&mntvnode_lock)); |
816 | KASSERT((mvp->v_iflag & VI_MARKER) != 0); | | 817 | KASSERT((mvp->v_iflag & VI_MARKER) != 0); |
817 | | | 818 | |
818 | vp = TAILQ_NEXT(mvp, v_mntvnodes); | | 819 | vp = TAILQ_NEXT(mvp, v_mntvnodes); |
819 | TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); | | 820 | TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); |
820 | | | 821 | |
821 | KASSERT(vp == NULL || vp->v_mount == mp); | | 822 | KASSERT(vp == NULL || vp->v_mount == mp); |
822 | | | 823 | |
823 | return vp; | | 824 | return vp; |
824 | } | | 825 | } |
825 | | | 826 | |
826 | /* | | 827 | /* |
827 | * Update outstanding I/O count and do wakeup if requested. | | 828 | * Update outstanding I/O count and do wakeup if requested. |
828 | */ | | 829 | */ |
829 | void | | 830 | void |
830 | vwakeup(struct buf *bp) | | 831 | vwakeup(struct buf *bp) |
831 | { | | 832 | { |
832 | struct vnode *vp; | | 833 | struct vnode *vp; |
833 | | | 834 | |
834 | if ((vp = bp->b_vp) == NULL) | | 835 | if ((vp = bp->b_vp) == NULL) |
835 | return; | | 836 | return; |
836 | | | 837 | |
837 | KASSERT(bp->b_objlock == &vp->v_interlock); | | 838 | KASSERT(bp->b_objlock == &vp->v_interlock); |
838 | KASSERT(mutex_owned(bp->b_objlock)); | | 839 | KASSERT(mutex_owned(bp->b_objlock)); |
839 | | | 840 | |
840 | if (--vp->v_numoutput < 0) | | 841 | if (--vp->v_numoutput < 0) |
841 | panic("vwakeup: neg numoutput, vp %p", vp); | | 842 | panic("vwakeup: neg numoutput, vp %p", vp); |
842 | if (vp->v_numoutput == 0) | | 843 | if (vp->v_numoutput == 0) |
843 | cv_broadcast(&vp->v_cv); | | 844 | cv_broadcast(&vp->v_cv); |
844 | } | | 845 | } |
845 | | | 846 | |
846 | /* | | 847 | /* |
847 | * Flush out and invalidate all buffers associated with a vnode. | | 848 | * Flush out and invalidate all buffers associated with a vnode. |
848 | * Called with the underlying vnode locked, which should prevent new dirty | | 849 | * Called with the underlying vnode locked, which should prevent new dirty |
849 | * buffers from being queued. | | 850 | * buffers from being queued. |
850 | */ | | 851 | */ |
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
	bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	mutex_enter(&vp->v_interlock);
	/* NOTE(review): v_interlock is consumed by VOP_PUTPAGES -- no
	 * explicit exit on this error path; confirm that convention. */
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		/* Write back dirty data before invalidating. */
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
		if (error)
			return (error);
		KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
	}

	mutex_enter(&bufcache_lock);
restart:
	/*
	 * Invalidate the dirty buffer list.  bbusy() may drop
	 * bufcache_lock while sleeping; EPASSTHROUGH means the list
	 * may have changed under us, so rescan from the top.
	 */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Now the clean buffer list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			/* Write it out synchronously, then rescan. */
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			VOP_BWRITE(bp);
			mutex_enter(&bufcache_lock);
			goto restart;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	mutex_exit(&bufcache_lock);

	return (0);
}
924 | | | 925 | |
925 | /* | | 926 | /* |
926 | * Destroy any in core blocks past the truncation length. | | 927 | * Destroy any in core blocks past the truncation length. |
927 | * Called with the underlying vnode locked, which should prevent new dirty | | 928 | * Called with the underlying vnode locked, which should prevent new dirty |
928 | * buffers from being queued. | | 929 | * buffers from being queued. |
929 | */ | | 930 | */ |
int
vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	voff_t off;

	/* Flush and free all pages at or beyond the truncation point. */
	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	mutex_enter(&vp->v_interlock);
	/* NOTE(review): v_interlock is consumed by VOP_PUTPAGES -- no
	 * explicit exit on this error path; confirm that convention. */
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	mutex_enter(&bufcache_lock);
restart:
	/*
	 * Invalidate dirty buffers past the truncation length.  bbusy()
	 * may drop bufcache_lock while sleeping; EPASSTHROUGH means the
	 * list may have changed under us, so rescan from the top.
	 */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Same for clean buffers past the truncation length. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}
	mutex_exit(&bufcache_lock);

	return (0);
}
977 | | | 978 | |
978 | /* | | 979 | /* |
979 | * Flush all dirty buffers from a vnode. | | 980 | * Flush all dirty buffers from a vnode. |
980 | * Called with the underlying vnode locked, which should prevent new dirty | | 981 | * Called with the underlying vnode locked, which should prevent new dirty |
981 | * buffers from being queued. | | 982 | * buffers from being queued. |
982 | */ | | 983 | */ |
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	bool dirty;

	/* First push any dirty pages; sync callers wait for the page I/O. */
	mutex_enter(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	mutex_enter(&bufcache_lock);
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/* Skip buffers someone else is already working on. */
		if ((bp->b_cflags & BC_BUSY))
			continue;
		/* Everything on the dirty list must be delayed-write. */
		if ((bp->b_oflags & BO_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
		mutex_exit(&bufcache_lock);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/*
		 * bufcache_lock was dropped for the write, so the dirty
		 * list may have changed under us; rescan from the head.
		 */
		goto loop;
	}
	mutex_exit(&bufcache_lock);

	if (sync == 0)
		return;

	/* Sync case: wait for all outstanding writes on the vnode to drain. */
	mutex_enter(&vp->v_interlock);
	while (vp->v_numoutput != 0)
		cv_wait(&vp->v_cv, &vp->v_interlock);
	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
	mutex_exit(&vp->v_interlock);

	/* Buffers may have been redirtied while we slept; start over. */
	if (dirty) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
1029 | | | 1030 | |
1030 | /* | | 1031 | /* |
1031 | * Create a vnode for a block device. | | 1032 | * Create a vnode for a block device. |
1032 | * Used for root filesystem and swap areas. | | 1033 | * Used for root filesystem and swap areas. |
1033 | * Also used for memory file system special devices. | | 1034 | * Also used for memory file system special devices. |
1034 | */ | | 1035 | */ |
1035 | int | | 1036 | int |
1036 | bdevvp(dev_t dev, vnode_t **vpp) | | 1037 | bdevvp(dev_t dev, vnode_t **vpp) |
1037 | { | | 1038 | { |
1038 | | | 1039 | |
1039 | return (getdevvp(dev, vpp, VBLK)); | | 1040 | return (getdevvp(dev, vpp, VBLK)); |
1040 | } | | 1041 | } |
1041 | | | 1042 | |
1042 | /* | | 1043 | /* |
1043 | * Create a vnode for a character device. | | 1044 | * Create a vnode for a character device. |
1044 | * Used for kernfs and some console handling. | | 1045 | * Used for kernfs and some console handling. |
1045 | */ | | 1046 | */ |
1046 | int | | 1047 | int |
1047 | cdevvp(dev_t dev, vnode_t **vpp) | | 1048 | cdevvp(dev_t dev, vnode_t **vpp) |
1048 | { | | 1049 | { |
1049 | | | 1050 | |
1050 | return (getdevvp(dev, vpp, VCHR)); | | 1051 | return (getdevvp(dev, vpp, VCHR)); |
1051 | } | | 1052 | } |
1052 | | | 1053 | |
1053 | /* | | 1054 | /* |
1054 | * Associate a buffer with a vnode. There must already be a hold on | | 1055 | * Associate a buffer with a vnode. There must already be a hold on |
1055 | * the vnode. | | 1056 | * the vnode. |
1056 | */ | | 1057 | */ |
1057 | void | | 1058 | void |
1058 | bgetvp(struct vnode *vp, struct buf *bp) | | 1059 | bgetvp(struct vnode *vp, struct buf *bp) |
1059 | { | | 1060 | { |
1060 | | | 1061 | |
1061 | KASSERT(bp->b_vp == NULL); | | 1062 | KASSERT(bp->b_vp == NULL); |
1062 | KASSERT(bp->b_objlock == &buffer_lock); | | 1063 | KASSERT(bp->b_objlock == &buffer_lock); |
1063 | KASSERT(mutex_owned(&vp->v_interlock)); | | 1064 | KASSERT(mutex_owned(&vp->v_interlock)); |
1064 | KASSERT(mutex_owned(&bufcache_lock)); | | 1065 | KASSERT(mutex_owned(&bufcache_lock)); |
1065 | KASSERT((bp->b_cflags & BC_BUSY) != 0); | | 1066 | KASSERT((bp->b_cflags & BC_BUSY) != 0); |
1066 | KASSERT(!cv_has_waiters(&bp->b_done)); | | 1067 | KASSERT(!cv_has_waiters(&bp->b_done)); |
1067 | | | 1068 | |
1068 | vholdl(vp); | | 1069 | vholdl(vp); |
1069 | bp->b_vp = vp; | | 1070 | bp->b_vp = vp; |
1070 | if (vp->v_type == VBLK || vp->v_type == VCHR) | | 1071 | if (vp->v_type == VBLK || vp->v_type == VCHR) |
1071 | bp->b_dev = vp->v_rdev; | | 1072 | bp->b_dev = vp->v_rdev; |
1072 | else | | 1073 | else |
1073 | bp->b_dev = NODEV; | | 1074 | bp->b_dev = NODEV; |
1074 | | | 1075 | |
1075 | /* | | 1076 | /* |
1076 | * Insert onto list for new vnode. | | 1077 | * Insert onto list for new vnode. |
1077 | */ | | 1078 | */ |
1078 | bufinsvn(bp, &vp->v_cleanblkhd); | | 1079 | bufinsvn(bp, &vp->v_cleanblkhd); |
1079 | bp->b_objlock = &vp->v_interlock; | | 1080 | bp->b_objlock = &vp->v_interlock; |
1080 | } | | 1081 | } |
1081 | | | 1082 | |
1082 | /* | | 1083 | /* |
1083 | * Disassociate a buffer from a vnode. | | 1084 | * Disassociate a buffer from a vnode. |
1084 | */ | | 1085 | */ |
void
brelvp(struct buf *bp)
{
	struct vnode *vp = bp->b_vp;

	/* Caller holds the vnode interlock, bufcache_lock and the buffer. */
	KASSERT(vp != NULL);
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);
	KASSERT(!cv_has_waiters(&bp->b_done));

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If there are no remaining dirty pages or buffers, take the
	 * vnode off the syncer's worklist.
	 */
	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_iflag &= ~VI_WRMAPDIRTY;
		vn_syncer_remove_from_worklist(vp);
	}

	/* Detach the buffer and drop the hold taken by bgetvp(). */
	bp->b_objlock = &buffer_lock;
	bp->b_vp = NULL;
	holdrelel(vp);
}
1113 | | | 1114 | |
1114 | /* | | 1115 | /* |
1115 | * Reassign a buffer from one vnode list to another. | | 1116 | * Reassign a buffer from one vnode list to another. |
1116 | * The list reassignment must be within the same vnode. | | 1117 | * The list reassignment must be within the same vnode. |
1117 | * Used to assign file specific control information | | 1118 | * Used to assign file specific control information |
1118 | * (indirect blocks) to the list to which they belong. | | 1119 | * (indirect blocks) to the list to which they belong. |
1119 | */ | | 1120 | */ |
void
reassignbuf(struct buf *bp, struct vnode *vp)
{
	struct buflists *listheadp;
	int delayx;

	/* Caller holds bufcache_lock, the vnode interlock and the buffer. */
	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_oflags & BO_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		/*
		 * Going clean: if nothing dirty remains on the vnode,
		 * take it off the syncer's worklist.
		 */
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		/*
		 * Going dirty: queue the vnode for the syncer, with a
		 * writeback delay chosen by vnode type (directories and
		 * mounted-device metadata are flushed sooner than file
		 * data).
		 */
		if ((vp->v_iflag & VI_ONWORKLST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			/* Async mounts are flushed on demand, not by timer. */
			if (!vp->v_mount ||
			    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(vp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}
1173 | | | 1174 | |
1174 | /* | | 1175 | /* |
1175 | * Create a vnode for a device. | | 1176 | * Create a vnode for a device. |
1176 | * Used by bdevvp (block device) for root file system etc., | | 1177 | * Used by bdevvp (block device) for root file system etc., |
1177 | * and by cdevvp (character device) for console and kernfs. | | 1178 | * and by cdevvp (character device) for console and kernfs. |
1178 | */ | | 1179 | */ |
1179 | static int | | 1180 | static int |
1180 | getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) | | 1181 | getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) |
1181 | { | | 1182 | { |
1182 | vnode_t *vp; | | 1183 | vnode_t *vp; |
1183 | vnode_t *nvp; | | 1184 | vnode_t *nvp; |
1184 | int error; | | 1185 | int error; |
1185 | | | 1186 | |
1186 | if (dev == NODEV) { | | 1187 | if (dev == NODEV) { |
1187 | *vpp = NULL; | | 1188 | *vpp = NULL; |
1188 | return (0); | | 1189 | return (0); |
1189 | } | | 1190 | } |
1190 | error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); | | 1191 | error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); |
1191 | if (error) { | | 1192 | if (error) { |
1192 | *vpp = NULL; | | 1193 | *vpp = NULL; |
1193 | return (error); | | 1194 | return (error); |
1194 | } | | 1195 | } |
1195 | vp = nvp; | | 1196 | vp = nvp; |
1196 | vp->v_type = type; | | 1197 | vp->v_type = type; |
1197 | vp->v_vflag |= VV_MPSAFE; | | 1198 | vp->v_vflag |= VV_MPSAFE; |
1198 | uvm_vnp_setsize(vp, 0); | | 1199 | uvm_vnp_setsize(vp, 0); |
1199 | spec_node_init(vp, dev); | | 1200 | spec_node_init(vp, dev); |
1200 | *vpp = vp; | | 1201 | *vpp = vp; |
1201 | return (0); | | 1202 | return (0); |
1202 | } | | 1203 | } |
1203 | | | 1204 | |
1204 | /* | | 1205 | /* |
1205 | * Try to gain a reference to a vnode, without acquiring its interlock. | | 1206 | * Try to gain a reference to a vnode, without acquiring its interlock. |
1206 | * The caller must hold a lock that will prevent the vnode from being | | 1207 | * The caller must hold a lock that will prevent the vnode from being |
1207 | * recycled or freed. | | 1208 | * recycled or freed. |
1208 | */ | | 1209 | */ |
1209 | bool | | 1210 | bool |
1210 | vtryget(vnode_t *vp) | | 1211 | vtryget(vnode_t *vp) |
1211 | { | | 1212 | { |
1212 | u_int use, next; | | 1213 | u_int use, next; |
1213 | | | 1214 | |
1214 | /* | | 1215 | /* |
1215 | * If the vnode is being freed, don't make life any harder | | 1216 | * If the vnode is being freed, don't make life any harder |
1216 | * for vclean() by adding another reference without waiting. | | 1217 | * for vclean() by adding another reference without waiting. |
1217 | * This is not strictly necessary, but we'll do it anyway. | | 1218 | * This is not strictly necessary, but we'll do it anyway. |
1218 | */ | | 1219 | */ |
1219 | if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { | | 1220 | if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { |
1220 | return false; | | 1221 | return false; |
1221 | } | | 1222 | } |
1222 | for (use = vp->v_usecount;; use = next) { | | 1223 | for (use = vp->v_usecount;; use = next) { |
1223 | if (use == 0) { | | 1224 | if (use == 0) { |
1224 | /* Need interlock held if first reference. */ | | 1225 | /* Need interlock held if first reference. */ |
1225 | return false; | | 1226 | return false; |
1226 | } | | 1227 | } |
1227 | next = atomic_cas_uint(&vp->v_usecount, use, use + 1); | | 1228 | next = atomic_cas_uint(&vp->v_usecount, use, use + 1); |
1228 | if (__predict_true(next == use)) { | | 1229 | if (__predict_true(next == use)) { |
1229 | return true; | | 1230 | return true; |
1230 | } | | 1231 | } |
1231 | } | | 1232 | } |
1232 | } | | 1233 | } |
1233 | | | 1234 | |
1234 | /* | | 1235 | /* |
1235 | * Grab a particular vnode from the free list, increment its | | 1236 | * Grab a particular vnode from the free list, increment its |
1236 | * reference count and lock it. If the vnode lock bit is set the | | 1237 | * reference count and lock it. If the vnode lock bit is set the |
1237 | * vnode is being eliminated in vgone. In that case, we can not | | 1238 | * vnode is being eliminated in vgone. In that case, we can not |
1238 | * grab the vnode, so the process is awakened when the transition is | | 1239 | * grab the vnode, so the process is awakened when the transition is |
1239 | * completed, and an error returned to indicate that the vnode is no | | 1240 | * completed, and an error returned to indicate that the vnode is no |
1240 | * longer usable (possibly having been changed to a new file system type). | | 1241 | * longer usable (possibly having been changed to a new file system type). |
1241 | */ | | 1242 | */ |
int
vget(vnode_t *vp, int flags)
{
	int error;

	/* Marker nodes are list placeholders, never real vnodes. */
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((flags & LK_INTERLOCK) == 0)
		mutex_enter(&vp->v_interlock);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		/* Already referenced; a lock-free bump is sufficient. */
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking if
	 * the VI_XLOCK or VI_FREEING flags are set.
	 */
	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			/* vrelel() consumes the interlock and our ref. */
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_XLOCK | VI_FREEING);
		vrelel(vp, 0);
		return ENOENT;
	}
	/* Optionally take the vnode lock; vn_lock() drops the interlock. */
	if (flags & LK_TYPE_MASK) {
		error = vn_lock(vp, flags | LK_INTERLOCK);
		if (error != 0) {
			vrele(vp);
		}
		return error;
	}
	mutex_exit(&vp->v_interlock);
	return 0;
}
1288 | | | 1289 | |
1289 | /* | | 1290 | /* |
1290 | * vput(), just unlock and vrele() | | 1291 | * vput(), just unlock and vrele() |
1291 | */ | | 1292 | */ |