| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: vfs_subr.c,v 1.357.4.4 2009/02/16 03:33:16 snj Exp $ */ | | 1 | /* $NetBSD: vfs_subr.c,v 1.357.4.4.2.1 2009/07/21 00:29:32 snj Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, | | 8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. | | 9 | * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
| @@ -66,32 +66,42 @@ | | | @@ -66,32 +66,42 @@ |
66 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 | | 66 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 |
67 | */ | | 67 | */ |
68 | | | 68 | |
69 | /* | | 69 | /* |
70 | * Note on v_usecount and locking: | | 70 | * Note on v_usecount and locking: |
71 | * | | 71 | * |
72 | * At nearly all points where it is known that v_usecount could be zero, | | 72 | * At nearly all points where it is known that v_usecount could be zero, |
73 | * the vnode interlock will be held. | | 73 | * the vnode interlock will be held. |
74 | * | | 74 | * |
75 | * To change v_usecount away from zero, the interlock must be held. To | | 75 | * To change v_usecount away from zero, the interlock must be held. To |
76 | * change from a non-zero value to zero, again the interlock must be | | 76 | * change from a non-zero value to zero, again the interlock must be |
77 | * held. | | 77 | * held. |
78 | * | | 78 | * |
79 | * Changing the usecount from a non-zero value to a non-zero value can | | 79 | * There's a flag bit, VC_XLOCK, embedded in v_usecount. |
80 | * safely be done using atomic operations, without the interlock held. | | 80 | * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock |
| | | 81 | * must be held. |
| | | 82 | * To modify the VC_XLOCK bit, the interlock must be held. |
| | | 83 | * We always keep the usecount (v_usecount & VC_MASK) non-zero while the |
| | | 84 | * VC_XLOCK bit is set. |
| | | 85 | * |
| | | 86 | * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero |
| | | 87 | * value to a non-zero value can safely be done using atomic operations, |
| | | 88 | * without the interlock held. |
| | | 89 | * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero |
| | | 90 | * value can be done using atomic operations, without the interlock held. |
81 | */ | | 91 | */ |
82 | | | 92 | |
83 | #include <sys/cdefs.h> | | 93 | #include <sys/cdefs.h> |
84 | __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.357.4.4 2009/02/16 03:33:16 snj Exp $"); | | 94 | __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.357.4.4.2.1 2009/07/21 00:29:32 snj Exp $"); |
85 | | | 95 | |
86 | #include "opt_ddb.h" | | 96 | #include "opt_ddb.h" |
87 | #include "opt_compat_netbsd.h" | | 97 | #include "opt_compat_netbsd.h" |
88 | #include "opt_compat_43.h" | | 98 | #include "opt_compat_43.h" |
89 | | | 99 | |
90 | #include <sys/param.h> | | 100 | #include <sys/param.h> |
91 | #include <sys/systm.h> | | 101 | #include <sys/systm.h> |
92 | #include <sys/proc.h> | | 102 | #include <sys/proc.h> |
93 | #include <sys/kernel.h> | | 103 | #include <sys/kernel.h> |
94 | #include <sys/mount.h> | | 104 | #include <sys/mount.h> |
95 | #include <sys/fcntl.h> | | 105 | #include <sys/fcntl.h> |
96 | #include <sys/vnode.h> | | 106 | #include <sys/vnode.h> |
97 | #include <sys/stat.h> | | 107 | #include <sys/stat.h> |
| @@ -356,28 +366,30 @@ try_nextlist: | | | @@ -356,28 +366,30 @@ try_nextlist: |
356 | } | | 366 | } |
357 | | | 367 | |
358 | /* Remove it from the freelist. */ | | 368 | /* Remove it from the freelist. */ |
359 | TAILQ_REMOVE(listhd, vp, v_freelist); | | 369 | TAILQ_REMOVE(listhd, vp, v_freelist); |
360 | vp->v_freelisthd = NULL; | | 370 | vp->v_freelisthd = NULL; |
361 | mutex_exit(&vnode_free_list_lock); | | 371 | mutex_exit(&vnode_free_list_lock); |
362 | | | 372 | |
363 | /* | | 373 | /* |
364 | * The vnode is still associated with a file system, so we must | | 374 | * The vnode is still associated with a file system, so we must |
365 | * clean it out before reusing it. We need to add a reference | | 375 | * clean it out before reusing it. We need to add a reference |
366 | * before doing this. If the vnode gains another reference while | | 376 | * before doing this. If the vnode gains another reference while |
367 | * being cleaned out then we lose - retry. | | 377 | * being cleaned out then we lose - retry. |
368 | */ | | 378 | */ |
369 | atomic_inc_uint(&vp->v_usecount); | | 379 | atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK); |
370 | vclean(vp, DOCLOSE); | | 380 | vclean(vp, DOCLOSE); |
| | | 381 | KASSERT(vp->v_usecount >= 1 + VC_XLOCK); |
| | | 382 | atomic_add_int(&vp->v_usecount, -VC_XLOCK); |
371 | if (vp->v_usecount == 1) { | | 383 | if (vp->v_usecount == 1) { |
372 | /* We're about to dirty it. */ | | 384 | /* We're about to dirty it. */ |
373 | vp->v_iflag &= ~VI_CLEAN; | | 385 | vp->v_iflag &= ~VI_CLEAN; |
374 | mutex_exit(&vp->v_interlock); | | 386 | mutex_exit(&vp->v_interlock); |
375 | if (vp->v_type == VBLK || vp->v_type == VCHR) { | | 387 | if (vp->v_type == VBLK || vp->v_type == VCHR) { |
376 | spec_node_destroy(vp); | | 388 | spec_node_destroy(vp); |
377 | } | | 389 | } |
378 | vp->v_type = VNON; | | 390 | vp->v_type = VNON; |
379 | } else { | | 391 | } else { |
380 | /* | | 392 | /* |
381 | * Don't return to freelist - the holder of the last | | 393 | * Don't return to freelist - the holder of the last |
382 | * reference will destroy it. | | 394 | * reference will destroy it. |
383 | */ | | 395 | */ |
| @@ -1211,27 +1223,27 @@ bool | | | @@ -1211,27 +1223,27 @@ bool |
1211 | vtryget(vnode_t *vp) | | 1223 | vtryget(vnode_t *vp) |
1212 | { | | 1224 | { |
1213 | u_int use, next; | | 1225 | u_int use, next; |
1214 | | | 1226 | |
1215 | /* | | 1227 | /* |
1216 | * If the vnode is being freed, don't make life any harder | | 1228 | * If the vnode is being freed, don't make life any harder |
1217 | * for vclean() by adding another reference without waiting. | | 1229 | * for vclean() by adding another reference without waiting. |
1218 | * This is not strictly necessary, but we'll do it anyway. | | 1230 | * This is not strictly necessary, but we'll do it anyway. |
1219 | */ | | 1231 | */ |
1220 | if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { | | 1232 | if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { |
1221 | return false; | | 1233 | return false; |
1222 | } | | 1234 | } |
1223 | for (use = vp->v_usecount;; use = next) { | | 1235 | for (use = vp->v_usecount;; use = next) { |
1224 | if (use == 0) { | | 1236 | if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) { |
1225 | /* Need interlock held if first reference. */ | | 1237 | /* Need interlock held if first reference. */ |
1226 | return false; | | 1238 | return false; |
1227 | } | | 1239 | } |
1228 | next = atomic_cas_uint(&vp->v_usecount, use, use + 1); | | 1240 | next = atomic_cas_uint(&vp->v_usecount, use, use + 1); |
1229 | if (__predict_true(next == use)) { | | 1241 | if (__predict_true(next == use)) { |
1230 | return true; | | 1242 | return true; |
1231 | } | | 1243 | } |
1232 | } | | 1244 | } |
1233 | } | | 1245 | } |
1234 | | | 1246 | |
1235 | /* | | 1247 | /* |
1236 | * Grab a particular vnode from the free list, increment its | | 1248 | * Grab a particular vnode from the free list, increment its |
1237 | * reference count and lock it. If the vnode lock bit is set the | | 1249 | * reference count and lock it. If the vnode lock bit is set the |
| @@ -1300,29 +1312,30 @@ vput(vnode_t *vp) | | | @@ -1300,29 +1312,30 @@ vput(vnode_t *vp) |
1300 | vrele(vp); | | 1312 | vrele(vp); |
1301 | } | | 1313 | } |
1302 | | | 1314 | |
1303 | /* | | 1315 | /* |
1304 | * Try to drop reference on a vnode. Abort if we are releasing the | | 1316 | * Try to drop reference on a vnode. Abort if we are releasing the |
1305 | * last reference. Note: this _must_ succeed if not the last reference. | | 1317 | * last reference. Note: this _must_ succeed if not the last reference. |
1306 | */ | | 1318 | */ |
1307 | static inline bool | | 1319 | static inline bool |
1308 | vtryrele(vnode_t *vp) | | 1320 | vtryrele(vnode_t *vp) |
1309 | { | | 1321 | { |
1310 | u_int use, next; | | 1322 | u_int use, next; |
1311 | | | 1323 | |
1312 | for (use = vp->v_usecount;; use = next) { | | 1324 | for (use = vp->v_usecount;; use = next) { |
1313 | if (use == 1) { | | 1325 | if (use == 1) { |
1314 | return false; | | 1326 | return false; |
1315 | } | | 1327 | } |
| | | 1328 | KASSERT((use & VC_MASK) > 1); |
1316 | next = atomic_cas_uint(&vp->v_usecount, use, use - 1); | | 1329 | next = atomic_cas_uint(&vp->v_usecount, use, use - 1); |
1317 | if (__predict_true(next == use)) { | | 1330 | if (__predict_true(next == use)) { |
1318 | return true; | | 1331 | return true; |
1319 | } | | 1332 | } |
1320 | } | | 1333 | } |
1321 | } | | 1334 | } |
1322 | | | 1335 | |
1323 | /* | | 1336 | /* |
1324 | * Vnode release. If reference count drops to zero, call inactive | | 1337 | * Vnode release. If reference count drops to zero, call inactive |
1325 | * routine and either return to freelist or free to the pool. | | 1338 | * routine and either return to freelist or free to the pool. |
1326 | */ | | 1339 | */ |
1327 | void | | 1340 | void |
1328 | vrelel(vnode_t *vp, int flags) | | 1341 | vrelel(vnode_t *vp, int flags) |