| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $ */ | | 1 | /* $NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. | | 4 | * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. |
5 | * | | 5 | * |
6 | * Further information about snapshots can be obtained from: | | 6 | * Further information about snapshots can be obtained from: |
7 | * | | 7 | * |
8 | * Marshall Kirk McKusick http://www.mckusick.com/softdep/ | | 8 | * Marshall Kirk McKusick http://www.mckusick.com/softdep/ |
9 | * 1614 Oxford Street mckusick@mckusick.com | | 9 | * 1614 Oxford Street mckusick@mckusick.com |
10 | * Berkeley, CA 94709-1608 +1-510-843-9542 | | 10 | * Berkeley, CA 94709-1608 +1-510-843-9542 |
11 | * USA | | 11 | * USA |
12 | * | | 12 | * |
13 | * Redistribution and use in source and binary forms, with or without | | 13 | * Redistribution and use in source and binary forms, with or without |
14 | * modification, are permitted provided that the following conditions | | 14 | * modification, are permitted provided that the following conditions |
| @@ -28,27 +28,27 @@ | | | @@ -28,27 +28,27 @@ |
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | * SUCH DAMAGE. | | 33 | * SUCH DAMAGE. |
34 | * | | 34 | * |
35 | * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 | | 35 | * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 |
36 | * | | 36 | * |
37 | * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp | | 37 | * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp |
38 | */ | | 38 | */ |
39 | | | 39 | |
40 | #include <sys/cdefs.h> | | 40 | #include <sys/cdefs.h> |
41 | __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $"); | | 41 | __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $"); |
42 | | | 42 | |
43 | #if defined(_KERNEL_OPT) | | 43 | #if defined(_KERNEL_OPT) |
44 | #include "opt_ffs.h" | | 44 | #include "opt_ffs.h" |
45 | #endif | | 45 | #endif |
46 | | | 46 | |
47 | #include <sys/param.h> | | 47 | #include <sys/param.h> |
48 | #include <sys/kernel.h> | | 48 | #include <sys/kernel.h> |
49 | #include <sys/systm.h> | | 49 | #include <sys/systm.h> |
50 | #include <sys/conf.h> | | 50 | #include <sys/conf.h> |
51 | #include <sys/buf.h> | | 51 | #include <sys/buf.h> |
52 | #include <sys/proc.h> | | 52 | #include <sys/proc.h> |
53 | #include <sys/namei.h> | | 53 | #include <sys/namei.h> |
54 | #include <sys/sched.h> | | 54 | #include <sys/sched.h> |
| @@ -69,26 +69,27 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot | | | @@ -69,26 +69,27 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot |
69 | #include <ufs/ufs/inode.h> | | 69 | #include <ufs/ufs/inode.h> |
70 | #include <ufs/ufs/ufs_extern.h> | | 70 | #include <ufs/ufs/ufs_extern.h> |
71 | #include <ufs/ufs/ufs_bswap.h> | | 71 | #include <ufs/ufs/ufs_bswap.h> |
72 | #include <ufs/ufs/ufs_wapbl.h> | | 72 | #include <ufs/ufs/ufs_wapbl.h> |
73 | | | 73 | |
74 | #include <ufs/ffs/fs.h> | | 74 | #include <ufs/ffs/fs.h> |
75 | #include <ufs/ffs/ffs_extern.h> | | 75 | #include <ufs/ffs/ffs_extern.h> |
76 | | | 76 | |
77 | #include <uvm/uvm.h> | | 77 | #include <uvm/uvm.h> |
78 | | | 78 | |
79 | struct snap_info { | | 79 | struct snap_info { |
80 | kmutex_t si_lock; /* Lock this snapinfo */ | | 80 | kmutex_t si_lock; /* Lock this snapinfo */ |
81 | kmutex_t si_snaplock; /* Snapshot vnode common lock */ | | 81 | kmutex_t si_snaplock; /* Snapshot vnode common lock */ |
| | | 82 | lwp_t *si_owner; /* Snaplock owner */ |
82 | TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */ | | 83 | TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */ |
83 | daddr_t *si_snapblklist; /* Snapshot block hints list */ | | 84 | daddr_t *si_snapblklist; /* Snapshot block hints list */ |
84 | uint32_t si_gen; /* Incremented on change */ | | 85 | uint32_t si_gen; /* Incremented on change */ |
85 | }; | | 86 | }; |
86 | | | 87 | |
87 | #if !defined(FFS_NO_SNAPSHOT) | | 88 | #if !defined(FFS_NO_SNAPSHOT) |
88 | typedef int (*acctfunc_t) | | 89 | typedef int (*acctfunc_t) |
89 | (struct vnode *, void *, int, int, struct fs *, daddr_t, int); | | 90 | (struct vnode *, void *, int, int, struct fs *, daddr_t, int); |
90 | | | 91 | |
91 | static int snapshot_setup(struct mount *, struct vnode *); | | 92 | static int snapshot_setup(struct mount *, struct vnode *); |
92 | static int snapshot_copyfs(struct mount *, struct vnode *, void **); | | 93 | static int snapshot_copyfs(struct mount *, struct vnode *, void **); |
93 | static int snapshot_expunge(struct mount *, struct vnode *, | | 94 | static int snapshot_expunge(struct mount *, struct vnode *, |
94 | struct fs *, daddr_t *, daddr_t **); | | 95 | struct fs *, daddr_t *, daddr_t **); |
| @@ -130,26 +131,27 @@ static int snapdebug = 0; | | | @@ -130,26 +131,27 @@ static int snapdebug = 0; |
130 | | | 131 | |
131 | int | | 132 | int |
132 | ffs_snapshot_init(struct ufsmount *ump) | | 133 | ffs_snapshot_init(struct ufsmount *ump) |
133 | { | | 134 | { |
134 | struct snap_info *si; | | 135 | struct snap_info *si; |
135 | | | 136 | |
136 | si = ump->um_snapinfo = kmem_alloc(sizeof(*si), KM_SLEEP); | | 137 | si = ump->um_snapinfo = kmem_alloc(sizeof(*si), KM_SLEEP); |
137 | if (si == NULL) | | 138 | if (si == NULL) |
138 | return ENOMEM; | | 139 | return ENOMEM; |
139 | | | 140 | |
140 | TAILQ_INIT(&si->si_snapshots); | | 141 | TAILQ_INIT(&si->si_snapshots); |
141 | mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE); | | 142 | mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE); |
142 | mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE); | | 143 | mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE); |
| | | 144 | si->si_owner = NULL; |
143 | si->si_gen = 0; | | 145 | si->si_gen = 0; |
144 | si->si_snapblklist = NULL; | | 146 | si->si_snapblklist = NULL; |
145 | | | 147 | |
146 | return 0; | | 148 | return 0; |
147 | } | | 149 | } |
148 | | | 150 | |
149 | void | | 151 | void |
150 | ffs_snapshot_fini(struct ufsmount *ump) | | 152 | ffs_snapshot_fini(struct ufsmount *ump) |
151 | { | | 153 | { |
152 | struct snap_info *si; | | 154 | struct snap_info *si; |
153 | | | 155 | |
154 | si = ump->um_snapinfo; | | 156 | si = ump->um_snapinfo; |
155 | ump->um_snapinfo = NULL; | | 157 | ump->um_snapinfo = NULL; |
| @@ -163,27 +165,26 @@ ffs_snapshot_fini(struct ufsmount *ump) | | | @@ -163,27 +165,26 @@ ffs_snapshot_fini(struct ufsmount *ump) |
163 | | | 165 | |
164 | /* | | 166 | /* |
165 | * Create a snapshot file and initialize it for the filesystem. | | 167 | * Create a snapshot file and initialize it for the filesystem. |
166 | * Vnode is locked on entry and return. | | 168 | * Vnode is locked on entry and return. |
167 | */ | | 169 | */ |
168 | int | | 170 | int |
169 | ffs_snapshot(struct mount *mp, struct vnode *vp, struct timespec *ctime) | | 171 | ffs_snapshot(struct mount *mp, struct vnode *vp, struct timespec *ctime) |
170 | { | | 172 | { |
171 | #if defined(FFS_NO_SNAPSHOT) | | 173 | #if defined(FFS_NO_SNAPSHOT) |
172 | return EOPNOTSUPP; | | 174 | return EOPNOTSUPP; |
173 | } | | 175 | } |
174 | #else /* defined(FFS_NO_SNAPSHOT) */ | | 176 | #else /* defined(FFS_NO_SNAPSHOT) */ |
175 | bool suspended = false; | | 177 | bool suspended = false; |
176 | bool snapshot_locked = false; | | | |
177 | int error, redo = 0, snaploc; | | 178 | int error, redo = 0, snaploc; |
178 | void *sbbuf = NULL; | | 179 | void *sbbuf = NULL; |
179 | daddr_t *snaplist = NULL, snaplistsize = 0; | | 180 | daddr_t *snaplist = NULL, snaplistsize = 0; |
180 | struct buf *bp, *nbp; | | 181 | struct buf *bp, *nbp; |
181 | struct fs *copy_fs = NULL; | | 182 | struct fs *copy_fs = NULL; |
182 | struct fs *fs = VFSTOUFS(mp)->um_fs; | | 183 | struct fs *fs = VFSTOUFS(mp)->um_fs; |
183 | struct inode *ip = VTOI(vp); | | 184 | struct inode *ip = VTOI(vp); |
184 | struct lwp *l = curlwp; | | 185 | struct lwp *l = curlwp; |
185 | struct snap_info *si = VFSTOUFS(mp)->um_snapinfo; | | 186 | struct snap_info *si = VFSTOUFS(mp)->um_snapinfo; |
186 | struct timespec ts; | | 187 | struct timespec ts; |
187 | struct timeval starttime; | | 188 | struct timeval starttime; |
188 | #ifdef DEBUG | | 189 | #ifdef DEBUG |
189 | struct timeval endtime; | | 190 | struct timeval endtime; |
| @@ -259,31 +260,26 @@ ffs_snapshot(struct mount *mp, struct vn | | | @@ -259,31 +260,26 @@ ffs_snapshot(struct mount *mp, struct vn |
259 | * Create a copy of the superblock and its summary information. | | 260 | * Create a copy of the superblock and its summary information. |
260 | */ | | 261 | */ |
261 | error = snapshot_copyfs(mp, vp, &sbbuf); | | 262 | error = snapshot_copyfs(mp, vp, &sbbuf); |
262 | copy_fs = (struct fs *)((char *)sbbuf + blkoff(fs, fs->fs_sblockloc)); | | 263 | copy_fs = (struct fs *)((char *)sbbuf + blkoff(fs, fs->fs_sblockloc)); |
263 | if (error) | | 264 | if (error) |
264 | goto out; | | 265 | goto out; |
265 | /* | | 266 | /* |
266 | * Expunge unlinked files from our view. | | 267 | * Expunge unlinked files from our view. |
267 | */ | | 268 | */ |
268 | error = snapshot_expunge(mp, vp, copy_fs, &snaplistsize, &snaplist); | | 269 | error = snapshot_expunge(mp, vp, copy_fs, &snaplistsize, &snaplist); |
269 | if (error) | | 270 | if (error) |
270 | goto out; | | 271 | goto out; |
271 | /* | | 272 | /* |
272 | * Acquire the snapshot lock. | | | |
273 | */ | | | |
274 | mutex_enter(&si->si_snaplock); | | | |
275 | snapshot_locked = true; | | | |
276 | /* | | | |
277 | * Record snapshot inode. Since this is the newest snapshot, | | 273 | * Record snapshot inode. Since this is the newest snapshot, |
278 | * it must be placed at the end of the list. | | 274 | * it must be placed at the end of the list. |
279 | */ | | 275 | */ |
280 | fs->fs_snapinum[snaploc] = ip->i_number; | | 276 | fs->fs_snapinum[snaploc] = ip->i_number; |
281 | | | 277 | |
282 | mutex_enter(&si->si_lock); | | 278 | mutex_enter(&si->si_lock); |
283 | if (is_active_snapshot(si, ip)) | | 279 | if (is_active_snapshot(si, ip)) |
284 | panic("ffs_snapshot: %"PRIu64" already on list", ip->i_number); | | 280 | panic("ffs_snapshot: %"PRIu64" already on list", ip->i_number); |
285 | TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap); | | 281 | TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap); |
286 | if (TAILQ_FIRST(&si->si_snapshots) == ip) { | | 282 | if (TAILQ_FIRST(&si->si_snapshots) == ip) { |
287 | /* | | 283 | /* |
288 | * If this is the first snapshot on this filesystem, put the | | 284 | * If this is the first snapshot on this filesystem, put the |
289 | * preliminary list in place and establish the cow handler. | | 285 | * preliminary list in place and establish the cow handler. |
| @@ -366,28 +362,26 @@ out: | | | @@ -366,28 +362,26 @@ out: |
366 | } | | 362 | } |
367 | if (error) { | | 363 | if (error) { |
368 | fs->fs_snapinum[snaploc] = 0; | | 364 | fs->fs_snapinum[snaploc] = 0; |
369 | } else { | | 365 | } else { |
370 | /* | | 366 | /* |
371 | * As this is the newest list, it is the most inclusive, so | | 367 | * As this is the newest list, it is the most inclusive, so |
372 | * should replace the previous list. | | 368 | * should replace the previous list. |
373 | */ | | 369 | */ |
374 | si->si_snapblklist = ip->i_snapblklist; | | 370 | si->si_snapblklist = ip->i_snapblklist; |
375 | } | | 371 | } |
376 | si->si_gen++; | | 372 | si->si_gen++; |
377 | mutex_exit(&si->si_lock); | | 373 | mutex_exit(&si->si_lock); |
378 | | | 374 | |
379 | if (snapshot_locked) | | | |
380 | mutex_exit(&si->si_snaplock); | | | |
381 | if (suspended) { | | 375 | if (suspended) { |
382 | vfs_resume(vp->v_mount); | | 376 | vfs_resume(vp->v_mount); |
383 | #ifdef DEBUG | | 377 | #ifdef DEBUG |
384 | getmicrotime(&endtime); | | 378 | getmicrotime(&endtime); |
385 | timersub(&endtime, &starttime, &endtime); | | 379 | timersub(&endtime, &starttime, &endtime); |
386 | printf("%s: suspended %lld.%03d sec, redo %d of %d\n", | | 380 | printf("%s: suspended %lld.%03d sec, redo %d of %d\n", |
387 | mp->mnt_stat.f_mntonname, (long long)endtime.tv_sec, | | 381 | mp->mnt_stat.f_mntonname, (long long)endtime.tv_sec, |
388 | endtime.tv_usec / 1000, redo, fs->fs_ncg); | | 382 | endtime.tv_usec / 1000, redo, fs->fs_ncg); |
389 | #endif | | 383 | #endif |
390 | } | | 384 | } |
391 | if (error) { | | 385 | if (error) { |
392 | if (!UFS_WAPBL_BEGIN(mp)) { | | 386 | if (!UFS_WAPBL_BEGIN(mp)) { |
393 | (void) ffs_truncate(vp, (off_t)0, 0, NOCRED); | | 387 | (void) ffs_truncate(vp, (off_t)0, 0, NOCRED); |
| @@ -1344,101 +1338,102 @@ ffs_snapgone(struct inode *ip) | | | @@ -1344,101 +1338,102 @@ ffs_snapgone(struct inode *ip) |
1344 | * Prepare a snapshot file for being removed. | | 1338 | * Prepare a snapshot file for being removed. |
1345 | */ | | 1339 | */ |
1346 | void | | 1340 | void |
1347 | ffs_snapremove(struct vnode *vp) | | 1341 | ffs_snapremove(struct vnode *vp) |
1348 | { | | 1342 | { |
1349 | struct inode *ip = VTOI(vp), *xp; | | 1343 | struct inode *ip = VTOI(vp), *xp; |
1350 | struct vnode *devvp = ip->i_devvp; | | 1344 | struct vnode *devvp = ip->i_devvp; |
1351 | struct fs *fs = ip->i_fs; | | 1345 | struct fs *fs = ip->i_fs; |
1352 | struct mount *mp = devvp->v_specmountpoint; | | 1346 | struct mount *mp = devvp->v_specmountpoint; |
1353 | struct buf *ibp; | | 1347 | struct buf *ibp; |
1354 | struct snap_info *si; | | 1348 | struct snap_info *si; |
1355 | struct lwp *l = curlwp; | | 1349 | struct lwp *l = curlwp; |
1356 | daddr_t numblks, blkno, dblk; | | 1350 | daddr_t numblks, blkno, dblk; |
1357 | int error, loc, last, n; | | 1351 | int error, loc, last; |
1358 | const int wbreak = blocks_in_journal(fs)/8; | | | |
1359 | | | 1352 | |
1360 | si = VFSTOUFS(mp)->um_snapinfo; | | 1353 | si = VFSTOUFS(mp)->um_snapinfo; |
1361 | /* | | 1354 | /* |
1362 | * If active, delete from incore list (this snapshot may | | 1355 | * If active, delete from incore list (this snapshot may |
1363 | * already have been in the process of being deleted, so | | 1356 | * already have been in the process of being deleted, so |
1364 | * would not have been active). | | 1357 | * would not have been active). |
1365 | * | | 1358 | * |
1366 | * Clear copy-on-write flag if last snapshot. | | 1359 | * Clear copy-on-write flag if last snapshot. |
1367 | */ | | 1360 | */ |
| | | 1361 | mutex_enter(&si->si_snaplock); |
1368 | mutex_enter(&si->si_lock); | | 1362 | mutex_enter(&si->si_lock); |
1369 | if (is_active_snapshot(si, ip)) { | | 1363 | if (is_active_snapshot(si, ip)) { |
1370 | TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap); | | 1364 | TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap); |
1371 | if (TAILQ_FIRST(&si->si_snapshots) != 0) { | | 1365 | if (TAILQ_FIRST(&si->si_snapshots) != 0) { |
1372 | /* Roll back the list of preallocated blocks. */ | | 1366 | /* Roll back the list of preallocated blocks. */ |
1373 | xp = TAILQ_LAST(&si->si_snapshots, inodelst); | | 1367 | xp = TAILQ_LAST(&si->si_snapshots, inodelst); |
1374 | si->si_snapblklist = xp->i_snapblklist; | | 1368 | si->si_snapblklist = xp->i_snapblklist; |
1375 | si->si_gen++; | | 1369 | si->si_gen++; |
1376 | mutex_exit(&si->si_lock); | | 1370 | mutex_exit(&si->si_lock); |
| | | 1371 | mutex_exit(&si->si_snaplock); |
1377 | } else { | | 1372 | } else { |
1378 | si->si_snapblklist = 0; | | 1373 | si->si_snapblklist = 0; |
1379 | si->si_gen++; | | 1374 | si->si_gen++; |
1380 | mutex_exit(&si->si_lock); | | 1375 | mutex_exit(&si->si_lock); |
| | | 1376 | mutex_exit(&si->si_snaplock); |
1381 | fscow_disestablish(mp, ffs_copyonwrite, devvp); | | 1377 | fscow_disestablish(mp, ffs_copyonwrite, devvp); |
1382 | } | | 1378 | } |
1383 | if (ip->i_snapblklist != NULL) { | | 1379 | if (ip->i_snapblklist != NULL) { |
1384 | free(ip->i_snapblklist, M_UFSMNT); | | 1380 | free(ip->i_snapblklist, M_UFSMNT); |
1385 | ip->i_snapblklist = NULL; | | 1381 | ip->i_snapblklist = NULL; |
1386 | } | | 1382 | } |
1387 | } else | | 1383 | } else { |
1388 | mutex_exit(&si->si_lock); | | 1384 | mutex_exit(&si->si_lock); |
| | | 1385 | mutex_exit(&si->si_snaplock); |
| | | 1386 | } |
1389 | /* | | 1387 | /* |
1390 | * Clear all BLK_NOCOPY fields. Pass any block claims to other | | 1388 | * Clear all BLK_NOCOPY fields. Pass any block claims to other |
1391 | * snapshots that want them (see ffs_snapblkfree below). | | 1389 | * snapshots that want them (see ffs_snapblkfree below). |
1392 | */ | | 1390 | */ |
1393 | for (blkno = 1; blkno < NDADDR; blkno++) { | | 1391 | for (blkno = 1; blkno < NDADDR; blkno++) { |
1394 | dblk = db_get(ip, blkno); | | 1392 | dblk = db_get(ip, blkno); |
1395 | if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) | | 1393 | if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) |
1396 | db_assign(ip, blkno, 0); | | 1394 | db_assign(ip, blkno, 0); |
1397 | else if ((dblk == blkstofrags(fs, blkno) && | | 1395 | else if ((dblk == blkstofrags(fs, blkno) && |
1398 | ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, | | 1396 | ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, |
1399 | ip->i_number))) { | | 1397 | ip->i_number))) { |
1400 | DIP_ADD(ip, blocks, -btodb(fs->fs_bsize)); | | 1398 | DIP_ADD(ip, blocks, -btodb(fs->fs_bsize)); |
1401 | db_assign(ip, blkno, 0); | | 1399 | db_assign(ip, blkno, 0); |
1402 | } | | 1400 | } |
1403 | } | | 1401 | } |
1404 | numblks = howmany(ip->i_size, fs->fs_bsize); | | 1402 | numblks = howmany(ip->i_size, fs->fs_bsize); |
1405 | for (blkno = NDADDR, n = 0; blkno < numblks; blkno += NINDIR(fs)) { | | 1403 | for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { |
1406 | error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno), | | 1404 | error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno), |
1407 | fs->fs_bsize, l->l_cred, B_METAONLY, &ibp); | | 1405 | fs->fs_bsize, l->l_cred, B_METAONLY, &ibp); |
1408 | if (error) | | 1406 | if (error) |
1409 | continue; | | 1407 | continue; |
1410 | if (fs->fs_size - blkno > NINDIR(fs)) | | 1408 | if (fs->fs_size - blkno > NINDIR(fs)) |
1411 | last = NINDIR(fs); | | 1409 | last = NINDIR(fs); |
1412 | else | | 1410 | else |
1413 | last = fs->fs_size - blkno; | | 1411 | last = fs->fs_size - blkno; |
1414 | for (loc = 0; loc < last; loc++) { | | 1412 | for (loc = 0; loc < last; loc++) { |
1415 | if (wbreak > 0 && (++n % wbreak) == 0) { | | | |
1416 | UFS_WAPBL_END(mp); | | | |
1417 | error = UFS_WAPBL_BEGIN(mp); | | | |
1418 | if (error) | | | |
1419 | panic("UFS_WAPBL_BEGIN failed"); | | | |
1420 | } | | | |
1421 | dblk = idb_get(ip, ibp->b_data, loc); | | 1413 | dblk = idb_get(ip, ibp->b_data, loc); |
1422 | if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) | | 1414 | if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) |
1423 | idb_assign(ip, ibp->b_data, loc, 0); | | 1415 | idb_assign(ip, ibp->b_data, loc, 0); |
1424 | else if (dblk == blkstofrags(fs, blkno) && | | 1416 | else if (dblk == blkstofrags(fs, blkno) && |
1425 | ffs_snapblkfree(fs, ip->i_devvp, dblk, | | 1417 | ffs_snapblkfree(fs, ip->i_devvp, dblk, |
1426 | fs->fs_bsize, ip->i_number)) { | | 1418 | fs->fs_bsize, ip->i_number)) { |
1427 | DIP_ADD(ip, blocks, -btodb(fs->fs_bsize)); | | 1419 | DIP_ADD(ip, blocks, -btodb(fs->fs_bsize)); |
1428 | idb_assign(ip, ibp->b_data, loc, 0); | | 1420 | idb_assign(ip, ibp->b_data, loc, 0); |
1429 | } | | 1421 | } |
1430 | } | | 1422 | } |
1431 | bawrite(ibp); | | 1423 | bawrite(ibp); |
| | | 1424 | UFS_WAPBL_END(mp); |
| | | 1425 | error = UFS_WAPBL_BEGIN(mp); |
| | | 1426 | KASSERT(error == 0); |
1432 | } | | 1427 | } |
1433 | /* | | 1428 | /* |
1434 | * Clear snapshot flag and drop reference. | | 1429 | * Clear snapshot flag and drop reference. |
1435 | */ | | 1430 | */ |
1436 | ip->i_flags &= ~SF_SNAPSHOT; | | 1431 | ip->i_flags &= ~SF_SNAPSHOT; |
1437 | DIP_ASSIGN(ip, flags, ip->i_flags); | | 1432 | DIP_ASSIGN(ip, flags, ip->i_flags); |
1438 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | | 1433 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
1439 | } | | 1434 | } |
1440 | | | 1435 | |
1441 | /* | | 1436 | /* |
1442 | * Notification that a block is being freed. Return zero if the free | | 1437 | * Notification that a block is being freed. Return zero if the free |
1443 | * should be allowed to proceed. Return non-zero if the snapshot file | | 1438 | * should be allowed to proceed. Return non-zero if the snapshot file |
1444 | * wants to claim the block. The block will be claimed if it is an | | 1439 | * wants to claim the block. The block will be claimed if it is an |
| @@ -1459,45 +1454,38 @@ ffs_snapremove(struct vnode *vp) | | | @@ -1459,45 +1454,38 @@ ffs_snapremove(struct vnode *vp) |
1459 | int | | 1454 | int |
1460 | ffs_snapblkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, | | 1455 | ffs_snapblkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, |
1461 | long size, ino_t inum) | | 1456 | long size, ino_t inum) |
1462 | { | | 1457 | { |
1463 | struct mount *mp = devvp->v_specmountpoint; | | 1458 | struct mount *mp = devvp->v_specmountpoint; |
1464 | struct buf *ibp; | | 1459 | struct buf *ibp; |
1465 | struct inode *ip; | | 1460 | struct inode *ip; |
1466 | struct vnode *vp = NULL; | | 1461 | struct vnode *vp = NULL; |
1467 | struct snap_info *si; | | 1462 | struct snap_info *si; |
1468 | void *saved_data = NULL; | | 1463 | void *saved_data = NULL; |
1469 | daddr_t lbn; | | 1464 | daddr_t lbn; |
1470 | daddr_t blkno; | | 1465 | daddr_t blkno; |
1471 | uint32_t gen; | | 1466 | uint32_t gen; |
1472 | int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; | | 1467 | int indiroff = 0, error = 0, claimedblk = 0; |
1473 | | | 1468 | |
1474 | si = VFSTOUFS(mp)->um_snapinfo; | | 1469 | si = VFSTOUFS(mp)->um_snapinfo; |
1475 | lbn = fragstoblks(fs, bno); | | 1470 | lbn = fragstoblks(fs, bno); |
| | | 1471 | mutex_enter(&si->si_snaplock); |
1476 | mutex_enter(&si->si_lock); | | 1472 | mutex_enter(&si->si_lock); |
| | | 1473 | si->si_owner = curlwp; |
| | | 1474 | |
1477 | retry: | | 1475 | retry: |
1478 | gen = si->si_gen; | | 1476 | gen = si->si_gen; |
1479 | TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { | | 1477 | TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { |
1480 | vp = ITOV(ip); | | 1478 | vp = ITOV(ip); |
1481 | if (snapshot_locked == 0) { | | | |
1482 | if (!mutex_tryenter(&si->si_snaplock)) { | | | |
1483 | mutex_exit(&si->si_lock); | | | |
1484 | mutex_enter(&si->si_snaplock); | | | |
1485 | mutex_enter(&si->si_lock); | | | |
1486 | } | | | |
1487 | snapshot_locked = 1; | | | |
1488 | if (gen != si->si_gen) | | | |
1489 | goto retry; | | | |
1490 | } | | | |
1491 | /* | | 1479 | /* |
1492 | * Lookup block being written. | | 1480 | * Lookup block being written. |
1493 | */ | | 1481 | */ |
1494 | if (lbn < NDADDR) { | | 1482 | if (lbn < NDADDR) { |
1495 | blkno = db_get(ip, lbn); | | 1483 | blkno = db_get(ip, lbn); |
1496 | } else { | | 1484 | } else { |
1497 | mutex_exit(&si->si_lock); | | 1485 | mutex_exit(&si->si_lock); |
1498 | error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn), | | 1486 | error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn), |
1499 | fs->fs_bsize, FSCRED, B_METAONLY, &ibp); | | 1487 | fs->fs_bsize, FSCRED, B_METAONLY, &ibp); |
1500 | if (error) { | | 1488 | if (error) { |
1501 | mutex_enter(&si->si_lock); | | 1489 | mutex_enter(&si->si_lock); |
1502 | break; | | 1490 | break; |
1503 | } | | 1491 | } |
| @@ -1574,26 +1562,29 @@ retry: | | | @@ -1574,26 +1562,29 @@ retry: |
1574 | } else { | | 1562 | } else { |
1575 | idb_assign(ip, ibp->b_data, indiroff, bno); | | 1563 | idb_assign(ip, ibp->b_data, indiroff, bno); |
1576 | if (ip->i_nlink > 0) | | 1564 | if (ip->i_nlink > 0) |
1577 | bwrite(ibp); | | 1565 | bwrite(ibp); |
1578 | else | | 1566 | else |
1579 | bdwrite(ibp); | | 1567 | bdwrite(ibp); |
1580 | } | | 1568 | } |
1581 | DIP_ADD(ip, blocks, btodb(size)); | | 1569 | DIP_ADD(ip, blocks, btodb(size)); |
1582 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | | 1570 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
1583 | if (ip->i_nlink > 0 && mp->mnt_wapbl) | | 1571 | if (ip->i_nlink > 0 && mp->mnt_wapbl) |
1584 | error = syncsnap(vp); | | 1572 | error = syncsnap(vp); |
1585 | else | | 1573 | else |
1586 | error = 0; | | 1574 | error = 0; |
| | | 1575 | mutex_enter(&si->si_lock); |
| | | 1576 | si->si_owner = NULL; |
| | | 1577 | mutex_exit(&si->si_lock); |
1587 | mutex_exit(&si->si_snaplock); | | 1578 | mutex_exit(&si->si_snaplock); |
1588 | return (error == 0); | | 1579 | return (error == 0); |
1589 | } | | 1580 | } |
1590 | if (lbn >= NDADDR) | | 1581 | if (lbn >= NDADDR) |
1591 | brelse(ibp, 0); | | 1582 | brelse(ibp, 0); |
1592 | #ifdef DEBUG | | 1583 | #ifdef DEBUG |
1593 | if (snapdebug) | | 1584 | if (snapdebug) |
1594 | printf("%s%llu lbn %" PRId64 " %s %llu size %ld\n", | | 1585 | printf("%s%llu lbn %" PRId64 " %s %llu size %ld\n", |
1595 | "Copyonremove: snapino ", | | 1586 | "Copyonremove: snapino ", |
1596 | (unsigned long long)ip->i_number, | | 1587 | (unsigned long long)ip->i_number, |
1597 | lbn, "for inum", (unsigned long long)inum, size); | | 1588 | lbn, "for inum", (unsigned long long)inum, size); |
1598 | #endif | | 1589 | #endif |
1599 | /* | | 1590 | /* |
| @@ -1613,37 +1604,37 @@ retry: | | | @@ -1613,37 +1604,37 @@ retry: |
1613 | mutex_enter(&si->si_lock); | | 1604 | mutex_enter(&si->si_lock); |
1614 | break; | | 1605 | break; |
1615 | } | | 1606 | } |
1616 | } | | 1607 | } |
1617 | error = wrsnapblk(vp, saved_data, lbn); | | 1608 | error = wrsnapblk(vp, saved_data, lbn); |
1618 | if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl) | | 1609 | if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl) |
1619 | error = syncsnap(vp); | | 1610 | error = syncsnap(vp); |
1620 | mutex_enter(&si->si_lock); | | 1611 | mutex_enter(&si->si_lock); |
1621 | if (error) | | 1612 | if (error) |
1622 | break; | | 1613 | break; |
1623 | if (gen != si->si_gen) | | 1614 | if (gen != si->si_gen) |
1624 | goto retry; | | 1615 | goto retry; |
1625 | } | | 1616 | } |
| | | 1617 | si->si_owner = NULL; |
1626 | mutex_exit(&si->si_lock); | | 1618 | mutex_exit(&si->si_lock); |
| | | 1619 | mutex_exit(&si->si_snaplock); |
1627 | if (saved_data) | | 1620 | if (saved_data) |
1628 | free(saved_data, M_UFSMNT); | | 1621 | free(saved_data, M_UFSMNT); |
1629 | /* | | 1622 | /* |
1630 | * If we have been unable to allocate a block in which to do | | 1623 | * If we have been unable to allocate a block in which to do |
1631 | * the copy, then return non-zero so that the fragment will | | 1624 | * the copy, then return non-zero so that the fragment will |
1632 | * not be freed. Although space will be lost, the snapshot | | 1625 | * not be freed. Although space will be lost, the snapshot |
1633 | * will stay consistent. | | 1626 | * will stay consistent. |
1634 | */ | | 1627 | */ |
1635 | if (snapshot_locked) | | | |
1636 | mutex_exit(&si->si_snaplock); | | | |
1637 | return (error); | | 1628 | return (error); |
1638 | } | | 1629 | } |
1639 | | | 1630 | |
1640 | /* | | 1631 | /* |
1641 | * Associate snapshot files when mounting. | | 1632 | * Associate snapshot files when mounting. |
1642 | */ | | 1633 | */ |
1643 | void | | 1634 | void |
1644 | ffs_snapshot_mount(struct mount *mp) | | 1635 | ffs_snapshot_mount(struct mount *mp) |
1645 | { | | 1636 | { |
1646 | struct vnode *devvp = VFSTOUFS(mp)->um_devvp; | | 1637 | struct vnode *devvp = VFSTOUFS(mp)->um_devvp; |
1647 | struct fs *fs = VFSTOUFS(mp)->um_fs; | | 1638 | struct fs *fs = VFSTOUFS(mp)->um_fs; |
1648 | struct lwp *l = curlwp; | | 1639 | struct lwp *l = curlwp; |
1649 | struct vnode *vp; | | 1640 | struct vnode *vp; |
| @@ -1836,26 +1827,35 @@ ffs_copyonwrite(void *v, struct buf *bp, | | | @@ -1836,26 +1827,35 @@ ffs_copyonwrite(void *v, struct buf *bp, |
1836 | break; | | 1827 | break; |
1837 | if (snapblklist[mid] < lbn) | | 1828 | if (snapblklist[mid] < lbn) |
1838 | lower = mid + 1; | | 1829 | lower = mid + 1; |
1839 | else | | 1830 | else |
1840 | upper = mid - 1; | | 1831 | upper = mid - 1; |
1841 | } | | 1832 | } |
1842 | if (lower <= upper) { | | 1833 | if (lower <= upper) { |
1843 | mutex_exit(&si->si_lock); | | 1834 | mutex_exit(&si->si_lock); |
1844 | return 0; | | 1835 | return 0; |
1845 | } | | 1836 | } |
1846 | /* | | 1837 | /* |
1847 | * Not in the precomputed list, so check the snapshots. | | 1838 | * Not in the precomputed list, so check the snapshots. |
1848 | */ | | 1839 | */ |
| | | 1840 | if (si->si_owner != curlwp) { |
| | | 1841 | if (!mutex_tryenter(&si->si_snaplock)) { |
| | | 1842 | mutex_exit(&si->si_lock); |
| | | 1843 | mutex_enter(&si->si_snaplock); |
| | | 1844 | mutex_enter(&si->si_lock); |
| | | 1845 | } |
| | | 1846 | si->si_owner = curlwp; |
| | | 1847 | snapshot_locked = 1; |
| | | 1848 | } |
1849 | if (data_valid && bp->b_bcount == fs->fs_bsize) | | 1849 | if (data_valid && bp->b_bcount == fs->fs_bsize) |
1850 | saved_data = bp->b_data; | | 1850 | saved_data = bp->b_data; |
1851 | retry: | | 1851 | retry: |
1852 | gen = si->si_gen; | | 1852 | gen = si->si_gen; |
1853 | TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { | | 1853 | TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { |
1854 | vp = ITOV(ip); | | 1854 | vp = ITOV(ip); |
1855 | /* | | 1855 | /* |
1856 | * We ensure that everything of our own that needs to be | | 1856 | * We ensure that everything of our own that needs to be |
1857 | * copied will be done at the time that ffs_snapshot is | | 1857 | * copied will be done at the time that ffs_snapshot is |
1858 | * called. Thus we can skip the check here which can | | 1858 | * called. Thus we can skip the check here which can |
1859 | * deadlock in doing the lookup in ffs_balloc. | | 1859 | * deadlock in doing the lookup in ffs_balloc. |
1860 | */ | | 1860 | */ |
1861 | if (bp->b_vp == vp) | | 1861 | if (bp->b_vp == vp) |
| @@ -1876,54 +1876,28 @@ retry: | | | @@ -1876,54 +1876,28 @@ retry: |
1876 | goto retry; | | 1876 | goto retry; |
1877 | } | | 1877 | } |
1878 | #ifdef DIAGNOSTIC | | 1878 | #ifdef DIAGNOSTIC |
1879 | if (blkno == BLK_SNAP && bp->b_lblkno >= 0) | | 1879 | if (blkno == BLK_SNAP && bp->b_lblkno >= 0) |
1880 | panic("ffs_copyonwrite: bad copy block"); | | 1880 | panic("ffs_copyonwrite: bad copy block"); |
1881 | #endif | | 1881 | #endif |
1882 | if (blkno != 0) | | 1882 | if (blkno != 0) |
1883 | continue; | | 1883 | continue; |
1884 | | | 1884 | |
1885 | if (curlwp == uvm.pagedaemon_lwp) { | | 1885 | if (curlwp == uvm.pagedaemon_lwp) { |
1886 | error = ENOMEM; | | 1886 | error = ENOMEM; |
1887 | break; | | 1887 | break; |
1888 | } | | 1888 | } |
1889 | | | 1889 | /* Only one level of recursion allowed. */ |
1890 | if (snapshot_locked == 0) { | | 1890 | KASSERT(snapshot_locked); |
1891 | if (!mutex_tryenter(&si->si_snaplock)) { | | | |
1892 | mutex_exit(&si->si_lock); | | | |
1893 | mutex_enter(&si->si_snaplock); | | | |
1894 | mutex_enter(&si->si_lock); | | | |
1895 | } | | | |
1896 | snapshot_locked = 1; | | | |
1897 | if (gen != si->si_gen) | | | |
1898 | goto retry; | | | |
1899 | | | | |
1900 | /* Check again if block still needs to be copied */ | | | |
1901 | if (lbn < NDADDR) { | | | |
1902 | blkno = db_get(ip, lbn); | | | |
1903 | } else { | | | |
1904 | mutex_exit(&si->si_lock); | | | |
1905 | if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) { | | | |
1906 | mutex_enter(&si->si_lock); | | | |
1907 | break; | | | |
1908 | } | | | |
1909 | mutex_enter(&si->si_lock); | | | |
1910 | if (gen != si->si_gen) | | | |
1911 | goto retry; | | | |
1912 | } | | | |
1913 | | | | |
1914 | if (blkno != 0) | | | |
1915 | continue; | | | |
1916 | } | | | |
1917 | /* | | 1891 | /* |
1918 | * Allocate the block into which to do the copy. Since | | 1892 | * Allocate the block into which to do the copy. Since |
1919 | * multiple processes may all try to copy the same block, | | 1893 | * multiple processes may all try to copy the same block, |
1920 | * we have to recheck our need to do a copy if we sleep | | 1894 | * we have to recheck our need to do a copy if we sleep |
1921 | * waiting for the lock. | | 1895 | * waiting for the lock. |
1922 | * | | 1896 | * |
1923 | * Because all snapshots on a filesystem share a single | | 1897 | * Because all snapshots on a filesystem share a single |
1924 | * lock, we ensure that we will never be in competition | | 1898 | * lock, we ensure that we will never be in competition |
1925 | * with another process to allocate a block. | | 1899 | * with another process to allocate a block. |
1926 | */ | | 1900 | */ |
1927 | #ifdef DEBUG | | 1901 | #ifdef DEBUG |
1928 | if (snapdebug) { | | 1902 | if (snapdebug) { |
1929 | printf("Copyonwrite: snapino %llu lbn %" PRId64 " for ", | | 1903 | printf("Copyonwrite: snapino %llu lbn %" PRId64 " for ", |
| @@ -1958,31 +1932,34 @@ retry: | | | @@ -1958,31 +1932,34 @@ retry: |
1958 | if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl) | | 1932 | if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl) |
1959 | error = syncsnap(vp); | | 1933 | error = syncsnap(vp); |
1960 | mutex_enter(&si->si_lock); | | 1934 | mutex_enter(&si->si_lock); |
1961 | if (error) | | 1935 | if (error) |
1962 | break; | | 1936 | break; |
1963 | if (gen != si->si_gen) | | 1937 | if (gen != si->si_gen) |
1964 | goto retry; | | 1938 | goto retry; |
1965 | } | | 1939 | } |
1966 | /* | | 1940 | /* |
1967 | * Note that we need to synchronously write snapshots that | | 1941 | * Note that we need to synchronously write snapshots that |
1968 | * have not been unlinked, and hence will be visible after | | 1942 | * have not been unlinked, and hence will be visible after |
1969 | * a crash, to ensure their integrity. | | 1943 | * a crash, to ensure their integrity. |
1970 | */ | | 1944 | */ |
1971 | mutex_exit(&si->si_lock); | | 1945 | if (snapshot_locked) { |
| | | 1946 | si->si_owner = NULL; |
| | | 1947 | mutex_exit(&si->si_lock); |
| | | 1948 | mutex_exit(&si->si_snaplock); |
| | | 1949 | } else |
| | | 1950 | mutex_exit(&si->si_lock); |
1972 | if (saved_data && saved_data != bp->b_data) | | 1951 | if (saved_data && saved_data != bp->b_data) |
1973 | free(saved_data, M_UFSMNT); | | 1952 | free(saved_data, M_UFSMNT); |
1974 | if (snapshot_locked) | | | |
1975 | mutex_exit(&si->si_snaplock); | | | |
1976 | return error; | | 1953 | return error; |
1977 | } | | 1954 | } |
1978 | | | 1955 | |
1979 | /* | | 1956 | /* |
1980 | * Read from a snapshot. | | 1957 | * Read from a snapshot. |
1981 | */ | | 1958 | */ |
1982 | int | | 1959 | int |
1983 | ffs_snapshot_read(struct vnode *vp, struct uio *uio, int ioflag) | | 1960 | ffs_snapshot_read(struct vnode *vp, struct uio *uio, int ioflag) |
1984 | { | | 1961 | { |
1985 | struct inode *ip = VTOI(vp); | | 1962 | struct inode *ip = VTOI(vp); |
1986 | struct fs *fs = ip->i_fs; | | 1963 | struct fs *fs = ip->i_fs; |
1987 | struct snap_info *si = VFSTOUFS(vp->v_mount)->um_snapinfo; | | 1964 | struct snap_info *si = VFSTOUFS(vp->v_mount)->um_snapinfo; |
1988 | struct buf *bp; | | 1965 | struct buf *bp; |