Sat Sep 24 23:18:54 2022 UTC ()
malloc(9) -> kmem(9)


(thorpej)
diff -r1.112 -r1.113 src/sys/dev/fss.c

cvs diff -r1.112 -r1.113 src/sys/dev/fss.c (switch to unified diff)

--- src/sys/dev/fss.c 2022/03/31 19:30:15 1.112
+++ src/sys/dev/fss.c 2022/09/24 23:18:54 1.113
@@ -1,1276 +1,1276 @@ @@ -1,1276 +1,1276 @@
1/* $NetBSD: fss.c,v 1.112 2022/03/31 19:30:15 pgoyette Exp $ */ 1/* $NetBSD: fss.c,v 1.113 2022/09/24 23:18:54 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * Copyright (c) 2003 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes. 8 * by Juergen Hannken-Illjes.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * File system snapshot disk driver. 33 * File system snapshot disk driver.
34 * 34 *
35 * Block/character interface to the snapshot of a mounted file system. 35 * Block/character interface to the snapshot of a mounted file system.
36 */ 36 */
37 37
38#include <sys/cdefs.h> 38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.112 2022/03/31 19:30:15 pgoyette Exp $"); 39__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.113 2022/09/24 23:18:54 thorpej Exp $");
40 40
41#include <sys/param.h> 41#include <sys/param.h>
42#include <sys/systm.h> 42#include <sys/systm.h>
43#include <sys/namei.h> 43#include <sys/namei.h>
44#include <sys/proc.h> 44#include <sys/proc.h>
45#include <sys/errno.h> 45#include <sys/errno.h>
46#include <sys/malloc.h> 46#include <sys/kmem.h>
47#include <sys/buf.h> 47#include <sys/buf.h>
48#include <sys/ioctl.h> 48#include <sys/ioctl.h>
49#include <sys/disklabel.h> 49#include <sys/disklabel.h>
50#include <sys/device.h> 50#include <sys/device.h>
51#include <sys/disk.h> 51#include <sys/disk.h>
52#include <sys/stat.h> 52#include <sys/stat.h>
53#include <sys/mount.h> 53#include <sys/mount.h>
54#include <sys/vnode.h> 54#include <sys/vnode.h>
55#include <sys/file.h> 55#include <sys/file.h>
56#include <sys/uio.h> 56#include <sys/uio.h>
57#include <sys/conf.h> 57#include <sys/conf.h>
58#include <sys/kthread.h> 58#include <sys/kthread.h>
59#include <sys/fstrans.h> 59#include <sys/fstrans.h>
60#include <sys/vfs_syscalls.h> /* For do_sys_unlink(). */ 60#include <sys/vfs_syscalls.h> /* For do_sys_unlink(). */
61 61
62#include <miscfs/specfs/specdev.h> 62#include <miscfs/specfs/specdev.h>
63 63
64#include <dev/fssvar.h> 64#include <dev/fssvar.h>
65 65
66#include <uvm/uvm.h> 66#include <uvm/uvm.h>
67 67
68#include "ioconf.h" 68#include "ioconf.h"
69 69
70dev_type_open(fss_open); 70dev_type_open(fss_open);
71dev_type_close(fss_close); 71dev_type_close(fss_close);
72dev_type_read(fss_read); 72dev_type_read(fss_read);
73dev_type_write(fss_write); 73dev_type_write(fss_write);
74dev_type_ioctl(fss_ioctl); 74dev_type_ioctl(fss_ioctl);
75dev_type_strategy(fss_strategy); 75dev_type_strategy(fss_strategy);
76dev_type_dump(fss_dump); 76dev_type_dump(fss_dump);
77dev_type_size(fss_size); 77dev_type_size(fss_size);
78 78
79static void fss_unmount_hook(struct mount *); 79static void fss_unmount_hook(struct mount *);
80static int fss_copy_on_write(void *, struct buf *, bool); 80static int fss_copy_on_write(void *, struct buf *, bool);
81static inline void fss_error(struct fss_softc *, const char *); 81static inline void fss_error(struct fss_softc *, const char *);
82static int fss_create_files(struct fss_softc *, struct fss_set *, 82static int fss_create_files(struct fss_softc *, struct fss_set *,
83 off_t *, struct lwp *); 83 off_t *, struct lwp *);
84static int fss_create_snapshot(struct fss_softc *, struct fss_set *, 84static int fss_create_snapshot(struct fss_softc *, struct fss_set *,
85 struct lwp *); 85 struct lwp *);
86static int fss_delete_snapshot(struct fss_softc *, struct lwp *); 86static int fss_delete_snapshot(struct fss_softc *, struct lwp *);
87static int fss_softc_alloc(struct fss_softc *); 87static int fss_softc_alloc(struct fss_softc *);
88static void fss_softc_free(struct fss_softc *); 88static void fss_softc_free(struct fss_softc *);
89static int fss_read_cluster(struct fss_softc *, u_int32_t); 89static int fss_read_cluster(struct fss_softc *, u_int32_t);
90static void fss_bs_thread(void *); 90static void fss_bs_thread(void *);
91static int fss_bs_io(struct fss_softc *, fss_io_type, 91static int fss_bs_io(struct fss_softc *, fss_io_type,
92 u_int32_t, off_t, int, void *, size_t *); 92 u_int32_t, off_t, int, void *, size_t *);
93static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); 93static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t);
94 94
95static kmutex_t fss_device_lock; /* Protect all units. */ 95static kmutex_t fss_device_lock; /* Protect all units. */
96static kcondvar_t fss_device_cv; /* Serialize snapshot creation. */ 96static kcondvar_t fss_device_cv; /* Serialize snapshot creation. */
97static bool fss_creating = false; /* Currently creating a snapshot. */ 97static bool fss_creating = false; /* Currently creating a snapshot. */
98static int fss_num_attached = 0; /* Number of attached devices. */ 98static int fss_num_attached = 0; /* Number of attached devices. */
99static struct vfs_hooks fss_vfs_hooks = { 99static struct vfs_hooks fss_vfs_hooks = {
100 .vh_unmount = fss_unmount_hook 100 .vh_unmount = fss_unmount_hook
101}; 101};
102 102
103const struct bdevsw fss_bdevsw = { 103const struct bdevsw fss_bdevsw = {
104 .d_open = fss_open, 104 .d_open = fss_open,
105 .d_close = fss_close, 105 .d_close = fss_close,
106 .d_strategy = fss_strategy, 106 .d_strategy = fss_strategy,
107 .d_ioctl = fss_ioctl, 107 .d_ioctl = fss_ioctl,
108 .d_dump = fss_dump, 108 .d_dump = fss_dump,
109 .d_psize = fss_size, 109 .d_psize = fss_size,
110 .d_discard = nodiscard, 110 .d_discard = nodiscard,
111 .d_flag = D_DISK | D_MPSAFE 111 .d_flag = D_DISK | D_MPSAFE
112}; 112};
113 113
114const struct cdevsw fss_cdevsw = { 114const struct cdevsw fss_cdevsw = {
115 .d_open = fss_open, 115 .d_open = fss_open,
116 .d_close = fss_close, 116 .d_close = fss_close,
117 .d_read = fss_read, 117 .d_read = fss_read,
118 .d_write = fss_write, 118 .d_write = fss_write,
119 .d_ioctl = fss_ioctl, 119 .d_ioctl = fss_ioctl,
120 .d_stop = nostop, 120 .d_stop = nostop,
121 .d_tty = notty, 121 .d_tty = notty,
122 .d_poll = nopoll, 122 .d_poll = nopoll,
123 .d_mmap = nommap, 123 .d_mmap = nommap,
124 .d_kqfilter = nokqfilter, 124 .d_kqfilter = nokqfilter,
125 .d_discard = nodiscard, 125 .d_discard = nodiscard,
126 .d_flag = D_DISK | D_MPSAFE 126 .d_flag = D_DISK | D_MPSAFE
127}; 127};
128 128
129static int fss_match(device_t, cfdata_t, void *); 129static int fss_match(device_t, cfdata_t, void *);
130static void fss_attach(device_t, device_t, void *); 130static void fss_attach(device_t, device_t, void *);
131static int fss_detach(device_t, int); 131static int fss_detach(device_t, int);
132 132
133CFATTACH_DECL_NEW(fss, sizeof(struct fss_softc), 133CFATTACH_DECL_NEW(fss, sizeof(struct fss_softc),
134 fss_match, fss_attach, fss_detach, NULL); 134 fss_match, fss_attach, fss_detach, NULL);
135 135
136void 136void
137fssattach(int num) 137fssattach(int num)
138{ 138{
139 139
140 mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE); 140 mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE);
141 cv_init(&fss_device_cv, "snapwait"); 141 cv_init(&fss_device_cv, "snapwait");
142 if (config_cfattach_attach(fss_cd.cd_name, &fss_ca)) 142 if (config_cfattach_attach(fss_cd.cd_name, &fss_ca))
143 aprint_error("%s: unable to register\n", fss_cd.cd_name); 143 aprint_error("%s: unable to register\n", fss_cd.cd_name);
144} 144}
145 145
146static int 146static int
147fss_match(device_t self, cfdata_t cfdata, void *aux) 147fss_match(device_t self, cfdata_t cfdata, void *aux)
148{ 148{
149 return 1; 149 return 1;
150} 150}
151 151
152static void 152static void
153fss_attach(device_t parent, device_t self, void *aux) 153fss_attach(device_t parent, device_t self, void *aux)
154{ 154{
155 struct fss_softc *sc = device_private(self); 155 struct fss_softc *sc = device_private(self);
156 156
157 sc->sc_dev = self; 157 sc->sc_dev = self;
158 sc->sc_bdev = NODEV; 158 sc->sc_bdev = NODEV;
159 mutex_init(&sc->sc_slock, MUTEX_DEFAULT, IPL_NONE); 159 mutex_init(&sc->sc_slock, MUTEX_DEFAULT, IPL_NONE);
160 cv_init(&sc->sc_work_cv, "fssbs"); 160 cv_init(&sc->sc_work_cv, "fssbs");
161 cv_init(&sc->sc_cache_cv, "cowwait"); 161 cv_init(&sc->sc_cache_cv, "cowwait");
162 bufq_alloc(&sc->sc_bufq, "fcfs", 0); 162 bufq_alloc(&sc->sc_bufq, "fcfs", 0);
163 sc->sc_dkdev = malloc(sizeof(*sc->sc_dkdev), M_DEVBUF, M_WAITOK); 163 sc->sc_dkdev = kmem_zalloc(sizeof(*sc->sc_dkdev), KM_SLEEP);
164 sc->sc_dkdev->dk_info = NULL; 164 sc->sc_dkdev->dk_info = NULL;
165 disk_init(sc->sc_dkdev, device_xname(self), NULL); 165 disk_init(sc->sc_dkdev, device_xname(self), NULL);
166 if (!pmf_device_register(self, NULL, NULL)) 166 if (!pmf_device_register(self, NULL, NULL))
167 aprint_error_dev(self, "couldn't establish power handler\n"); 167 aprint_error_dev(self, "couldn't establish power handler\n");
168 168
169 if (fss_num_attached++ == 0) 169 if (fss_num_attached++ == 0)
170 vfs_hooks_attach(&fss_vfs_hooks); 170 vfs_hooks_attach(&fss_vfs_hooks);
171} 171}
172 172
173static int 173static int
174fss_detach(device_t self, int flags) 174fss_detach(device_t self, int flags)
175{ 175{
176 struct fss_softc *sc = device_private(self); 176 struct fss_softc *sc = device_private(self);
177 177
178 mutex_enter(&sc->sc_slock); 178 mutex_enter(&sc->sc_slock);
179 if (sc->sc_state != FSS_IDLE) { 179 if (sc->sc_state != FSS_IDLE) {
180 mutex_exit(&sc->sc_slock); 180 mutex_exit(&sc->sc_slock);
181 return EBUSY; 181 return EBUSY;
182 } 182 }
183 mutex_exit(&sc->sc_slock); 183 mutex_exit(&sc->sc_slock);
184 184
185 if (--fss_num_attached == 0) 185 if (--fss_num_attached == 0)
186 vfs_hooks_detach(&fss_vfs_hooks); 186 vfs_hooks_detach(&fss_vfs_hooks);
187 187
188 pmf_device_deregister(self); 188 pmf_device_deregister(self);
189 mutex_destroy(&sc->sc_slock); 189 mutex_destroy(&sc->sc_slock);
190 cv_destroy(&sc->sc_work_cv); 190 cv_destroy(&sc->sc_work_cv);
191 cv_destroy(&sc->sc_cache_cv); 191 cv_destroy(&sc->sc_cache_cv);
192 bufq_drain(sc->sc_bufq); 192 bufq_drain(sc->sc_bufq);
193 bufq_free(sc->sc_bufq); 193 bufq_free(sc->sc_bufq);
194 disk_destroy(sc->sc_dkdev); 194 disk_destroy(sc->sc_dkdev);
195 free(sc->sc_dkdev, M_DEVBUF); 195 kmem_free(sc->sc_dkdev, sizeof(*sc->sc_dkdev));
196 196
197 return 0; 197 return 0;
198} 198}
199 199
200int 200int
201fss_open(dev_t dev, int flags, int mode, struct lwp *l) 201fss_open(dev_t dev, int flags, int mode, struct lwp *l)
202{ 202{
203 int mflag; 203 int mflag;
204 cfdata_t cf; 204 cfdata_t cf;
205 struct fss_softc *sc; 205 struct fss_softc *sc;
206 206
207 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 207 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
208 208
209 mutex_enter(&fss_device_lock); 209 mutex_enter(&fss_device_lock);
210 210
211 sc = device_lookup_private(&fss_cd, minor(dev)); 211 sc = device_lookup_private(&fss_cd, minor(dev));
212 if (sc == NULL) { 212 if (sc == NULL) {
213 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 213 cf = kmem_zalloc(sizeof(*cf), KM_SLEEP);
214 cf->cf_name = fss_cd.cd_name; 214 cf->cf_name = fss_cd.cd_name;
215 cf->cf_atname = fss_cd.cd_name; 215 cf->cf_atname = fss_cd.cd_name;
216 cf->cf_unit = minor(dev); 216 cf->cf_unit = minor(dev);
217 cf->cf_fstate = FSTATE_STAR; 217 cf->cf_fstate = FSTATE_STAR;
218 sc = device_private(config_attach_pseudo(cf)); 218 sc = device_private(config_attach_pseudo(cf));
219 if (sc == NULL) { 219 if (sc == NULL) {
220 mutex_exit(&fss_device_lock); 220 mutex_exit(&fss_device_lock);
221 return ENOMEM; 221 return ENOMEM;
222 } 222 }
223 sc->sc_state = FSS_IDLE; 223 sc->sc_state = FSS_IDLE;
224 } 224 }
225 225
226 mutex_enter(&sc->sc_slock); 226 mutex_enter(&sc->sc_slock);
227 227
228 sc->sc_flags |= mflag; 228 sc->sc_flags |= mflag;
229 229
230 mutex_exit(&sc->sc_slock); 230 mutex_exit(&sc->sc_slock);
231 mutex_exit(&fss_device_lock); 231 mutex_exit(&fss_device_lock);
232 232
233 return 0; 233 return 0;
234} 234}
235 235
236int 236int
237fss_close(dev_t dev, int flags, int mode, struct lwp *l) 237fss_close(dev_t dev, int flags, int mode, struct lwp *l)
238{ 238{
239 int mflag, error; 239 int mflag, error;
240 cfdata_t cf; 240 cfdata_t cf;
241 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); 241 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev));
242 242
243 if (sc == NULL) 243 if (sc == NULL)
244 return ENXIO; 244 return ENXIO;
245 245
246 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 246 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
247 error = 0; 247 error = 0;
248 248
249 mutex_enter(&fss_device_lock); 249 mutex_enter(&fss_device_lock);
250restart: 250restart:
251 mutex_enter(&sc->sc_slock); 251 mutex_enter(&sc->sc_slock);
252 if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) != mflag) { 252 if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) != mflag) {
253 sc->sc_flags &= ~mflag; 253 sc->sc_flags &= ~mflag;
254 mutex_exit(&sc->sc_slock); 254 mutex_exit(&sc->sc_slock);
255 mutex_exit(&fss_device_lock); 255 mutex_exit(&fss_device_lock);
256 return 0; 256 return 0;
257 } 257 }
258 if (sc->sc_state != FSS_IDLE && 258 if (sc->sc_state != FSS_IDLE &&
259 (sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0) { 259 (sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0) {
260 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE; 260 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE;
261 mutex_exit(&sc->sc_slock); 261 mutex_exit(&sc->sc_slock);
262 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l); 262 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l);
263 goto restart; 263 goto restart;
264 } 264 }
265 if (sc->sc_state != FSS_IDLE) { 265 if (sc->sc_state != FSS_IDLE) {
266 mutex_exit(&sc->sc_slock); 266 mutex_exit(&sc->sc_slock);
267 mutex_exit(&fss_device_lock); 267 mutex_exit(&fss_device_lock);
268 return error; 268 return error;
269 } 269 }
270 270
271 KASSERT(sc->sc_state == FSS_IDLE); 271 KASSERT(sc->sc_state == FSS_IDLE);
272 KASSERT((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag); 272 KASSERT((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag);
273 mutex_exit(&sc->sc_slock); 273 mutex_exit(&sc->sc_slock);
274 cf = device_cfdata(sc->sc_dev); 274 cf = device_cfdata(sc->sc_dev);
275 error = config_detach(sc->sc_dev, DETACH_QUIET); 275 error = config_detach(sc->sc_dev, DETACH_QUIET);
276 if (! error) 276 if (! error)
277 free(cf, M_DEVBUF); 277 kmem_free(cf, sizeof(*cf));
278 mutex_exit(&fss_device_lock); 278 mutex_exit(&fss_device_lock);
279 279
280 return error; 280 return error;
281} 281}
282 282
283void 283void
284fss_strategy(struct buf *bp) 284fss_strategy(struct buf *bp)
285{ 285{
286 const bool write = ((bp->b_flags & B_READ) != B_READ); 286 const bool write = ((bp->b_flags & B_READ) != B_READ);
287 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev)); 287 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev));
288 288
289 if (sc == NULL) { 289 if (sc == NULL) {
290 bp->b_error = ENXIO; 290 bp->b_error = ENXIO;
291 goto done; 291 goto done;
292 } 292 }
293 293
294 mutex_enter(&sc->sc_slock); 294 mutex_enter(&sc->sc_slock);
295 295
296 if (write || sc->sc_state != FSS_ACTIVE) { 296 if (write || sc->sc_state != FSS_ACTIVE) {
297 bp->b_error = (write ? EROFS : ENXIO); 297 bp->b_error = (write ? EROFS : ENXIO);
298 goto done; 298 goto done;
299 } 299 }
300 /* Check bounds for non-persistent snapshots. */ 300 /* Check bounds for non-persistent snapshots. */
301 if ((sc->sc_flags & FSS_PERSISTENT) == 0 && 301 if ((sc->sc_flags & FSS_PERSISTENT) == 0 &&
302 bounds_check_with_mediasize(bp, DEV_BSIZE, 302 bounds_check_with_mediasize(bp, DEV_BSIZE,
303 btodb(FSS_CLTOB(sc, sc->sc_clcount - 1) + sc->sc_clresid)) <= 0) 303 btodb(FSS_CLTOB(sc, sc->sc_clcount - 1) + sc->sc_clresid)) <= 0)
304 goto done; 304 goto done;
305 305
306 bp->b_rawblkno = bp->b_blkno; 306 bp->b_rawblkno = bp->b_blkno;
307 bufq_put(sc->sc_bufq, bp); 307 bufq_put(sc->sc_bufq, bp);
308 cv_signal(&sc->sc_work_cv); 308 cv_signal(&sc->sc_work_cv);
309 309
310 mutex_exit(&sc->sc_slock); 310 mutex_exit(&sc->sc_slock);
311 return; 311 return;
312 312
313done: 313done:
314 if (sc != NULL) 314 if (sc != NULL)
315 mutex_exit(&sc->sc_slock); 315 mutex_exit(&sc->sc_slock);
316 bp->b_resid = bp->b_bcount; 316 bp->b_resid = bp->b_bcount;
317 biodone(bp); 317 biodone(bp);
318} 318}
319 319
320int 320int
321fss_read(dev_t dev, struct uio *uio, int flags) 321fss_read(dev_t dev, struct uio *uio, int flags)
322{ 322{
323 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); 323 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio);
324} 324}
325 325
326int 326int
327fss_write(dev_t dev, struct uio *uio, int flags) 327fss_write(dev_t dev, struct uio *uio, int flags)
328{ 328{
329 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); 329 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio);
330} 330}
331 331
332int 332int
333fss_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 333fss_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
334{ 334{
335 int error = 0; 335 int error = 0;
336 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); 336 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev));
337 struct fss_set _fss; 337 struct fss_set _fss;
338 struct fss_set *fss = (struct fss_set *)data; 338 struct fss_set *fss = (struct fss_set *)data;
339 struct fss_set50 *fss50 = (struct fss_set50 *)data; 339 struct fss_set50 *fss50 = (struct fss_set50 *)data;
340 struct fss_get *fsg = (struct fss_get *)data; 340 struct fss_get *fsg = (struct fss_get *)data;
341#ifndef _LP64 341#ifndef _LP64
342 struct fss_get50 *fsg50 = (struct fss_get50 *)data; 342 struct fss_get50 *fsg50 = (struct fss_get50 *)data;
343#endif 343#endif
344 344
345 if (sc == NULL) 345 if (sc == NULL)
346 return ENXIO; 346 return ENXIO;
347 347
348 switch (cmd) { 348 switch (cmd) {
349 case FSSIOCSET50: 349 case FSSIOCSET50:
350 fss = &_fss; 350 fss = &_fss;
351 fss->fss_mount = fss50->fss_mount; 351 fss->fss_mount = fss50->fss_mount;
352 fss->fss_bstore = fss50->fss_bstore; 352 fss->fss_bstore = fss50->fss_bstore;
353 fss->fss_csize = fss50->fss_csize; 353 fss->fss_csize = fss50->fss_csize;
354 fss->fss_flags = 0; 354 fss->fss_flags = 0;
355 /* Fall through */ 355 /* Fall through */
356 case FSSIOCSET: 356 case FSSIOCSET:
357 mutex_enter(&sc->sc_slock); 357 mutex_enter(&sc->sc_slock);
358 if ((flag & FWRITE) == 0) 358 if ((flag & FWRITE) == 0)
359 error = EPERM; 359 error = EPERM;
360 if (error == 0 && sc->sc_state != FSS_IDLE) { 360 if (error == 0 && sc->sc_state != FSS_IDLE) {
361 error = EBUSY; 361 error = EBUSY;
362 } else { 362 } else {
363 sc->sc_state = FSS_CREATING; 363 sc->sc_state = FSS_CREATING;
364 copyinstr(fss->fss_mount, sc->sc_mntname, 364 copyinstr(fss->fss_mount, sc->sc_mntname,
365 sizeof(sc->sc_mntname), NULL); 365 sizeof(sc->sc_mntname), NULL);
366 memset(&sc->sc_time, 0, sizeof(sc->sc_time)); 366 memset(&sc->sc_time, 0, sizeof(sc->sc_time));
367 sc->sc_clshift = 0; 367 sc->sc_clshift = 0;
368 } 368 }
369 mutex_exit(&sc->sc_slock); 369 mutex_exit(&sc->sc_slock);
370 if (error) 370 if (error)
371 break; 371 break;
372 372
373 /* 373 /*
374 * Serialize snapshot creation. 374 * Serialize snapshot creation.
375 */ 375 */
376 mutex_enter(&fss_device_lock); 376 mutex_enter(&fss_device_lock);
377 while (fss_creating) { 377 while (fss_creating) {
378 error = cv_wait_sig(&fss_device_cv, &fss_device_lock); 378 error = cv_wait_sig(&fss_device_cv, &fss_device_lock);
379 if (error) { 379 if (error) {
380 mutex_enter(&sc->sc_slock); 380 mutex_enter(&sc->sc_slock);
381 KASSERT(sc->sc_state == FSS_CREATING); 381 KASSERT(sc->sc_state == FSS_CREATING);
382 sc->sc_state = FSS_IDLE; 382 sc->sc_state = FSS_IDLE;
383 mutex_exit(&sc->sc_slock); 383 mutex_exit(&sc->sc_slock);
384 mutex_exit(&fss_device_lock); 384 mutex_exit(&fss_device_lock);
385 break; 385 break;
386 } 386 }
387 } 387 }
388 fss_creating = true; 388 fss_creating = true;
389 mutex_exit(&fss_device_lock); 389 mutex_exit(&fss_device_lock);
390 390
391 error = fss_create_snapshot(sc, fss, l); 391 error = fss_create_snapshot(sc, fss, l);
392 mutex_enter(&sc->sc_slock); 392 mutex_enter(&sc->sc_slock);
393 if (error == 0) { 393 if (error == 0) {
394 KASSERT(sc->sc_state == FSS_ACTIVE); 394 KASSERT(sc->sc_state == FSS_ACTIVE);
395 sc->sc_uflags = fss->fss_flags; 395 sc->sc_uflags = fss->fss_flags;
396 } else { 396 } else {
397 KASSERT(sc->sc_state == FSS_CREATING); 397 KASSERT(sc->sc_state == FSS_CREATING);
398 sc->sc_state = FSS_IDLE; 398 sc->sc_state = FSS_IDLE;
399 } 399 }
400 mutex_exit(&sc->sc_slock); 400 mutex_exit(&sc->sc_slock);
401 401
402 mutex_enter(&fss_device_lock); 402 mutex_enter(&fss_device_lock);
403 fss_creating = false; 403 fss_creating = false;
404 cv_broadcast(&fss_device_cv); 404 cv_broadcast(&fss_device_cv);
405 mutex_exit(&fss_device_lock); 405 mutex_exit(&fss_device_lock);
406 406
407 break; 407 break;
408 408
409 case FSSIOCCLR: 409 case FSSIOCCLR:
410 mutex_enter(&sc->sc_slock); 410 mutex_enter(&sc->sc_slock);
411 if ((flag & FWRITE) == 0) { 411 if ((flag & FWRITE) == 0) {
412 error = EPERM; 412 error = EPERM;
413 } else if (sc->sc_state != FSS_ACTIVE) { 413 } else if (sc->sc_state != FSS_ACTIVE) {
414 error = EBUSY; 414 error = EBUSY;
415 } else { 415 } else {
416 sc->sc_state = FSS_DESTROYING; 416 sc->sc_state = FSS_DESTROYING;
417 } 417 }
418 mutex_exit(&sc->sc_slock); 418 mutex_exit(&sc->sc_slock);
419 if (error) 419 if (error)
420 break; 420 break;
421 421
422 error = fss_delete_snapshot(sc, l); 422 error = fss_delete_snapshot(sc, l);
423 mutex_enter(&sc->sc_slock); 423 mutex_enter(&sc->sc_slock);
424 if (error) 424 if (error)
425 fss_error(sc, "Failed to delete snapshot"); 425 fss_error(sc, "Failed to delete snapshot");
426 else 426 else
427 KASSERT(sc->sc_state == FSS_IDLE); 427 KASSERT(sc->sc_state == FSS_IDLE);
428 mutex_exit(&sc->sc_slock); 428 mutex_exit(&sc->sc_slock);
429 break; 429 break;
430 430
431#ifndef _LP64 431#ifndef _LP64
432 case FSSIOCGET50: 432 case FSSIOCGET50:
433 mutex_enter(&sc->sc_slock); 433 mutex_enter(&sc->sc_slock);
434 if (sc->sc_state == FSS_IDLE) { 434 if (sc->sc_state == FSS_IDLE) {
435 error = ENXIO; 435 error = ENXIO;
436 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 436 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
437 memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN); 437 memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN);
438 fsg50->fsg_csize = FSS_CLSIZE(sc); 438 fsg50->fsg_csize = FSS_CLSIZE(sc);
439 timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time); 439 timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time);
440 fsg50->fsg_mount_size = sc->sc_clcount; 440 fsg50->fsg_mount_size = sc->sc_clcount;
441 fsg50->fsg_bs_size = sc->sc_clnext; 441 fsg50->fsg_bs_size = sc->sc_clnext;
442 error = 0; 442 error = 0;
443 } else { 443 } else {
444 memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN); 444 memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN);
445 fsg50->fsg_csize = 0; 445 fsg50->fsg_csize = 0;
446 timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time); 446 timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time);
447 fsg50->fsg_mount_size = 0; 447 fsg50->fsg_mount_size = 0;
448 fsg50->fsg_bs_size = 0; 448 fsg50->fsg_bs_size = 0;
449 error = 0; 449 error = 0;
450 } 450 }
451 mutex_exit(&sc->sc_slock); 451 mutex_exit(&sc->sc_slock);
452 break; 452 break;
453#endif /* _LP64 */ 453#endif /* _LP64 */
454 454
455 case FSSIOCGET: 455 case FSSIOCGET:
456 mutex_enter(&sc->sc_slock); 456 mutex_enter(&sc->sc_slock);
457 if (sc->sc_state == FSS_IDLE) { 457 if (sc->sc_state == FSS_IDLE) {
458 error = ENXIO; 458 error = ENXIO;
459 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 459 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
460 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 460 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
461 fsg->fsg_csize = FSS_CLSIZE(sc); 461 fsg->fsg_csize = FSS_CLSIZE(sc);
462 fsg->fsg_time = sc->sc_time; 462 fsg->fsg_time = sc->sc_time;
463 fsg->fsg_mount_size = sc->sc_clcount; 463 fsg->fsg_mount_size = sc->sc_clcount;
464 fsg->fsg_bs_size = sc->sc_clnext; 464 fsg->fsg_bs_size = sc->sc_clnext;
465 error = 0; 465 error = 0;
466 } else { 466 } else {
467 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 467 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
468 fsg->fsg_csize = 0; 468 fsg->fsg_csize = 0;
469 fsg->fsg_time = sc->sc_time; 469 fsg->fsg_time = sc->sc_time;
470 fsg->fsg_mount_size = 0; 470 fsg->fsg_mount_size = 0;
471 fsg->fsg_bs_size = 0; 471 fsg->fsg_bs_size = 0;
472 error = 0; 472 error = 0;
473 } 473 }
474 mutex_exit(&sc->sc_slock); 474 mutex_exit(&sc->sc_slock);
475 break; 475 break;
476 476
477 case FSSIOFSET: 477 case FSSIOFSET:
478 mutex_enter(&sc->sc_slock); 478 mutex_enter(&sc->sc_slock);
479 sc->sc_uflags = *(int *)data; 479 sc->sc_uflags = *(int *)data;
480 mutex_exit(&sc->sc_slock); 480 mutex_exit(&sc->sc_slock);
481 error = 0; 481 error = 0;
482 break; 482 break;
483 483
484 case FSSIOFGET: 484 case FSSIOFGET:
485 mutex_enter(&sc->sc_slock); 485 mutex_enter(&sc->sc_slock);
486 *(int *)data = sc->sc_uflags; 486 *(int *)data = sc->sc_uflags;
487 mutex_exit(&sc->sc_slock); 487 mutex_exit(&sc->sc_slock);
488 error = 0; 488 error = 0;
489 break; 489 break;
490 490
491 default: 491 default:
492 error = EINVAL; 492 error = EINVAL;
493 break; 493 break;
494 } 494 }
495 495
496 return error; 496 return error;
497} 497}
498 498
499int 499int
500fss_size(dev_t dev) 500fss_size(dev_t dev)
501{ 501{
502 return -1; 502 return -1;
503} 503}
504 504
505int 505int
506fss_dump(dev_t dev, daddr_t blkno, void *va, 506fss_dump(dev_t dev, daddr_t blkno, void *va,
507 size_t size) 507 size_t size)
508{ 508{
509 return EROFS; 509 return EROFS;
510} 510}
511 511
512/* 512/*
513 * An error occurred reading or writing the snapshot or backing store. 513 * An error occurred reading or writing the snapshot or backing store.
514 * If it is the first error log to console and disestablish cow handler. 514 * If it is the first error log to console and disestablish cow handler.
515 * The caller holds the mutex. 515 * The caller holds the mutex.
516 */ 516 */
517static inline void 517static inline void
518fss_error(struct fss_softc *sc, const char *msg) 518fss_error(struct fss_softc *sc, const char *msg)
519{ 519{
520 520
521 KASSERT(mutex_owned(&sc->sc_slock)); 521 KASSERT(mutex_owned(&sc->sc_slock));
522 522
523 if ((sc->sc_flags & FSS_ERROR)) 523 if ((sc->sc_flags & FSS_ERROR))
524 return; 524 return;
525 525
526 aprint_error_dev(sc->sc_dev, "snapshot invalid: %s\n", msg); 526 aprint_error_dev(sc->sc_dev, "snapshot invalid: %s\n", msg);
527 if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 527 if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
528 mutex_exit(&sc->sc_slock); 528 mutex_exit(&sc->sc_slock);
529 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc); 529 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc);
530 mutex_enter(&sc->sc_slock); 530 mutex_enter(&sc->sc_slock);
531 } 531 }
532 sc->sc_flags |= FSS_ERROR; 532 sc->sc_flags |= FSS_ERROR;
533} 533}
534 534
/*
 * Allocate the variable sized parts of the softc and
 * fork the kernel thread.
 *
 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size
 * must be initialized.
 *
 * Returns 0 on success or an errno from kthread_create().  On failure
 * any memory already allocated here is NOT freed; callers are expected
 * to run fss_softc_free() on their error path.
 */
static int
fss_softc_alloc(struct fss_softc *sc)
{
	int i, error;

	if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
		/* One bit per cluster: has this cluster been copied yet? */
		sc->sc_copied =
		    kmem_zalloc(howmany(sc->sc_clcount, NBBY), KM_SLEEP);
		sc->sc_cache = kmem_alloc(sc->sc_cache_size *
		    sizeof(struct fss_cache), KM_SLEEP);
		for (i = 0; i < sc->sc_cache_size; i++) {
			sc->sc_cache[i].fc_type = FSS_CACHE_FREE;
			sc->sc_cache[i].fc_data =
			    kmem_alloc(FSS_CLSIZE(sc), KM_SLEEP);
			cv_init(&sc->sc_cache[i].fc_state_cv, "cowwait1");
		}

		/* Bitmap of valid indirect clusters plus one cluster-sized
		 * buffer caching the current indirect cluster. */
		sc->sc_indir_valid =
		    kmem_zalloc(howmany(sc->sc_indir_size, NBBY), KM_SLEEP);
		sc->sc_indir_data = kmem_zalloc(FSS_CLSIZE(sc), KM_SLEEP);
	} else {
		/* Persistent (fs-internal) snapshots need no COW state. */
		sc->sc_copied = NULL;
		sc->sc_cache = NULL;
		sc->sc_indir_valid = NULL;
		sc->sc_indir_data = NULL;
	}

	/*
	 * Set FSS_BS_THREAD before creating the thread so the thread
	 * sees it on startup; undo it if creation fails.
	 */
	sc->sc_flags |= FSS_BS_THREAD;
	if ((error = kthread_create(PRI_BIO, KTHREAD_MUSTJOIN, NULL,
	    fss_bs_thread, sc, &sc->sc_bs_lwp,
	    "%s", device_xname(sc->sc_dev))) != 0) {
		sc->sc_flags &= ~FSS_BS_THREAD;
		return error;
	}

	disk_attach(sc->sc_dkdev);

	return 0;
}
581 581
/*
 * Free the variable sized parts of the softc.
 *
 * Safe to call on a partially initialized softc (all pointers are
 * NULL-checked and reset), so it doubles as the error-path cleanup
 * for fss_softc_alloc().
 */
static void
fss_softc_free(struct fss_softc *sc)
{
	int i;

	/*
	 * Stop the backing store thread first: it consumes sc_cache and
	 * the other state freed below.  Clearing FSS_BS_THREAD under
	 * sc_slock and signalling sc_work_cv makes the thread exit;
	 * kthread_join() waits for that before anything is freed.
	 */
	if ((sc->sc_flags & FSS_BS_THREAD) != 0) {
		mutex_enter(&sc->sc_slock);
		sc->sc_flags &= ~FSS_BS_THREAD;
		cv_signal(&sc->sc_work_cv);
		mutex_exit(&sc->sc_slock);
		kthread_join(sc->sc_bs_lwp);

		disk_detach(sc->sc_dkdev);
	}

	if (sc->sc_copied != NULL)
		kmem_free(sc->sc_copied, howmany(sc->sc_clcount, NBBY));
	sc->sc_copied = NULL;

	if (sc->sc_cache != NULL) {
		for (i = 0; i < sc->sc_cache_size; i++)
			if (sc->sc_cache[i].fc_data != NULL) {
				cv_destroy(&sc->sc_cache[i].fc_state_cv);
				kmem_free(sc->sc_cache[i].fc_data,
				    FSS_CLSIZE(sc));
			}
		kmem_free(sc->sc_cache,
		    sc->sc_cache_size*sizeof(struct fss_cache));
	}
	sc->sc_cache = NULL;

	if (sc->sc_indir_valid != NULL)
		kmem_free(sc->sc_indir_valid, howmany(sc->sc_indir_size, NBBY));
	sc->sc_indir_valid = NULL;

	if (sc->sc_indir_data != NULL)
		kmem_free(sc->sc_indir_data, FSS_CLSIZE(sc));
	sc->sc_indir_data = NULL;
}
624 624
625/* 625/*
626 * Set all active snapshots on this file system into ERROR state. 626 * Set all active snapshots on this file system into ERROR state.
627 */ 627 */
628static void 628static void
629fss_unmount_hook(struct mount *mp) 629fss_unmount_hook(struct mount *mp)
630{ 630{
631 int i; 631 int i;
632 struct fss_softc *sc; 632 struct fss_softc *sc;
633 633
634 mutex_enter(&fss_device_lock); 634 mutex_enter(&fss_device_lock);
635 for (i = 0; i < fss_cd.cd_ndevs; i++) { 635 for (i = 0; i < fss_cd.cd_ndevs; i++) {
636 if ((sc = device_lookup_private(&fss_cd, i)) == NULL) 636 if ((sc = device_lookup_private(&fss_cd, i)) == NULL)
637 continue; 637 continue;
638 mutex_enter(&sc->sc_slock); 638 mutex_enter(&sc->sc_slock);
639 if (sc->sc_state != FSS_IDLE && sc->sc_mount == mp) 639 if (sc->sc_state != FSS_IDLE && sc->sc_mount == mp)
640 fss_error(sc, "forced by unmount"); 640 fss_error(sc, "forced by unmount");
641 mutex_exit(&sc->sc_slock); 641 mutex_exit(&sc->sc_slock);
642 } 642 }
643 mutex_exit(&fss_device_lock); 643 mutex_exit(&fss_device_lock);
644} 644}
645 645
646/* 646/*
647 * A buffer is written to the snapshotted block device. Copy to 647 * A buffer is written to the snapshotted block device. Copy to
648 * backing store if needed. 648 * backing store if needed.
649 */ 649 */
650static int 650static int
651fss_copy_on_write(void *v, struct buf *bp, bool data_valid) 651fss_copy_on_write(void *v, struct buf *bp, bool data_valid)
652{ 652{
653 int error; 653 int error;
654 u_int32_t cl, ch, c; 654 u_int32_t cl, ch, c;
655 struct fss_softc *sc = v; 655 struct fss_softc *sc = v;
656 656
657 mutex_enter(&sc->sc_slock); 657 mutex_enter(&sc->sc_slock);
658 if (sc->sc_state != FSS_ACTIVE) { 658 if (sc->sc_state != FSS_ACTIVE) {
659 mutex_exit(&sc->sc_slock); 659 mutex_exit(&sc->sc_slock);
660 return 0; 660 return 0;
661 } 661 }
662 662
663 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 663 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
664 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 664 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
665 error = 0; 665 error = 0;
666 if (curlwp == uvm.pagedaemon_lwp) { 666 if (curlwp == uvm.pagedaemon_lwp) {
667 for (c = cl; c <= ch; c++) 667 for (c = cl; c <= ch; c++)
668 if (isclr(sc->sc_copied, c)) { 668 if (isclr(sc->sc_copied, c)) {
669 error = ENOMEM; 669 error = ENOMEM;
670 break; 670 break;
671 } 671 }
672 } 672 }
673 mutex_exit(&sc->sc_slock); 673 mutex_exit(&sc->sc_slock);
674 674
675 if (error == 0) 675 if (error == 0)
676 for (c = cl; c <= ch; c++) { 676 for (c = cl; c <= ch; c++) {
677 error = fss_read_cluster(sc, c); 677 error = fss_read_cluster(sc, c);
678 if (error) 678 if (error)
679 break; 679 break;
680 } 680 }
681 681
682 return error; 682 return error;
683} 683}
684 684
/*
 * Lookup and open needed files.
 *
 * For file system internal snapshot initializes sc_mntname, sc_mount,
 * sc_bs_vp and sc_time.
 *
 * Otherwise returns dev and size of the underlying block device.
 * Initializes sc_mntname, sc_mount, sc_bdev, sc_bs_vp and sc_mount
 *
 * On error paths taken after sc_bs_vp has been set, the vnode is left
 * referenced; the caller (fss_create_snapshot) releases it via its
 * "bad:" cleanup.
 */
static int
fss_create_files(struct fss_softc *sc, struct fss_set *fss,
    off_t *bsize, struct lwp *l)
{
	int error, bits, fsbsize;
	uint64_t numsec;
	unsigned int secsize;
	struct timespec ts;
	/* distinguish lookup 1 from lookup 2 to reduce mistakes */
	struct pathbuf *pb2;
	struct vnode *vp, *vp2;

	/*
	 * Get the mounted file system.
	 */

	error = namei_simple_user(fss->fss_mount,
	    NSM_FOLLOW_NOEMULROOT, &vp);
	if (error != 0)
		return error;

	/* fss_mount must name the root of a mounted file system. */
	if ((vp->v_vflag & VV_ROOT) != VV_ROOT) {
		vrele(vp);
		return EINVAL;
	}

	sc->sc_mount = vp->v_mount;
	memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN);

	vrele(vp);

	/*
	 * Check for file system internal snapshot.
	 */

	error = namei_simple_user(fss->fss_bstore,
	    NSM_FOLLOW_NOEMULROOT, &vp);
	if (error != 0)
		return error;

	/*
	 * Backing store is a regular file on the snapshotted file system
	 * itself: use the file system's internal snapshot support.
	 */
	if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) {
		sc->sc_flags |= FSS_PERSISTENT;
		sc->sc_bs_vp = vp;

		/* Find the shift matching the fs block size (must be a
		 * power of two, otherwise the loop fails -> EINVAL). */
		fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
		bits = sizeof(sc->sc_bs_bshift)*NBBY;
		for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits;
		    sc->sc_bs_bshift++)
			if (FSS_FSBSIZE(sc) == fsbsize)
				break;
		if (sc->sc_bs_bshift >= bits)
			return EINVAL;

		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
		sc->sc_clshift = 0;

		if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
			error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
			if (error)
				return error;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error != 0)
			return error;
		error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts);
		TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts);

		VOP_UNLOCK(sc->sc_bs_vp);

		return error;
	}
	vrele(vp);

	/*
	 * Get the block device it is mounted on and its size.
	 */

	error = spec_node_lookup_by_mount(sc->sc_mount, &vp);
	if (error)
		return error;
	sc->sc_bdev = vp->v_rdev;

	error = getdisksize(vp, &numsec, &secsize);
	vrele(vp);
	if (error)
		return error;

	*bsize = (off_t)numsec*secsize;

	/*
	 * Get the backing store
	 */

	error = pathbuf_copyin(fss->fss_bstore, &pb2);
	if (error) {
		return error;
	}
	error = vn_open(NULL, pb2, 0, FREAD|FWRITE, 0, &vp2, NULL, NULL);
	if (error != 0) {
		pathbuf_destroy(pb2);
		return error;
	}
	VOP_UNLOCK(vp2);

	sc->sc_bs_vp = vp2;

	/*
	 * NOTE(review): sc_bs_vp was just set to vp2, and callers release
	 * sc_bs_vp on error return -- this vrele() may therefore be one
	 * release too many.  Confirm against the callers' cleanup paths.
	 */
	if (vp2->v_type != VREG && vp2->v_type != VCHR) {
		vrele(vp2);
		pathbuf_destroy(pb2);
		return EINVAL;
	}
	pathbuf_destroy(pb2);

	if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
		error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
		if (error)
			return error;
	}
	if (sc->sc_bs_vp->v_type == VREG) {
		fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
		if (fsbsize & (fsbsize-1))	/* No power of two */
			return EINVAL;
		for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32;
		    sc->sc_bs_bshift++)
			if (FSS_FSBSIZE(sc) == fsbsize)
				break;
		if (sc->sc_bs_bshift >= 32)
			return EINVAL;
		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
	} else {
		/* Character device backing store: use device block size. */
		sc->sc_bs_bshift = DEV_BSHIFT;
		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
	}

	return 0;
}
830 830
831/* 831/*
832 * Create a snapshot. 832 * Create a snapshot.
833 */ 833 */
834static int 834static int
835fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) 835fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l)
836{ 836{
837 int len, error; 837 int len, error;
838 u_int32_t csize; 838 u_int32_t csize;
839 off_t bsize; 839 off_t bsize;
840 840
841 bsize = 0; /* XXX gcc */ 841 bsize = 0; /* XXX gcc */
842 842
843 /* 843 /*
844 * Open needed files. 844 * Open needed files.
845 */ 845 */
846 if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) 846 if ((error = fss_create_files(sc, fss, &bsize, l)) != 0)
847 goto bad; 847 goto bad;
848 848
849 if (sc->sc_flags & FSS_PERSISTENT) { 849 if (sc->sc_flags & FSS_PERSISTENT) {
850 fss_softc_alloc(sc); 850 fss_softc_alloc(sc);
851 mutex_enter(&sc->sc_slock); 851 mutex_enter(&sc->sc_slock);
852 sc->sc_state = FSS_ACTIVE; 852 sc->sc_state = FSS_ACTIVE;
853 mutex_exit(&sc->sc_slock); 853 mutex_exit(&sc->sc_slock);
854 return 0; 854 return 0;
855 } 855 }
856 856
857 /* 857 /*
858 * Set cluster size. Must be a power of two and 858 * Set cluster size. Must be a power of two and
859 * a multiple of backing store block size. 859 * a multiple of backing store block size.
860 */ 860 */
861 if (fss->fss_csize <= 0) 861 if (fss->fss_csize <= 0)
862 csize = MAXPHYS; 862 csize = MAXPHYS;
863 else 863 else
864 csize = fss->fss_csize; 864 csize = fss->fss_csize;
865 if (bsize/csize > FSS_CLUSTER_MAX) 865 if (bsize/csize > FSS_CLUSTER_MAX)
866 csize = bsize/FSS_CLUSTER_MAX+1; 866 csize = bsize/FSS_CLUSTER_MAX+1;
867 867
868 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; 868 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32;
869 sc->sc_clshift++) 869 sc->sc_clshift++)
870 if (FSS_CLSIZE(sc) >= csize) 870 if (FSS_CLSIZE(sc) >= csize)
871 break; 871 break;
872 if (sc->sc_clshift >= 32) { 872 if (sc->sc_clshift >= 32) {
873 error = EINVAL; 873 error = EINVAL;
874 goto bad; 874 goto bad;
875 } 875 }
876 sc->sc_clmask = FSS_CLSIZE(sc)-1; 876 sc->sc_clmask = FSS_CLSIZE(sc)-1;
877 877
878 /* 878 /*
879 * Set number of cache slots. 879 * Set number of cache slots.
880 */ 880 */
881 if (FSS_CLSIZE(sc) <= 8192) 881 if (FSS_CLSIZE(sc) <= 8192)
882 sc->sc_cache_size = 32; 882 sc->sc_cache_size = 32;
883 else if (FSS_CLSIZE(sc) <= 65536) 883 else if (FSS_CLSIZE(sc) <= 65536)
884 sc->sc_cache_size = 8; 884 sc->sc_cache_size = 8;
885 else 885 else
886 sc->sc_cache_size = 4; 886 sc->sc_cache_size = 4;
887 887
888 /* 888 /*
889 * Set number of clusters and size of last cluster. 889 * Set number of clusters and size of last cluster.
890 */ 890 */
891 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; 891 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1;
892 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; 892 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1;
893 893
894 /* 894 /*
895 * Set size of indirect table. 895 * Set size of indirect table.
896 */ 896 */
897 len = sc->sc_clcount*sizeof(u_int32_t); 897 len = sc->sc_clcount*sizeof(u_int32_t);
898 sc->sc_indir_size = FSS_BTOCL(sc, len)+1; 898 sc->sc_indir_size = FSS_BTOCL(sc, len)+1;
899 sc->sc_clnext = sc->sc_indir_size; 899 sc->sc_clnext = sc->sc_indir_size;
900 sc->sc_indir_cur = 0; 900 sc->sc_indir_cur = 0;
901 901
902 if ((error = fss_softc_alloc(sc)) != 0) 902 if ((error = fss_softc_alloc(sc)) != 0)
903 goto bad; 903 goto bad;
904 904
905 /* 905 /*
906 * Activate the snapshot. 906 * Activate the snapshot.
907 */ 907 */
908 908
909 if ((error = vfs_suspend(sc->sc_mount, 0)) != 0) 909 if ((error = vfs_suspend(sc->sc_mount, 0)) != 0)
910 goto bad; 910 goto bad;
911 911
912 microtime(&sc->sc_time); 912 microtime(&sc->sc_time);
913 913
914 vrele_flush(sc->sc_mount); 914 vrele_flush(sc->sc_mount);
915 error = VFS_SYNC(sc->sc_mount, MNT_WAIT, curlwp->l_cred); 915 error = VFS_SYNC(sc->sc_mount, MNT_WAIT, curlwp->l_cred);
916 if (error == 0) 916 if (error == 0)
917 error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc); 917 error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc);
918 if (error == 0) { 918 if (error == 0) {
919 mutex_enter(&sc->sc_slock); 919 mutex_enter(&sc->sc_slock);
920 sc->sc_state = FSS_ACTIVE; 920 sc->sc_state = FSS_ACTIVE;
921 mutex_exit(&sc->sc_slock); 921 mutex_exit(&sc->sc_slock);
922 } 922 }
923 923
924 vfs_resume(sc->sc_mount); 924 vfs_resume(sc->sc_mount);
925 925
926 if (error != 0) 926 if (error != 0)
927 goto bad; 927 goto bad;
928 928
929 aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname); 929 aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname);
930 aprint_debug_dev(sc->sc_dev, 930 aprint_debug_dev(sc->sc_dev,
931 "%u clusters of %u, %u cache slots, %u indir clusters\n", 931 "%u clusters of %u, %u cache slots, %u indir clusters\n",
932 sc->sc_clcount, FSS_CLSIZE(sc), 932 sc->sc_clcount, FSS_CLSIZE(sc),
933 sc->sc_cache_size, sc->sc_indir_size); 933 sc->sc_cache_size, sc->sc_indir_size);
934 934
935 return 0; 935 return 0;
936 936
937bad: 937bad:
938 fss_softc_free(sc); 938 fss_softc_free(sc);
939 if (sc->sc_bs_vp != NULL) { 939 if (sc->sc_bs_vp != NULL) {
940 if (sc->sc_flags & FSS_PERSISTENT) 940 if (sc->sc_flags & FSS_PERSISTENT)
941 vrele(sc->sc_bs_vp); 941 vrele(sc->sc_bs_vp);
942 else 942 else
943 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); 943 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
944 } 944 }
945 sc->sc_bs_vp = NULL; 945 sc->sc_bs_vp = NULL;
946 946
947 return error; 947 return error;
948} 948}
949 949
950/* 950/*
951 * Delete a snapshot. 951 * Delete a snapshot.
952 */ 952 */
953static int 953static int
954fss_delete_snapshot(struct fss_softc *sc, struct lwp *l) 954fss_delete_snapshot(struct fss_softc *sc, struct lwp *l)
955{ 955{
956 956
957 mutex_enter(&sc->sc_slock); 957 mutex_enter(&sc->sc_slock);
958 if ((sc->sc_flags & FSS_PERSISTENT) == 0 && 958 if ((sc->sc_flags & FSS_PERSISTENT) == 0 &&
959 (sc->sc_flags & FSS_ERROR) == 0) { 959 (sc->sc_flags & FSS_ERROR) == 0) {
960 mutex_exit(&sc->sc_slock); 960 mutex_exit(&sc->sc_slock);
961 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc); 961 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc);
962 } else { 962 } else {
963 mutex_exit(&sc->sc_slock); 963 mutex_exit(&sc->sc_slock);
964 } 964 }
965 965
966 fss_softc_free(sc); 966 fss_softc_free(sc);
967 if (sc->sc_flags & FSS_PERSISTENT) 967 if (sc->sc_flags & FSS_PERSISTENT)
968 vrele(sc->sc_bs_vp); 968 vrele(sc->sc_bs_vp);
969 else 969 else
970 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); 970 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
971 971
972 mutex_enter(&sc->sc_slock); 972 mutex_enter(&sc->sc_slock);
973 sc->sc_state = FSS_IDLE; 973 sc->sc_state = FSS_IDLE;
974 sc->sc_mount = NULL; 974 sc->sc_mount = NULL;
975 sc->sc_bdev = NODEV; 975 sc->sc_bdev = NODEV;
976 sc->sc_bs_vp = NULL; 976 sc->sc_bs_vp = NULL;
977 sc->sc_flags &= ~FSS_PERSISTENT; 977 sc->sc_flags &= ~FSS_PERSISTENT;
978 mutex_exit(&sc->sc_slock); 978 mutex_exit(&sc->sc_slock);
979 979
980 return 0; 980 return 0;
981} 981}
982 982
/*
 * Read a cluster from the snapshotted block device to the cache.
 *
 * Returns 0 if the cluster is already copied, already cached, or was
 * read successfully; otherwise an error from the device read.  May
 * sleep waiting for a busy cache slot or for another reader of the
 * same cluster.
 */
static int
fss_read_cluster(struct fss_softc *sc, u_int32_t cl)
{
	int error, todo, offset, len;
	daddr_t dblk;
	struct buf *bp, *mbp;
	struct fss_cache *scp, *scl;

	/*
	 * Get a free cache slot.
	 */
	scl = sc->sc_cache+sc->sc_cache_size;	/* one past last slot */

	mutex_enter(&sc->sc_slock);

restart:
	/* Nothing to do if already copied or the snapshot died. */
	if (isset(sc->sc_copied, cl) || sc->sc_state != FSS_ACTIVE) {
		mutex_exit(&sc->sc_slock);
		return 0;
	}

	/*
	 * Someone else may hold this cluster: VALID means the copy is
	 * done, BUSY means a read is in flight -- wait and re-evaluate
	 * from the top since the world may change while we sleep.
	 */
	for (scp = sc->sc_cache; scp < scl; scp++) {
		if (scp->fc_type == FSS_CACHE_VALID) {
			if (scp->fc_cluster == cl) {
				mutex_exit(&sc->sc_slock);
				return 0;
			}
		} else if (scp->fc_type == FSS_CACHE_BUSY) {
			if (scp->fc_cluster == cl) {
				cv_wait(&scp->fc_state_cv, &sc->sc_slock);
				goto restart;
			}
		}
	}

	/* Claim a free slot, or wait for one and start over. */
	for (scp = sc->sc_cache; scp < scl; scp++)
		if (scp->fc_type == FSS_CACHE_FREE) {
			scp->fc_type = FSS_CACHE_BUSY;
			scp->fc_cluster = cl;
			break;
		}
	if (scp >= scl) {
		cv_wait(&sc->sc_cache_cv, &sc->sc_slock);
		goto restart;
	}

	mutex_exit(&sc->sc_slock);

	/*
	 * Start the read.
	 */
	dblk = btodb(FSS_CLTOB(sc, cl));
	if (cl == sc->sc_clcount-1) {
		/* Last cluster may be short; zero-fill the tail. */
		todo = sc->sc_clresid;
		memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo);
	} else
		todo = FSS_CLSIZE(sc);
	offset = 0;
	/* Master buffer; MAXPHYS-sized pieces are nested under it. */
	mbp = getiobuf(NULL, true);
	mbp->b_bufsize = todo;
	mbp->b_data = scp->fc_data;
	mbp->b_resid = mbp->b_bcount = todo;
	mbp->b_flags = B_READ;
	mbp->b_cflags = BC_BUSY;
	mbp->b_dev = sc->sc_bdev;
	while (todo > 0) {
		len = todo;
		if (len > MAXPHYS)
			len = MAXPHYS;
		/* Whole transfer fits in one piece: use mbp directly. */
		if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo)
			bp = mbp;
		else {
			bp = getiobuf(NULL, true);
			nestiobuf_setup(mbp, bp, offset, len);
		}
		bp->b_lblkno = 0;
		bp->b_blkno = dblk;
		bdev_strategy(bp);
		dblk += btodb(len);
		offset += len;
		todo -= len;
	}
	error = biowait(mbp);
	if (error == 0 && mbp->b_resid != 0)
		error = EIO;	/* short read */
	putiobuf(mbp);

	/*
	 * Publish the result: mark the slot VALID (or FREE on error),
	 * wake waiters on this cluster, and on success record the copy
	 * and kick the backing store thread to flush it.
	 */
	mutex_enter(&sc->sc_slock);
	scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID);
	cv_broadcast(&scp->fc_state_cv);
	if (error == 0) {
		setbit(sc->sc_copied, scp->fc_cluster);
		cv_signal(&sc->sc_work_cv);
	}
	mutex_exit(&sc->sc_slock);

	return error;
}
1084 1084
1085/* 1085/*
1086 * Read/write clusters from/to backing store. 1086 * Read/write clusters from/to backing store.
1087 * For persistent snapshots must be called with cl == 0. off is the 1087 * For persistent snapshots must be called with cl == 0. off is the
1088 * offset into the snapshot. 1088 * offset into the snapshot.
1089 */ 1089 */
1090static int 1090static int
1091fss_bs_io(struct fss_softc *sc, fss_io_type rw, 1091fss_bs_io(struct fss_softc *sc, fss_io_type rw,
1092 u_int32_t cl, off_t off, int len, void *data, size_t *resid) 1092 u_int32_t cl, off_t off, int len, void *data, size_t *resid)
1093{ 1093{
1094 int error; 1094 int error;
1095 1095
1096 off += FSS_CLTOB(sc, cl); 1096 off += FSS_CLTOB(sc, cl);
1097 1097
1098 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); 1098 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);
1099 1099
1100 error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp, 1100 error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp,
1101 data, len, off, UIO_SYSSPACE, 1101 data, len, off, UIO_SYSSPACE,
1102 IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED, 1102 IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED,
1103 sc->sc_bs_lwp->l_cred, resid, NULL); 1103 sc->sc_bs_lwp->l_cred, resid, NULL);
1104 if (error == 0) { 1104 if (error == 0) {
1105 rw_enter(sc->sc_bs_vp->v_uobj.vmobjlock, RW_WRITER); 1105 rw_enter(sc->sc_bs_vp->v_uobj.vmobjlock, RW_WRITER);
1106 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), 1106 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off),
1107 round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); 1107 round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
1108 } 1108 }
1109 1109
1110 VOP_UNLOCK(sc->sc_bs_vp); 1110 VOP_UNLOCK(sc->sc_bs_vp);
1111 1111
1112 return error; 1112 return error;
1113} 1113}
1114 1114
/*
 * Get a pointer to the indirect slot for this cluster.
 *
 * The softc caches exactly one indirect cluster (sc_indir_data for
 * cluster sc_indir_cur).  On a miss the current cluster is written
 * back first if dirty, then the requested one is read in (or zeroed
 * if it was never written).  Returns NULL if backing store I/O fails.
 */
static u_int32_t *
fss_bs_indir(struct fss_softc *sc, u_int32_t cl)
{
	u_int32_t icl;
	int ioff;

	/* Which indirect cluster, and which slot within it. */
	icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t));
	ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t));

	if (sc->sc_indir_cur == icl)
		return &sc->sc_indir_data[ioff];

	/* Flush the cached cluster before replacing it. */
	if (sc->sc_indir_dirty) {
		if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0,
		    FSS_CLSIZE(sc), (void *)sc->sc_indir_data, NULL) != 0)
			return NULL;
		setbit(sc->sc_indir_valid, sc->sc_indir_cur);
	}

	sc->sc_indir_dirty = 0;
	sc->sc_indir_cur = icl;

	/* Load the new cluster, or start from zeroes if never written. */
	if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) {
		if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0,
		    FSS_CLSIZE(sc), (void *)sc->sc_indir_data, NULL) != 0)
			return NULL;
	} else
		memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc));

	return &sc->sc_indir_data[ioff];
}
1149 1149
1150/* 1150/*
1151 * The kernel thread (one for every active snapshot). 1151 * The kernel thread (one for every active snapshot).
1152 * 1152 *
1153 * After wakeup it cleans the cache and runs the I/O requests. 1153 * After wakeup it cleans the cache and runs the I/O requests.
1154 */ 1154 */
1155static void 1155static void
1156fss_bs_thread(void *arg) 1156fss_bs_thread(void *arg)
1157{ 1157{
1158 bool thread_idle, is_valid; 1158 bool thread_idle, is_valid;
1159 int error, i, todo, len, crotor, is_read; 1159 int error, i, todo, len, crotor, is_read;
1160 long off; 1160 long off;
1161 char *addr; 1161 char *addr;
1162 u_int32_t c, cl, ch, *indirp; 1162 u_int32_t c, cl, ch, *indirp;
1163 size_t resid; 1163 size_t resid;
1164 struct buf *bp, *nbp; 1164 struct buf *bp, *nbp;
1165 struct fss_softc *sc; 1165 struct fss_softc *sc;
1166 struct fss_cache *scp, *scl; 1166 struct fss_cache *scp, *scl;
1167 1167
1168 sc = arg; 1168 sc = arg;
1169 scl = sc->sc_cache+sc->sc_cache_size; 1169 scl = sc->sc_cache+sc->sc_cache_size;
1170 crotor = 0; 1170 crotor = 0;
1171 thread_idle = false; 1171 thread_idle = false;
1172 1172
1173 mutex_enter(&sc->sc_slock); 1173 mutex_enter(&sc->sc_slock);
1174 1174
1175 for (;;) { 1175 for (;;) {
1176 if (thread_idle) 1176 if (thread_idle)
1177 cv_wait(&sc->sc_work_cv, &sc->sc_slock); 1177 cv_wait(&sc->sc_work_cv, &sc->sc_slock);
1178 thread_idle = true; 1178 thread_idle = true;
1179 if ((sc->sc_flags & FSS_BS_THREAD) == 0) { 1179 if ((sc->sc_flags & FSS_BS_THREAD) == 0) {
1180 mutex_exit(&sc->sc_slock); 1180 mutex_exit(&sc->sc_slock);
1181 kthread_exit(0); 1181 kthread_exit(0);
1182 } 1182 }
1183 1183
1184 /* 1184 /*
1185 * Process I/O requests (persistent) 1185 * Process I/O requests (persistent)
1186 */ 1186 */
1187 1187
1188 if (sc->sc_flags & FSS_PERSISTENT) { 1188 if (sc->sc_flags & FSS_PERSISTENT) {
1189 if ((bp = bufq_get(sc->sc_bufq)) == NULL) 1189 if ((bp = bufq_get(sc->sc_bufq)) == NULL)
1190 continue; 1190 continue;
1191 is_valid = (sc->sc_state == FSS_ACTIVE); 1191 is_valid = (sc->sc_state == FSS_ACTIVE);
1192 is_read = (bp->b_flags & B_READ); 1192 is_read = (bp->b_flags & B_READ);
1193 thread_idle = false; 1193 thread_idle = false;
1194 mutex_exit(&sc->sc_slock); 1194 mutex_exit(&sc->sc_slock);
1195 1195
1196 if (is_valid) { 1196 if (is_valid) {
1197 disk_busy(sc->sc_dkdev); 1197 disk_busy(sc->sc_dkdev);
1198 error = fss_bs_io(sc, FSS_READ, 0, 1198 error = fss_bs_io(sc, FSS_READ, 0,
1199 dbtob(bp->b_blkno), bp->b_bcount, 1199 dbtob(bp->b_blkno), bp->b_bcount,
1200 bp->b_data, &resid); 1200 bp->b_data, &resid);
1201 if (error) 1201 if (error)
1202 resid = bp->b_bcount; 1202 resid = bp->b_bcount;
1203 disk_unbusy(sc->sc_dkdev, 1203 disk_unbusy(sc->sc_dkdev,
1204 (error ? 0 : bp->b_bcount), is_read); 1204 (error ? 0 : bp->b_bcount), is_read);
1205 } else { 1205 } else {
1206 error = ENXIO; 1206 error = ENXIO;
1207 resid = bp->b_bcount; 1207 resid = bp->b_bcount;
1208 } 1208 }
1209 1209
1210 bp->b_error = error; 1210 bp->b_error = error;
1211 bp->b_resid = resid; 1211 bp->b_resid = resid;
1212 biodone(bp); 1212 biodone(bp);
1213 1213
1214 mutex_enter(&sc->sc_slock); 1214 mutex_enter(&sc->sc_slock);
1215 continue; 1215 continue;
1216 } 1216 }
1217 1217
1218 /* 1218 /*
1219 * Clean the cache 1219 * Clean the cache
1220 */ 1220 */
1221 for (i = 0; i < sc->sc_cache_size; i++) { 1221 for (i = 0; i < sc->sc_cache_size; i++) {
1222 crotor = (crotor + 1) % sc->sc_cache_size; 1222 crotor = (crotor + 1) % sc->sc_cache_size;
1223 scp = sc->sc_cache + crotor; 1223 scp = sc->sc_cache + crotor;
1224 if (scp->fc_type != FSS_CACHE_VALID) 1224 if (scp->fc_type != FSS_CACHE_VALID)
1225 continue; 1225 continue;
1226 mutex_exit(&sc->sc_slock); 1226 mutex_exit(&sc->sc_slock);
1227 1227
1228 thread_idle = false; 1228 thread_idle = false;
1229 indirp = fss_bs_indir(sc, scp->fc_cluster); 1229 indirp = fss_bs_indir(sc, scp->fc_cluster);
1230 if (indirp != NULL) { 1230 if (indirp != NULL) {
1231 error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext, 1231 error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext,
1232 0, FSS_CLSIZE(sc), scp->fc_data, NULL); 1232 0, FSS_CLSIZE(sc), scp->fc_data, NULL);
1233 } else 1233 } else
1234 error = EIO; 1234 error = EIO;
1235 1235
1236 mutex_enter(&sc->sc_slock); 1236 mutex_enter(&sc->sc_slock);
1237 if (error == 0) { 1237 if (error == 0) {
1238 *indirp = sc->sc_clnext++; 1238 *indirp = sc->sc_clnext++;
1239 sc->sc_indir_dirty = 1; 1239 sc->sc_indir_dirty = 1;
1240 } else 1240 } else
1241 fss_error(sc, "write error on backing store"); 1241 fss_error(sc, "write error on backing store");
1242 1242
1243 scp->fc_type = FSS_CACHE_FREE; 1243 scp->fc_type = FSS_CACHE_FREE;
1244 cv_broadcast(&sc->sc_cache_cv); 1244 cv_broadcast(&sc->sc_cache_cv);
1245 break; 1245 break;
1246 } 1246 }
1247 1247
1248 /* 1248 /*
1249 * Process I/O requests 1249 * Process I/O requests
1250 */ 1250 */
1251 if ((bp = bufq_get(sc->sc_bufq)) == NULL) 1251 if ((bp = bufq_get(sc->sc_bufq)) == NULL)
1252 continue; 1252 continue;
1253 is_valid = (sc->sc_state == FSS_ACTIVE); 1253 is_valid = (sc->sc_state == FSS_ACTIVE);
1254 is_read = (bp->b_flags & B_READ); 1254 is_read = (bp->b_flags & B_READ);
1255 thread_idle = false; 1255 thread_idle = false;
1256 1256
1257 if (!is_valid) { 1257 if (!is_valid) {
1258 mutex_exit(&sc->sc_slock); 1258 mutex_exit(&sc->sc_slock);
1259 1259
1260 bp->b_error = ENXIO; 1260 bp->b_error = ENXIO;
1261 bp->b_resid = bp->b_bcount; 1261 bp->b_resid = bp->b_bcount;
1262 biodone(bp); 1262 biodone(bp);
1263 1263
1264 mutex_enter(&sc->sc_slock); 1264 mutex_enter(&sc->sc_slock);
1265 continue; 1265 continue;
1266 } 1266 }
1267 1267
1268 disk_busy(sc->sc_dkdev); 1268 disk_busy(sc->sc_dkdev);
1269 1269
1270 /* 1270 /*
1271 * First read from the snapshotted block device unless 1271 * First read from the snapshotted block device unless
1272 * this request is completely covered by backing store. 1272 * this request is completely covered by backing store.
1273 */ 1273 */
1274 1274
1275 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 1275 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
1276 off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); 1276 off = FSS_CLOFF(sc, dbtob(bp->b_blkno));