Sun Oct 2 16:52:27 2016 UTC ()
drop wl_mtx mutex during call to pool_get() with PR_WAITOK

pointed out by riastradh


(jdolecek)
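The change addresses a sleep-while-locked hazard: pool_get() with PR_WAITOK may sleep waiting for memory, and wl_mtx (initialized as an IPL_NONE adaptive mutex in wapbl_start() below) must not be held across that sleep. The hunk that actually implements the fix lies beyond this excerpt, so the snippet below is only a sketch of the general pattern, assuming the affected allocation is a wapbl_dealloc_pool item taken under wl_mtx; the variable wd and the re-check comment are illustrative, not taken from the committed code.

	struct wapbl_dealloc *wd;

	/*
	 * Sketch only, not the literal rev 1.84 hunk: release wl_mtx
	 * around the potentially sleeping allocation, then reacquire it.
	 */
	mutex_exit(&wl->wl_mtx);
	wd = pool_get(&wapbl_dealloc_pool, PR_WAITOK);
	mutex_enter(&wl->wl_mtx);
	/*
	 * Any state guarded by wl_mtx (for example wl_dealloccnt) may
	 * have changed while the mutex was dropped and must be
	 * re-validated before use.
	 */

Keeping PR_WAITOK means the allocation itself cannot fail; the cost is that callers must tolerate the mutex being released and re-taken at this point.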
diff -r1.83 -r1.84 src/sys/kern/vfs_wapbl.c

cvs diff -r1.83 -r1.84 src/sys/kern/vfs_wapbl.c

--- src/sys/kern/vfs_wapbl.c 2016/10/02 16:44:02 1.83
+++ src/sys/kern/vfs_wapbl.c 2016/10/02 16:52:27 1.84
@@ -1,2957 +1,2960 @@
1/* $NetBSD: vfs_wapbl.c,v 1.83 2016/10/02 16:44:02 jdolecek Exp $ */ 1/* $NetBSD: vfs_wapbl.c,v 1.84 2016/10/02 16:52:27 jdolecek Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc. 4 * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc. 8 * by Wasabi Systems, Inc.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * This implements file system independent write ahead filesystem logging. 33 * This implements file system independent write ahead filesystem logging.
34 */ 34 */
35 35
36#define WAPBL_INTERNAL 36#define WAPBL_INTERNAL
37 37
38#include <sys/cdefs.h> 38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.83 2016/10/02 16:44:02 jdolecek Exp $"); 39__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.84 2016/10/02 16:52:27 jdolecek Exp $");
40 40
41#include <sys/param.h> 41#include <sys/param.h>
42#include <sys/bitops.h> 42#include <sys/bitops.h>
43#include <sys/time.h> 43#include <sys/time.h>
44#include <sys/wapbl.h> 44#include <sys/wapbl.h>
45#include <sys/wapbl_replay.h> 45#include <sys/wapbl_replay.h>
46 46
47#ifdef _KERNEL 47#ifdef _KERNEL
48 48
49#include <sys/atomic.h> 49#include <sys/atomic.h>
50#include <sys/conf.h> 50#include <sys/conf.h>
51#include <sys/file.h> 51#include <sys/file.h>
52#include <sys/kauth.h> 52#include <sys/kauth.h>
53#include <sys/kernel.h> 53#include <sys/kernel.h>
54#include <sys/module.h> 54#include <sys/module.h>
55#include <sys/mount.h> 55#include <sys/mount.h>
56#include <sys/mutex.h> 56#include <sys/mutex.h>
57#include <sys/namei.h> 57#include <sys/namei.h>
58#include <sys/proc.h> 58#include <sys/proc.h>
59#include <sys/resourcevar.h> 59#include <sys/resourcevar.h>
60#include <sys/sysctl.h> 60#include <sys/sysctl.h>
61#include <sys/uio.h> 61#include <sys/uio.h>
62#include <sys/vnode.h> 62#include <sys/vnode.h>
63 63
64#include <miscfs/specfs/specdev.h> 64#include <miscfs/specfs/specdev.h>
65 65
66#define wapbl_alloc(s) kmem_alloc((s), KM_SLEEP) 66#define wapbl_alloc(s) kmem_alloc((s), KM_SLEEP)
67#define wapbl_free(a, s) kmem_free((a), (s)) 67#define wapbl_free(a, s) kmem_free((a), (s))
68#define wapbl_calloc(n, s) kmem_zalloc((n)*(s), KM_SLEEP) 68#define wapbl_calloc(n, s) kmem_zalloc((n)*(s), KM_SLEEP)
69 69
70static struct sysctllog *wapbl_sysctl; 70static struct sysctllog *wapbl_sysctl;
71static int wapbl_flush_disk_cache = 1; 71static int wapbl_flush_disk_cache = 1;
72static int wapbl_verbose_commit = 0; 72static int wapbl_verbose_commit = 0;
73 73
74static inline size_t wapbl_space_free(size_t, off_t, off_t); 74static inline size_t wapbl_space_free(size_t, off_t, off_t);
75 75
76#else /* !_KERNEL */ 76#else /* !_KERNEL */
77 77
78#include <assert.h> 78#include <assert.h>
79#include <errno.h> 79#include <errno.h>
80#include <stdbool.h> 80#include <stdbool.h>
81#include <stdio.h> 81#include <stdio.h>
82#include <stdlib.h> 82#include <stdlib.h>
83#include <string.h> 83#include <string.h>
84 84
85#define KDASSERT(x) assert(x) 85#define KDASSERT(x) assert(x)
86#define KASSERT(x) assert(x) 86#define KASSERT(x) assert(x)
87#define wapbl_alloc(s) malloc(s) 87#define wapbl_alloc(s) malloc(s)
88#define wapbl_free(a, s) free(a) 88#define wapbl_free(a, s) free(a)
89#define wapbl_calloc(n, s) calloc((n), (s)) 89#define wapbl_calloc(n, s) calloc((n), (s))
90 90
91#endif /* !_KERNEL */ 91#endif /* !_KERNEL */
92 92
93/* 93/*
94 * INTERNAL DATA STRUCTURES 94 * INTERNAL DATA STRUCTURES
95 */ 95 */
96 96
97/*  97/*
98 * This structure holds per-mount log information. 98 * This structure holds per-mount log information.
99 * 99 *
100 * Legend: a = atomic access only 100 * Legend: a = atomic access only
101 * r = read-only after init 101 * r = read-only after init
102 * l = rwlock held 102 * l = rwlock held
103 * m = mutex held 103 * m = mutex held
104 * lm = rwlock held writing or mutex held 104 * lm = rwlock held writing or mutex held
105 * u = unlocked access ok 105 * u = unlocked access ok
106 * b = bufcache_lock held 106 * b = bufcache_lock held
107 */ 107 */
108LIST_HEAD(wapbl_ino_head, wapbl_ino); 108LIST_HEAD(wapbl_ino_head, wapbl_ino);
109struct wapbl { 109struct wapbl {
110 struct vnode *wl_logvp; /* r: log here */ 110 struct vnode *wl_logvp; /* r: log here */
111 struct vnode *wl_devvp; /* r: log on this device */ 111 struct vnode *wl_devvp; /* r: log on this device */
112 struct mount *wl_mount; /* r: mountpoint wl is associated with */ 112 struct mount *wl_mount; /* r: mountpoint wl is associated with */
113 daddr_t wl_logpbn; /* r: Physical block number of start of log */ 113 daddr_t wl_logpbn; /* r: Physical block number of start of log */
114 int wl_log_dev_bshift; /* r: logarithm of device block size of log 114 int wl_log_dev_bshift; /* r: logarithm of device block size of log
115 device */ 115 device */
116 int wl_fs_dev_bshift; /* r: logarithm of device block size of 116 int wl_fs_dev_bshift; /* r: logarithm of device block size of
117 filesystem device */ 117 filesystem device */
118 118
119 unsigned wl_lock_count; /* m: Count of transactions in progress */ 119 unsigned wl_lock_count; /* m: Count of transactions in progress */
120 120
121 size_t wl_circ_size; /* r: Number of bytes in buffer of log */ 121 size_t wl_circ_size; /* r: Number of bytes in buffer of log */
122 size_t wl_circ_off; /* r: Number of bytes reserved at start */ 122 size_t wl_circ_off; /* r: Number of bytes reserved at start */
123 123
124 size_t wl_bufcount_max; /* r: Number of buffers reserved for log */ 124 size_t wl_bufcount_max; /* r: Number of buffers reserved for log */
125 size_t wl_bufbytes_max; /* r: Number of buf bytes reserved for log */ 125 size_t wl_bufbytes_max; /* r: Number of buf bytes reserved for log */
126 126
127 off_t wl_head; /* l: Byte offset of log head */ 127 off_t wl_head; /* l: Byte offset of log head */
128 off_t wl_tail; /* l: Byte offset of log tail */ 128 off_t wl_tail; /* l: Byte offset of log tail */
129 /* 129 /*
130 * WAPBL log layout, stored on wl_devvp at wl_logpbn: 130 * WAPBL log layout, stored on wl_devvp at wl_logpbn:
131 * 131 *
132 * ___________________ wl_circ_size __________________ 132 * ___________________ wl_circ_size __________________
133 * / \ 133 * / \
134 * +---------+---------+-------+--------------+--------+ 134 * +---------+---------+-------+--------------+--------+
135 * [ commit0 | commit1 | CCWCW | EEEEEEEEEEEE | CCCWCW ] 135 * [ commit0 | commit1 | CCWCW | EEEEEEEEEEEE | CCCWCW ]
136 * +---------+---------+-------+--------------+--------+ 136 * +---------+---------+-------+--------------+--------+
137 * wl_circ_off --^ ^-- wl_head ^-- wl_tail 137 * wl_circ_off --^ ^-- wl_head ^-- wl_tail
138 * 138 *
139 * commit0 and commit1 are commit headers. A commit header has 139 * commit0 and commit1 are commit headers. A commit header has
140 * a generation number, indicating which of the two headers is 140 * a generation number, indicating which of the two headers is
141 * more recent, and an assignment of head and tail pointers. 141 * more recent, and an assignment of head and tail pointers.
142 * The rest is a circular queue of log records, starting at 142 * The rest is a circular queue of log records, starting at
143 * the byte offset wl_circ_off. 143 * the byte offset wl_circ_off.
144 * 144 *
145 * E marks empty space for records. 145 * E marks empty space for records.
146 * W marks records for block writes issued but waiting. 146 * W marks records for block writes issued but waiting.
147 * C marks completed records. 147 * C marks completed records.
148 * 148 *
149 * wapbl_flush writes new records to empty `E' spaces after 149 * wapbl_flush writes new records to empty `E' spaces after
150 * wl_head from the current transaction in memory. 150 * wl_head from the current transaction in memory.
151 * 151 *
152 * wapbl_truncate advances wl_tail past any completed `C' 152 * wapbl_truncate advances wl_tail past any completed `C'
153 * records, freeing them up for use. 153 * records, freeing them up for use.
154 * 154 *
155 * head == tail == 0 means log is empty. 155 * head == tail == 0 means log is empty.
156 * head == tail != 0 means log is full. 156 * head == tail != 0 means log is full.
157 * 157 *
158 * See assertions in wapbl_advance() for other boundary 158 * See assertions in wapbl_advance() for other boundary
159 * conditions. 159 * conditions.
160 * 160 *
161 * Only wapbl_flush moves the head, except when wapbl_truncate 161 * Only wapbl_flush moves the head, except when wapbl_truncate
162 * sets it to 0 to indicate that the log is empty. 162 * sets it to 0 to indicate that the log is empty.
163 * 163 *
164 * Only wapbl_truncate moves the tail, except when wapbl_flush 164 * Only wapbl_truncate moves the tail, except when wapbl_flush
165 * sets it to wl_circ_off to indicate that the log is full. 165 * sets it to wl_circ_off to indicate that the log is full.
166 */ 166 */
167 167
168 struct wapbl_wc_header *wl_wc_header; /* l */ 168 struct wapbl_wc_header *wl_wc_header; /* l */
169 void *wl_wc_scratch; /* l: scratch space (XXX: por que?!?) */ 169 void *wl_wc_scratch; /* l: scratch space (XXX: por que?!?) */
170 170
171 kmutex_t wl_mtx; /* u: short-term lock */ 171 kmutex_t wl_mtx; /* u: short-term lock */
172 krwlock_t wl_rwlock; /* u: File system transaction lock */ 172 krwlock_t wl_rwlock; /* u: File system transaction lock */
173 173
174 /* 174 /*
175 * Must be held while accessing 175 * Must be held while accessing
176 * wl_count or wl_bufs or head or tail 176 * wl_count or wl_bufs or head or tail
177 */ 177 */
178 178
179 /* 179 /*
180 * Callback called from within the flush routine to flush any extra 180 * Callback called from within the flush routine to flush any extra
181 * bits. Note that flush may be skipped without calling this if 181 * bits. Note that flush may be skipped without calling this if
182 * there are no outstanding buffers in the transaction. 182 * there are no outstanding buffers in the transaction.
183 */ 183 */
184#if _KERNEL 184#if _KERNEL
185 wapbl_flush_fn_t wl_flush; /* r */ 185 wapbl_flush_fn_t wl_flush; /* r */
186 wapbl_flush_fn_t wl_flush_abort;/* r */ 186 wapbl_flush_fn_t wl_flush_abort;/* r */
187#endif 187#endif
188 188
189 size_t wl_bufbytes; /* m: Byte count of pages in wl_bufs */ 189 size_t wl_bufbytes; /* m: Byte count of pages in wl_bufs */
190 size_t wl_bufcount; /* m: Count of buffers in wl_bufs */ 190 size_t wl_bufcount; /* m: Count of buffers in wl_bufs */
191 size_t wl_bcount; /* m: Total bcount of wl_bufs */ 191 size_t wl_bcount; /* m: Total bcount of wl_bufs */
192 192
193 LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */ 193 LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */
194 194
195 kcondvar_t wl_reclaimable_cv; /* m (obviously) */ 195 kcondvar_t wl_reclaimable_cv; /* m (obviously) */
196 size_t wl_reclaimable_bytes; /* m: Amount of space available for 196 size_t wl_reclaimable_bytes; /* m: Amount of space available for
197 reclamation by truncate */ 197 reclamation by truncate */
198 int wl_error_count; /* m: # of wl_entries with errors */ 198 int wl_error_count; /* m: # of wl_entries with errors */
199 size_t wl_reserved_bytes; /* never truncate log smaller than this */ 199 size_t wl_reserved_bytes; /* never truncate log smaller than this */
200 200
201#ifdef WAPBL_DEBUG_BUFBYTES 201#ifdef WAPBL_DEBUG_BUFBYTES
202 size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */ 202 size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */
203#endif 203#endif
204 204
205#if _KERNEL 205#if _KERNEL
206 int wl_brperjblock; /* r Block records per journal block */ 206 int wl_brperjblock; /* r Block records per journal block */
207#endif 207#endif
208 208
209 SIMPLEQ_HEAD(, wapbl_dealloc) wl_dealloclist; /* lm: list head */ 209 SIMPLEQ_HEAD(, wapbl_dealloc) wl_dealloclist; /* lm: list head */
210 int wl_dealloccnt; /* lm: total count */ 210 int wl_dealloccnt; /* lm: total count */
211 int wl_dealloclim; /* r: max count */ 211 int wl_dealloclim; /* r: max count */
212 212
213 /* hashtable of inode numbers for allocated but unlinked inodes */ 213 /* hashtable of inode numbers for allocated but unlinked inodes */
214 /* synch ??? */ 214 /* synch ??? */
215 struct wapbl_ino_head *wl_inohash; 215 struct wapbl_ino_head *wl_inohash;
216 u_long wl_inohashmask; 216 u_long wl_inohashmask;
217 int wl_inohashcnt; 217 int wl_inohashcnt;
218 218
219 SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction 219 SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
220 accounting */ 220 accounting */
221 221
222 u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */ 222 u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */
223 daddr_t wl_buffer_dblk; /* l: buffer disk block address */ 223 daddr_t wl_buffer_dblk; /* l: buffer disk block address */
224 size_t wl_buffer_used; /* l: buffer current use */ 224 size_t wl_buffer_used; /* l: buffer current use */
225}; 225};
226 226
227#ifdef WAPBL_DEBUG_PRINT 227#ifdef WAPBL_DEBUG_PRINT
228int wapbl_debug_print = WAPBL_DEBUG_PRINT; 228int wapbl_debug_print = WAPBL_DEBUG_PRINT;
229#endif 229#endif
230 230
231/****************************************************************/ 231/****************************************************************/
232#ifdef _KERNEL 232#ifdef _KERNEL
233 233
234#ifdef WAPBL_DEBUG 234#ifdef WAPBL_DEBUG
235struct wapbl *wapbl_debug_wl; 235struct wapbl *wapbl_debug_wl;
236#endif 236#endif
237 237
238static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail); 238static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail);
239static int wapbl_write_blocks(struct wapbl *wl, off_t *offp); 239static int wapbl_write_blocks(struct wapbl *wl, off_t *offp);
240static int wapbl_write_revocations(struct wapbl *wl, off_t *offp); 240static int wapbl_write_revocations(struct wapbl *wl, off_t *offp);
241static int wapbl_write_inodes(struct wapbl *wl, off_t *offp); 241static int wapbl_write_inodes(struct wapbl *wl, off_t *offp);
242#endif /* _KERNEL */ 242#endif /* _KERNEL */
243 243
244static int wapbl_replay_process(struct wapbl_replay *wr, off_t, off_t); 244static int wapbl_replay_process(struct wapbl_replay *wr, off_t, off_t);
245 245
246static inline size_t wapbl_space_used(size_t avail, off_t head, 246static inline size_t wapbl_space_used(size_t avail, off_t head,
247 off_t tail); 247 off_t tail);
248 248
249#ifdef _KERNEL 249#ifdef _KERNEL
250 250
251static struct pool wapbl_entry_pool; 251static struct pool wapbl_entry_pool;
252static struct pool wapbl_dealloc_pool; 252static struct pool wapbl_dealloc_pool;
253 253
254#define WAPBL_INODETRK_SIZE 83 254#define WAPBL_INODETRK_SIZE 83
255static int wapbl_ino_pool_refcount; 255static int wapbl_ino_pool_refcount;
256static struct pool wapbl_ino_pool; 256static struct pool wapbl_ino_pool;
257struct wapbl_ino { 257struct wapbl_ino {
258 LIST_ENTRY(wapbl_ino) wi_hash; 258 LIST_ENTRY(wapbl_ino) wi_hash;
259 ino_t wi_ino; 259 ino_t wi_ino;
260 mode_t wi_mode; 260 mode_t wi_mode;
261}; 261};
262 262
263static void wapbl_inodetrk_init(struct wapbl *wl, u_int size); 263static void wapbl_inodetrk_init(struct wapbl *wl, u_int size);
264static void wapbl_inodetrk_free(struct wapbl *wl); 264static void wapbl_inodetrk_free(struct wapbl *wl);
265static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino); 265static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino);
266 266
267static size_t wapbl_transaction_len(struct wapbl *wl); 267static size_t wapbl_transaction_len(struct wapbl *wl);
268static inline size_t wapbl_transaction_inodes_len(struct wapbl *wl); 268static inline size_t wapbl_transaction_inodes_len(struct wapbl *wl);
269 269
270#if 0 270#if 0
271int wapbl_replay_verify(struct wapbl_replay *, struct vnode *); 271int wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
272#endif 272#endif
273 273
274static int wapbl_replay_isopen1(struct wapbl_replay *); 274static int wapbl_replay_isopen1(struct wapbl_replay *);
275 275
276struct wapbl_ops wapbl_ops = { 276struct wapbl_ops wapbl_ops = {
277 .wo_wapbl_discard = wapbl_discard, 277 .wo_wapbl_discard = wapbl_discard,
278 .wo_wapbl_replay_isopen = wapbl_replay_isopen1, 278 .wo_wapbl_replay_isopen = wapbl_replay_isopen1,
279 .wo_wapbl_replay_can_read = wapbl_replay_can_read, 279 .wo_wapbl_replay_can_read = wapbl_replay_can_read,
280 .wo_wapbl_replay_read = wapbl_replay_read, 280 .wo_wapbl_replay_read = wapbl_replay_read,
281 .wo_wapbl_add_buf = wapbl_add_buf, 281 .wo_wapbl_add_buf = wapbl_add_buf,
282 .wo_wapbl_remove_buf = wapbl_remove_buf, 282 .wo_wapbl_remove_buf = wapbl_remove_buf,
283 .wo_wapbl_resize_buf = wapbl_resize_buf, 283 .wo_wapbl_resize_buf = wapbl_resize_buf,
284 .wo_wapbl_begin = wapbl_begin, 284 .wo_wapbl_begin = wapbl_begin,
285 .wo_wapbl_end = wapbl_end, 285 .wo_wapbl_end = wapbl_end,
286 .wo_wapbl_junlock_assert= wapbl_junlock_assert, 286 .wo_wapbl_junlock_assert= wapbl_junlock_assert,
287 287
288 /* XXX: the following is only used to say "this is a wapbl buf" */ 288 /* XXX: the following is only used to say "this is a wapbl buf" */
289 .wo_wapbl_biodone = wapbl_biodone, 289 .wo_wapbl_biodone = wapbl_biodone,
290}; 290};
291 291
292static int 292static int
293wapbl_sysctl_init(void) 293wapbl_sysctl_init(void)
294{ 294{
295 int rv; 295 int rv;
296 const struct sysctlnode *rnode, *cnode; 296 const struct sysctlnode *rnode, *cnode;
297 297
298 wapbl_sysctl = NULL; 298 wapbl_sysctl = NULL;
299 299
300 rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode, 300 rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode,
301 CTLFLAG_PERMANENT, 301 CTLFLAG_PERMANENT,
302 CTLTYPE_NODE, "wapbl", 302 CTLTYPE_NODE, "wapbl",
303 SYSCTL_DESCR("WAPBL journaling options"), 303 SYSCTL_DESCR("WAPBL journaling options"),
304 NULL, 0, NULL, 0, 304 NULL, 0, NULL, 0,
305 CTL_VFS, CTL_CREATE, CTL_EOL); 305 CTL_VFS, CTL_CREATE, CTL_EOL);
306 if (rv) 306 if (rv)
307 return rv; 307 return rv;
308 308
309 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode, 309 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
310 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 310 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
311 CTLTYPE_INT, "flush_disk_cache", 311 CTLTYPE_INT, "flush_disk_cache",
312 SYSCTL_DESCR("flush disk cache"), 312 SYSCTL_DESCR("flush disk cache"),
313 NULL, 0, &wapbl_flush_disk_cache, 0, 313 NULL, 0, &wapbl_flush_disk_cache, 0,
314 CTL_CREATE, CTL_EOL); 314 CTL_CREATE, CTL_EOL);
315 if (rv) 315 if (rv)
316 return rv; 316 return rv;
317 317
318 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode, 318 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
319 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 319 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
320 CTLTYPE_INT, "verbose_commit", 320 CTLTYPE_INT, "verbose_commit",
321 SYSCTL_DESCR("show time and size of wapbl log commits"), 321 SYSCTL_DESCR("show time and size of wapbl log commits"),
322 NULL, 0, &wapbl_verbose_commit, 0, 322 NULL, 0, &wapbl_verbose_commit, 0,
323 CTL_CREATE, CTL_EOL); 323 CTL_CREATE, CTL_EOL);
324 return rv; 324 return rv;
325} 325}
326 326
327static void 327static void
328wapbl_init(void) 328wapbl_init(void)
329{ 329{
330 330
331 pool_init(&wapbl_entry_pool, sizeof(struct wapbl_entry), 0, 0, 0, 331 pool_init(&wapbl_entry_pool, sizeof(struct wapbl_entry), 0, 0, 0,
332 "wapblentrypl", &pool_allocator_kmem, IPL_VM); 332 "wapblentrypl", &pool_allocator_kmem, IPL_VM);
333 pool_init(&wapbl_dealloc_pool, sizeof(struct wapbl_dealloc), 0, 0, 0, 333 pool_init(&wapbl_dealloc_pool, sizeof(struct wapbl_dealloc), 0, 0, 0,
334 "wapbldealloc", &pool_allocator_nointr, IPL_NONE); 334 "wapbldealloc", &pool_allocator_nointr, IPL_NONE);
335 335
336 wapbl_sysctl_init(); 336 wapbl_sysctl_init();
337} 337}
338 338
339static int 339static int
340wapbl_fini(void) 340wapbl_fini(void)
341{ 341{
342 342
343 if (wapbl_sysctl != NULL) 343 if (wapbl_sysctl != NULL)
344 sysctl_teardown(&wapbl_sysctl); 344 sysctl_teardown(&wapbl_sysctl);
345 345
346 pool_destroy(&wapbl_dealloc_pool); 346 pool_destroy(&wapbl_dealloc_pool);
347 pool_destroy(&wapbl_entry_pool); 347 pool_destroy(&wapbl_entry_pool);
348 348
349 return 0; 349 return 0;
350} 350}
351 351
352static int 352static int
353wapbl_start_flush_inodes(struct wapbl *wl, struct wapbl_replay *wr) 353wapbl_start_flush_inodes(struct wapbl *wl, struct wapbl_replay *wr)
354{ 354{
355 int error, i; 355 int error, i;
356 356
357 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 357 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
358 ("wapbl_start: reusing log with %d inodes\n", wr->wr_inodescnt)); 358 ("wapbl_start: reusing log with %d inodes\n", wr->wr_inodescnt));
359 359
360 /* 360 /*
 361 * It's only valid to reuse the replay log if it's 361 * It's only valid to reuse the replay log if it's
362 * the same as the new log we just opened. 362 * the same as the new log we just opened.
363 */ 363 */
364 KDASSERT(!wapbl_replay_isopen(wr)); 364 KDASSERT(!wapbl_replay_isopen(wr));
365 KASSERT(wl->wl_devvp->v_type == VBLK); 365 KASSERT(wl->wl_devvp->v_type == VBLK);
366 KASSERT(wr->wr_devvp->v_type == VBLK); 366 KASSERT(wr->wr_devvp->v_type == VBLK);
367 KASSERT(wl->wl_devvp->v_rdev == wr->wr_devvp->v_rdev); 367 KASSERT(wl->wl_devvp->v_rdev == wr->wr_devvp->v_rdev);
368 KASSERT(wl->wl_logpbn == wr->wr_logpbn); 368 KASSERT(wl->wl_logpbn == wr->wr_logpbn);
369 KASSERT(wl->wl_circ_size == wr->wr_circ_size); 369 KASSERT(wl->wl_circ_size == wr->wr_circ_size);
370 KASSERT(wl->wl_circ_off == wr->wr_circ_off); 370 KASSERT(wl->wl_circ_off == wr->wr_circ_off);
371 KASSERT(wl->wl_log_dev_bshift == wr->wr_log_dev_bshift); 371 KASSERT(wl->wl_log_dev_bshift == wr->wr_log_dev_bshift);
372 KASSERT(wl->wl_fs_dev_bshift == wr->wr_fs_dev_bshift); 372 KASSERT(wl->wl_fs_dev_bshift == wr->wr_fs_dev_bshift);
373 373
374 wl->wl_wc_header->wc_generation = wr->wr_generation + 1; 374 wl->wl_wc_header->wc_generation = wr->wr_generation + 1;
375 375
376 for (i = 0; i < wr->wr_inodescnt; i++) 376 for (i = 0; i < wr->wr_inodescnt; i++)
377 wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber, 377 wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber,
378 wr->wr_inodes[i].wr_imode); 378 wr->wr_inodes[i].wr_imode);
379 379
380 /* Make sure new transaction won't overwrite old inodes list */ 380 /* Make sure new transaction won't overwrite old inodes list */
381 KDASSERT(wapbl_transaction_len(wl) <=  381 KDASSERT(wapbl_transaction_len(wl) <=
382 wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead, 382 wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead,
383 wr->wr_inodestail)); 383 wr->wr_inodestail));
384 384
385 wl->wl_head = wl->wl_tail = wr->wr_inodeshead; 385 wl->wl_head = wl->wl_tail = wr->wr_inodeshead;
386 wl->wl_reclaimable_bytes = wl->wl_reserved_bytes = 386 wl->wl_reclaimable_bytes = wl->wl_reserved_bytes =
387 wapbl_transaction_len(wl); 387 wapbl_transaction_len(wl);
388 388
389 error = wapbl_write_inodes(wl, &wl->wl_head); 389 error = wapbl_write_inodes(wl, &wl->wl_head);
390 if (error) 390 if (error)
391 return error; 391 return error;
392 392
393 KASSERT(wl->wl_head != wl->wl_tail); 393 KASSERT(wl->wl_head != wl->wl_tail);
394 KASSERT(wl->wl_head != 0); 394 KASSERT(wl->wl_head != 0);
395 395
396 return 0; 396 return 0;
397} 397}
398 398
399int 399int
400wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp, 400wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp,
401 daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr, 401 daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr,
402 wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn) 402 wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn)
403{ 403{
404 struct wapbl *wl; 404 struct wapbl *wl;
405 struct vnode *devvp; 405 struct vnode *devvp;
406 daddr_t logpbn; 406 daddr_t logpbn;
407 int error; 407 int error;
408 int log_dev_bshift = ilog2(blksize); 408 int log_dev_bshift = ilog2(blksize);
409 int fs_dev_bshift = log_dev_bshift; 409 int fs_dev_bshift = log_dev_bshift;
410 int run; 410 int run;
411 411
412 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64 412 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64
413 " count=%zu blksize=%zu\n", vp, off, count, blksize)); 413 " count=%zu blksize=%zu\n", vp, off, count, blksize));
414 414
415 if (log_dev_bshift > fs_dev_bshift) { 415 if (log_dev_bshift > fs_dev_bshift) {
416 WAPBL_PRINTF(WAPBL_PRINT_OPEN, 416 WAPBL_PRINTF(WAPBL_PRINT_OPEN,
417 ("wapbl: log device's block size cannot be larger " 417 ("wapbl: log device's block size cannot be larger "
418 "than filesystem's\n")); 418 "than filesystem's\n"));
419 /* 419 /*
420 * Not currently implemented, although it could be if 420 * Not currently implemented, although it could be if
421 * needed someday. 421 * needed someday.
422 */ 422 */
423 return ENOSYS; 423 return ENOSYS;
424 } 424 }
425 425
426 if (off < 0) 426 if (off < 0)
427 return EINVAL; 427 return EINVAL;
428 428
429 if (blksize < DEV_BSIZE) 429 if (blksize < DEV_BSIZE)
430 return EINVAL; 430 return EINVAL;
431 if (blksize % DEV_BSIZE) 431 if (blksize % DEV_BSIZE)
432 return EINVAL; 432 return EINVAL;
433 433
434 /* XXXTODO: verify that the full load is writable */ 434 /* XXXTODO: verify that the full load is writable */
435 435
436 /* 436 /*
437 * XXX check for minimum log size 437 * XXX check for minimum log size
438 * minimum is governed by minimum amount of space 438 * minimum is governed by minimum amount of space
439 * to complete a transaction. (probably truncate) 439 * to complete a transaction. (probably truncate)
440 */ 440 */
441 /* XXX for now pick something minimal */ 441 /* XXX for now pick something minimal */
442 if ((count * blksize) < MAXPHYS) { 442 if ((count * blksize) < MAXPHYS) {
443 return ENOSPC; 443 return ENOSPC;
444 } 444 }
445 445
446 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) { 446 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) {
447 return error; 447 return error;
448 } 448 }
449 449
450 wl = wapbl_calloc(1, sizeof(*wl)); 450 wl = wapbl_calloc(1, sizeof(*wl));
451 rw_init(&wl->wl_rwlock); 451 rw_init(&wl->wl_rwlock);
452 mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE); 452 mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE);
453 cv_init(&wl->wl_reclaimable_cv, "wapblrec"); 453 cv_init(&wl->wl_reclaimable_cv, "wapblrec");
454 LIST_INIT(&wl->wl_bufs); 454 LIST_INIT(&wl->wl_bufs);
455 SIMPLEQ_INIT(&wl->wl_entries); 455 SIMPLEQ_INIT(&wl->wl_entries);
456 456
457 wl->wl_logvp = vp; 457 wl->wl_logvp = vp;
458 wl->wl_devvp = devvp; 458 wl->wl_devvp = devvp;
459 wl->wl_mount = mp; 459 wl->wl_mount = mp;
460 wl->wl_logpbn = logpbn; 460 wl->wl_logpbn = logpbn;
461 wl->wl_log_dev_bshift = log_dev_bshift; 461 wl->wl_log_dev_bshift = log_dev_bshift;
462 wl->wl_fs_dev_bshift = fs_dev_bshift; 462 wl->wl_fs_dev_bshift = fs_dev_bshift;
463 463
464 wl->wl_flush = flushfn; 464 wl->wl_flush = flushfn;
465 wl->wl_flush_abort = flushabortfn; 465 wl->wl_flush_abort = flushabortfn;
466 466
467 /* Reserve two log device blocks for the commit headers */ 467 /* Reserve two log device blocks for the commit headers */
468 wl->wl_circ_off = 2<<wl->wl_log_dev_bshift; 468 wl->wl_circ_off = 2<<wl->wl_log_dev_bshift;
469 wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off); 469 wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off);
470 /* truncate the log usage to a multiple of log_dev_bshift */ 470 /* truncate the log usage to a multiple of log_dev_bshift */
471 wl->wl_circ_size >>= wl->wl_log_dev_bshift; 471 wl->wl_circ_size >>= wl->wl_log_dev_bshift;
472 wl->wl_circ_size <<= wl->wl_log_dev_bshift; 472 wl->wl_circ_size <<= wl->wl_log_dev_bshift;
473 473
474 /* 474 /*
475 * wl_bufbytes_max limits the size of the in memory transaction space. 475 * wl_bufbytes_max limits the size of the in memory transaction space.
476 * - Since buffers are allocated and accounted for in units of 476 * - Since buffers are allocated and accounted for in units of
477 * PAGE_SIZE it is required to be a multiple of PAGE_SIZE 477 * PAGE_SIZE it is required to be a multiple of PAGE_SIZE
478 * (i.e. 1<<PAGE_SHIFT) 478 * (i.e. 1<<PAGE_SHIFT)
479 * - Since the log device has to be written in units of 479 * - Since the log device has to be written in units of
 480 * 1<<wl_log_dev_bshift it is required to be a multiple of 480 * 1<<wl_log_dev_bshift it is required to be a multiple of
481 * 1<<wl_log_dev_bshift. 481 * 1<<wl_log_dev_bshift.
482 * - Since filesystem will provide data in units of 1<<wl_fs_dev_bshift, 482 * - Since filesystem will provide data in units of 1<<wl_fs_dev_bshift,
483 * it is convenient to be a multiple of 1<<wl_fs_dev_bshift. 483 * it is convenient to be a multiple of 1<<wl_fs_dev_bshift.
484 * Therefore it must be multiple of the least common multiple of those 484 * Therefore it must be multiple of the least common multiple of those
485 * three quantities. Fortunately, all of those quantities are 485 * three quantities. Fortunately, all of those quantities are
486 * guaranteed to be a power of two, and the least common multiple of 486 * guaranteed to be a power of two, and the least common multiple of
487 * a set of numbers which are all powers of two is simply the maximum 487 * a set of numbers which are all powers of two is simply the maximum
488 * of those numbers. Finally, the maximum logarithm of a power of two 488 * of those numbers. Finally, the maximum logarithm of a power of two
489 * is the same as the log of the maximum power of two. So we can do 489 * is the same as the log of the maximum power of two. So we can do
490 * the following operations to size wl_bufbytes_max: 490 * the following operations to size wl_bufbytes_max:
491 */ 491 */
492 492
493 /* XXX fix actual number of pages reserved per filesystem. */ 493 /* XXX fix actual number of pages reserved per filesystem. */
494 wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2); 494 wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2);
495 495
496 /* Round wl_bufbytes_max to the largest power of two constraint */ 496 /* Round wl_bufbytes_max to the largest power of two constraint */
497 wl->wl_bufbytes_max >>= PAGE_SHIFT; 497 wl->wl_bufbytes_max >>= PAGE_SHIFT;
498 wl->wl_bufbytes_max <<= PAGE_SHIFT; 498 wl->wl_bufbytes_max <<= PAGE_SHIFT;
499 wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift; 499 wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift;
500 wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift; 500 wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift;
501 wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift; 501 wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift;
502 wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift; 502 wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift;
503 503
504 /* XXX maybe use filesystem fragment size instead of 1024 */ 504 /* XXX maybe use filesystem fragment size instead of 1024 */
505 /* XXX fix actual number of buffers reserved per filesystem. */ 505 /* XXX fix actual number of buffers reserved per filesystem. */
506 wl->wl_bufcount_max = (nbuf / 2) * 1024; 506 wl->wl_bufcount_max = (nbuf / 2) * 1024;
507 507
508 wl->wl_brperjblock = ((1<<wl->wl_log_dev_bshift) 508 wl->wl_brperjblock = ((1<<wl->wl_log_dev_bshift)
509 - offsetof(struct wapbl_wc_blocklist, wc_blocks)) / 509 - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
510 sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]); 510 sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
511 KASSERT(wl->wl_brperjblock > 0); 511 KASSERT(wl->wl_brperjblock > 0);
512 512
513 /* XXX tie this into resource estimation */ 513 /* XXX tie this into resource estimation */
514 wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2; 514 wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2;
515 SIMPLEQ_INIT(&wl->wl_dealloclist); 515 SIMPLEQ_INIT(&wl->wl_dealloclist);
516  516
517 wl->wl_buffer = wapbl_alloc(MAXPHYS); 517 wl->wl_buffer = wapbl_alloc(MAXPHYS);
518 wl->wl_buffer_used = 0; 518 wl->wl_buffer_used = 0;
519 519
520 wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE); 520 wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
521 521
522 /* Initialize the commit header */ 522 /* Initialize the commit header */
523 { 523 {
524 struct wapbl_wc_header *wc; 524 struct wapbl_wc_header *wc;
525 size_t len = 1 << wl->wl_log_dev_bshift; 525 size_t len = 1 << wl->wl_log_dev_bshift;
526 wc = wapbl_calloc(1, len); 526 wc = wapbl_calloc(1, len);
527 wc->wc_type = WAPBL_WC_HEADER; 527 wc->wc_type = WAPBL_WC_HEADER;
528 wc->wc_len = len; 528 wc->wc_len = len;
529 wc->wc_circ_off = wl->wl_circ_off; 529 wc->wc_circ_off = wl->wl_circ_off;
530 wc->wc_circ_size = wl->wl_circ_size; 530 wc->wc_circ_size = wl->wl_circ_size;
531 /* XXX wc->wc_fsid */ 531 /* XXX wc->wc_fsid */
532 wc->wc_log_dev_bshift = wl->wl_log_dev_bshift; 532 wc->wc_log_dev_bshift = wl->wl_log_dev_bshift;
533 wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift; 533 wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift;
534 wl->wl_wc_header = wc; 534 wl->wl_wc_header = wc;
535 wl->wl_wc_scratch = wapbl_alloc(len); 535 wl->wl_wc_scratch = wapbl_alloc(len);
536 } 536 }
537 537
538 /* 538 /*
539 * if there was an existing set of unlinked but 539 * if there was an existing set of unlinked but
540 * allocated inodes, preserve it in the new 540 * allocated inodes, preserve it in the new
541 * log. 541 * log.
542 */ 542 */
543 if (wr && wr->wr_inodescnt) { 543 if (wr && wr->wr_inodescnt) {
544 error = wapbl_start_flush_inodes(wl, wr); 544 error = wapbl_start_flush_inodes(wl, wr);
545 if (error) 545 if (error)
546 goto errout; 546 goto errout;
547 } 547 }
548 548
549 error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail); 549 error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail);
550 if (error) { 550 if (error) {
551 goto errout; 551 goto errout;
552 } 552 }
553 553
554 *wlp = wl; 554 *wlp = wl;
555#if defined(WAPBL_DEBUG) 555#if defined(WAPBL_DEBUG)
556 wapbl_debug_wl = wl; 556 wapbl_debug_wl = wl;
557#endif 557#endif
558 558
559 return 0; 559 return 0;
560 errout: 560 errout:
561 wapbl_discard(wl); 561 wapbl_discard(wl);
562 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len); 562 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
563 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len); 563 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
564 wapbl_free(wl->wl_buffer, MAXPHYS); 564 wapbl_free(wl->wl_buffer, MAXPHYS);
565 wapbl_inodetrk_free(wl); 565 wapbl_inodetrk_free(wl);
566 wapbl_free(wl, sizeof(*wl)); 566 wapbl_free(wl, sizeof(*wl));
567 567
568 return error; 568 return error;
569} 569}
570 570
571/* 571/*
572 * Like wapbl_flush, only discards the transaction 572 * Like wapbl_flush, only discards the transaction
573 * completely 573 * completely
574 */ 574 */
575 575
576void 576void
577wapbl_discard(struct wapbl *wl) 577wapbl_discard(struct wapbl *wl)
578{ 578{
579 struct wapbl_entry *we; 579 struct wapbl_entry *we;
580 struct wapbl_dealloc *wd; 580 struct wapbl_dealloc *wd;
581 struct buf *bp; 581 struct buf *bp;
582 int i; 582 int i;
583 583
584 /* 584 /*
585 * XXX we may consider using upgrade here 585 * XXX we may consider using upgrade here
586 * if we want to call flush from inside a transaction 586 * if we want to call flush from inside a transaction
587 */ 587 */
588 rw_enter(&wl->wl_rwlock, RW_WRITER); 588 rw_enter(&wl->wl_rwlock, RW_WRITER);
589 wl->wl_flush(wl->wl_mount, SIMPLEQ_FIRST(&wl->wl_dealloclist)); 589 wl->wl_flush(wl->wl_mount, SIMPLEQ_FIRST(&wl->wl_dealloclist));
590 590
591#ifdef WAPBL_DEBUG_PRINT 591#ifdef WAPBL_DEBUG_PRINT
592 { 592 {
593 pid_t pid = -1; 593 pid_t pid = -1;
594 lwpid_t lid = -1; 594 lwpid_t lid = -1;
595 if (curproc) 595 if (curproc)
596 pid = curproc->p_pid; 596 pid = curproc->p_pid;
597 if (curlwp) 597 if (curlwp)
598 lid = curlwp->l_lid; 598 lid = curlwp->l_lid;
599#ifdef WAPBL_DEBUG_BUFBYTES 599#ifdef WAPBL_DEBUG_BUFBYTES
600 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 600 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
601 ("wapbl_discard: thread %d.%d discarding " 601 ("wapbl_discard: thread %d.%d discarding "
602 "transaction\n" 602 "transaction\n"
603 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 603 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
604 "deallocs=%d inodes=%d\n" 604 "deallocs=%d inodes=%d\n"
605 "\terrcnt = %u, reclaimable=%zu reserved=%zu " 605 "\terrcnt = %u, reclaimable=%zu reserved=%zu "
606 "unsynced=%zu\n", 606 "unsynced=%zu\n",
607 pid, lid, wl->wl_bufcount, wl->wl_bufbytes, 607 pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
608 wl->wl_bcount, wl->wl_dealloccnt, 608 wl->wl_bcount, wl->wl_dealloccnt,
609 wl->wl_inohashcnt, wl->wl_error_count, 609 wl->wl_inohashcnt, wl->wl_error_count,
610 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 610 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
611 wl->wl_unsynced_bufbytes)); 611 wl->wl_unsynced_bufbytes));
612 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 612 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
613 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 613 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
614 ("\tentry: bufcount = %zu, reclaimable = %zu, " 614 ("\tentry: bufcount = %zu, reclaimable = %zu, "
615 "error = %d, unsynced = %zu\n", 615 "error = %d, unsynced = %zu\n",
616 we->we_bufcount, we->we_reclaimable_bytes, 616 we->we_bufcount, we->we_reclaimable_bytes,
617 we->we_error, we->we_unsynced_bufbytes)); 617 we->we_error, we->we_unsynced_bufbytes));
618 } 618 }
619#else /* !WAPBL_DEBUG_BUFBYTES */ 619#else /* !WAPBL_DEBUG_BUFBYTES */
620 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 620 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
621 ("wapbl_discard: thread %d.%d discarding transaction\n" 621 ("wapbl_discard: thread %d.%d discarding transaction\n"
622 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 622 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
623 "deallocs=%d inodes=%d\n" 623 "deallocs=%d inodes=%d\n"
624 "\terrcnt = %u, reclaimable=%zu reserved=%zu\n", 624 "\terrcnt = %u, reclaimable=%zu reserved=%zu\n",
625 pid, lid, wl->wl_bufcount, wl->wl_bufbytes, 625 pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
626 wl->wl_bcount, wl->wl_dealloccnt, 626 wl->wl_bcount, wl->wl_dealloccnt,
627 wl->wl_inohashcnt, wl->wl_error_count, 627 wl->wl_inohashcnt, wl->wl_error_count,
628 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes)); 628 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes));
629 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 629 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
630 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 630 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
631 ("\tentry: bufcount = %zu, reclaimable = %zu, " 631 ("\tentry: bufcount = %zu, reclaimable = %zu, "
632 "error = %d\n", 632 "error = %d\n",
633 we->we_bufcount, we->we_reclaimable_bytes, 633 we->we_bufcount, we->we_reclaimable_bytes,
634 we->we_error)); 634 we->we_error));
635 } 635 }
636#endif /* !WAPBL_DEBUG_BUFBYTES */ 636#endif /* !WAPBL_DEBUG_BUFBYTES */
637 } 637 }
638#endif /* WAPBL_DEBUG_PRINT */ 638#endif /* WAPBL_DEBUG_PRINT */
639 639
640 for (i = 0; i <= wl->wl_inohashmask; i++) { 640 for (i = 0; i <= wl->wl_inohashmask; i++) {
641 struct wapbl_ino_head *wih; 641 struct wapbl_ino_head *wih;
642 struct wapbl_ino *wi; 642 struct wapbl_ino *wi;
643 643
644 wih = &wl->wl_inohash[i]; 644 wih = &wl->wl_inohash[i];
645 while ((wi = LIST_FIRST(wih)) != NULL) { 645 while ((wi = LIST_FIRST(wih)) != NULL) {
646 LIST_REMOVE(wi, wi_hash); 646 LIST_REMOVE(wi, wi_hash);
647 pool_put(&wapbl_ino_pool, wi); 647 pool_put(&wapbl_ino_pool, wi);
648 KASSERT(wl->wl_inohashcnt > 0); 648 KASSERT(wl->wl_inohashcnt > 0);
649 wl->wl_inohashcnt--; 649 wl->wl_inohashcnt--;
650 } 650 }
651 } 651 }
652 652
653 /* 653 /*
654 * clean buffer list 654 * clean buffer list
655 */ 655 */
656 mutex_enter(&bufcache_lock); 656 mutex_enter(&bufcache_lock);
657 mutex_enter(&wl->wl_mtx); 657 mutex_enter(&wl->wl_mtx);
658 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) { 658 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
659 if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) { 659 if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) {
660 /* 660 /*
661 * The buffer will be unlocked and 661 * The buffer will be unlocked and
662 * removed from the transaction in brelse 662 * removed from the transaction in brelse
663 */ 663 */
664 mutex_exit(&wl->wl_mtx); 664 mutex_exit(&wl->wl_mtx);
665 brelsel(bp, 0); 665 brelsel(bp, 0);
666 mutex_enter(&wl->wl_mtx); 666 mutex_enter(&wl->wl_mtx);
667 } 667 }
668 } 668 }
669 mutex_exit(&wl->wl_mtx); 669 mutex_exit(&wl->wl_mtx);
670 mutex_exit(&bufcache_lock); 670 mutex_exit(&bufcache_lock);
671 671
672 /* 672 /*
673 * Remove references to this wl from wl_entries, free any which 673 * Remove references to this wl from wl_entries, free any which
674 * no longer have buffers, others will be freed in wapbl_biodone 674 * no longer have buffers, others will be freed in wapbl_biodone
675 * when they no longer have any buffers. 675 * when they no longer have any buffers.
676 */ 676 */
677 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) { 677 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) {
678 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries); 678 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
679 /* XXX should we be accumulating wl_error_count 679 /* XXX should we be accumulating wl_error_count
680 * and increasing reclaimable bytes ? */ 680 * and increasing reclaimable bytes ? */
681 we->we_wapbl = NULL; 681 we->we_wapbl = NULL;
682 if (we->we_bufcount == 0) { 682 if (we->we_bufcount == 0) {
683#ifdef WAPBL_DEBUG_BUFBYTES 683#ifdef WAPBL_DEBUG_BUFBYTES
684 KASSERT(we->we_unsynced_bufbytes == 0); 684 KASSERT(we->we_unsynced_bufbytes == 0);
685#endif 685#endif
686 pool_put(&wapbl_entry_pool, we); 686 pool_put(&wapbl_entry_pool, we);
687 } 687 }
688 } 688 }
689 689
690 /* Discard list of deallocs */ 690 /* Discard list of deallocs */
691 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) { 691 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) {
692 SIMPLEQ_REMOVE_HEAD(&wl->wl_dealloclist, wd_entries); 692 SIMPLEQ_REMOVE_HEAD(&wl->wl_dealloclist, wd_entries);
693 pool_put(&wapbl_dealloc_pool, wd); 693 pool_put(&wapbl_dealloc_pool, wd);
694 wl->wl_dealloccnt--; 694 wl->wl_dealloccnt--;
695 } 695 }
696 696
697 /* XXX should we clear wl_reserved_bytes? */ 697 /* XXX should we clear wl_reserved_bytes? */
698 698
699 KASSERT(wl->wl_bufbytes == 0); 699 KASSERT(wl->wl_bufbytes == 0);
700 KASSERT(wl->wl_bcount == 0); 700 KASSERT(wl->wl_bcount == 0);
701 KASSERT(wl->wl_bufcount == 0); 701 KASSERT(wl->wl_bufcount == 0);
702 KASSERT(LIST_EMPTY(&wl->wl_bufs)); 702 KASSERT(LIST_EMPTY(&wl->wl_bufs));
703 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries)); 703 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
704 KASSERT(wl->wl_inohashcnt == 0); 704 KASSERT(wl->wl_inohashcnt == 0);
705 KASSERT(SIMPLEQ_EMPTY(&wl->wl_dealloclist)); 705 KASSERT(SIMPLEQ_EMPTY(&wl->wl_dealloclist));
706 KASSERT(wl->wl_dealloccnt == 0); 706 KASSERT(wl->wl_dealloccnt == 0);
707 707
708 rw_exit(&wl->wl_rwlock); 708 rw_exit(&wl->wl_rwlock);
709} 709}
710 710
711int 711int
712wapbl_stop(struct wapbl *wl, int force) 712wapbl_stop(struct wapbl *wl, int force)
713{ 713{
714 int error; 714 int error;
715 715
716 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n")); 716 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n"));
717 error = wapbl_flush(wl, 1); 717 error = wapbl_flush(wl, 1);
718 if (error) { 718 if (error) {
719 if (force) 719 if (force)
720 wapbl_discard(wl); 720 wapbl_discard(wl);
721 else 721 else
722 return error; 722 return error;
723 } 723 }
724 724
725 /* Unlinked inodes persist after a flush */ 725 /* Unlinked inodes persist after a flush */
726 if (wl->wl_inohashcnt) { 726 if (wl->wl_inohashcnt) {
727 if (force) { 727 if (force) {
728 wapbl_discard(wl); 728 wapbl_discard(wl);
729 } else { 729 } else {
730 return EBUSY; 730 return EBUSY;
731 } 731 }
732 } 732 }
733 733
734 KASSERT(wl->wl_bufbytes == 0); 734 KASSERT(wl->wl_bufbytes == 0);
735 KASSERT(wl->wl_bcount == 0); 735 KASSERT(wl->wl_bcount == 0);
736 KASSERT(wl->wl_bufcount == 0); 736 KASSERT(wl->wl_bufcount == 0);
737 KASSERT(LIST_EMPTY(&wl->wl_bufs)); 737 KASSERT(LIST_EMPTY(&wl->wl_bufs));
738 KASSERT(wl->wl_dealloccnt == 0); 738 KASSERT(wl->wl_dealloccnt == 0);
739 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries)); 739 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
740 KASSERT(wl->wl_inohashcnt == 0); 740 KASSERT(wl->wl_inohashcnt == 0);
741 KASSERT(SIMPLEQ_EMPTY(&wl->wl_dealloclist)); 741 KASSERT(SIMPLEQ_EMPTY(&wl->wl_dealloclist));
742 KASSERT(wl->wl_dealloccnt == 0); 742 KASSERT(wl->wl_dealloccnt == 0);
743 743
744 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len); 744 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
745 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len); 745 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
746 wapbl_free(wl->wl_buffer, MAXPHYS); 746 wapbl_free(wl->wl_buffer, MAXPHYS);
747 wapbl_inodetrk_free(wl); 747 wapbl_inodetrk_free(wl);
748 748
749 cv_destroy(&wl->wl_reclaimable_cv); 749 cv_destroy(&wl->wl_reclaimable_cv);
750 mutex_destroy(&wl->wl_mtx); 750 mutex_destroy(&wl->wl_mtx);
751 rw_destroy(&wl->wl_rwlock); 751 rw_destroy(&wl->wl_rwlock);
752 wapbl_free(wl, sizeof(*wl)); 752 wapbl_free(wl, sizeof(*wl));
753 753
754 return 0; 754 return 0;
755} 755}
756 756
757/****************************************************************/ 757/****************************************************************/
758/* 758/*
759 * Unbuffered disk I/O 759 * Unbuffered disk I/O
760 */ 760 */
761 761
762static int 762static int
763wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags) 763wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags)
764{ 764{
765 struct pstats *pstats = curlwp->l_proc->p_stats; 765 struct pstats *pstats = curlwp->l_proc->p_stats;
766 struct buf *bp; 766 struct buf *bp;
767 int error; 767 int error;
768 768
769 KASSERT((flags & ~(B_WRITE | B_READ)) == 0); 769 KASSERT((flags & ~(B_WRITE | B_READ)) == 0);
770 KASSERT(devvp->v_type == VBLK); 770 KASSERT(devvp->v_type == VBLK);
771 771
772 if ((flags & (B_WRITE | B_READ)) == B_WRITE) { 772 if ((flags & (B_WRITE | B_READ)) == B_WRITE) {
773 mutex_enter(devvp->v_interlock); 773 mutex_enter(devvp->v_interlock);
774 devvp->v_numoutput++; 774 devvp->v_numoutput++;
775 mutex_exit(devvp->v_interlock); 775 mutex_exit(devvp->v_interlock);
776 pstats->p_ru.ru_oublock++; 776 pstats->p_ru.ru_oublock++;
777 } else { 777 } else {
778 pstats->p_ru.ru_inblock++; 778 pstats->p_ru.ru_inblock++;
779 } 779 }
780 780
781 bp = getiobuf(devvp, true); 781 bp = getiobuf(devvp, true);
782 bp->b_flags = flags; 782 bp->b_flags = flags;
783 bp->b_cflags = BC_BUSY; /* silly & dubious */ 783 bp->b_cflags = BC_BUSY; /* silly & dubious */
784 bp->b_dev = devvp->v_rdev; 784 bp->b_dev = devvp->v_rdev;
785 bp->b_data = data; 785 bp->b_data = data;
786 bp->b_bufsize = bp->b_resid = bp->b_bcount = len; 786 bp->b_bufsize = bp->b_resid = bp->b_bcount = len;
787 bp->b_blkno = pbn; 787 bp->b_blkno = pbn;
788 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 788 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
789 789
790 WAPBL_PRINTF(WAPBL_PRINT_IO, 790 WAPBL_PRINTF(WAPBL_PRINT_IO,
791 ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%"PRIx64"\n", 791 ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%"PRIx64"\n",
792 BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount, 792 BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount,
793 bp->b_blkno, bp->b_dev)); 793 bp->b_blkno, bp->b_dev));
794 794
795 VOP_STRATEGY(devvp, bp); 795 VOP_STRATEGY(devvp, bp);
796 796
797 error = biowait(bp); 797 error = biowait(bp);
798 putiobuf(bp); 798 putiobuf(bp);
799 799
800 if (error) { 800 if (error) {
801 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 801 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
802 ("wapbl_doio: %s %zu bytes at block %" PRId64 802 ("wapbl_doio: %s %zu bytes at block %" PRId64
803 " on dev 0x%"PRIx64" failed with error %d\n", 803 " on dev 0x%"PRIx64" failed with error %d\n",
804 (((flags & (B_WRITE | B_READ)) == B_WRITE) ? 804 (((flags & (B_WRITE | B_READ)) == B_WRITE) ?
805 "write" : "read"), 805 "write" : "read"),
806 len, pbn, devvp->v_rdev, error)); 806 len, pbn, devvp->v_rdev, error));
807 } 807 }
808 808
809 return error; 809 return error;
810} 810}
811 811
812/* 812/*
813 * wapbl_write(data, len, devvp, pbn) 813 * wapbl_write(data, len, devvp, pbn)
814 * 814 *
815 * Synchronously write len bytes from data to physical block pbn 815 * Synchronously write len bytes from data to physical block pbn
816 * on devvp. 816 * on devvp.
817 */ 817 */
818int 818int
819wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn) 819wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
820{ 820{
821 821
822 return wapbl_doio(data, len, devvp, pbn, B_WRITE); 822 return wapbl_doio(data, len, devvp, pbn, B_WRITE);
823} 823}
824 824
825/* 825/*
826 * wapbl_read(data, len, devvp, pbn) 826 * wapbl_read(data, len, devvp, pbn)
827 * 827 *
828 * Synchronously read len bytes into data from physical block pbn 828 * Synchronously read len bytes into data from physical block pbn
829 * on devvp. 829 * on devvp.
830 */ 830 */
831int 831int
832wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn) 832wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
833{ 833{
834 834
835 return wapbl_doio(data, len, devvp, pbn, B_READ); 835 return wapbl_doio(data, len, devvp, pbn, B_READ);
836} 836}
837 837
838/****************************************************************/ 838/****************************************************************/
839/* 839/*
840 * Buffered disk writes -- try to coalesce writes and emit 840 * Buffered disk writes -- try to coalesce writes and emit
841 * MAXPHYS-aligned blocks. 841 * MAXPHYS-aligned blocks.
842 */ 842 */
843 843
844/* 844/*
845 * wapbl_buffered_flush(wl) 845 * wapbl_buffered_flush(wl)
846 * 846 *
847 * Flush any buffered writes from wapbl_buffered_write. 847 * Flush any buffered writes from wapbl_buffered_write.
848 */ 848 */
849static int 849static int
850wapbl_buffered_flush(struct wapbl *wl) 850wapbl_buffered_flush(struct wapbl *wl)
851{ 851{
852 int error; 852 int error;
853 853
854 if (wl->wl_buffer_used == 0) 854 if (wl->wl_buffer_used == 0)
855 return 0; 855 return 0;
856 856
857 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, 857 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
858 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); 858 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
859 wl->wl_buffer_used = 0; 859 wl->wl_buffer_used = 0;
860 860
861 return error; 861 return error;
862} 862}
863 863
864/* 864/*
865 * wapbl_buffered_write(data, len, wl, pbn) 865 * wapbl_buffered_write(data, len, wl, pbn)
866 * 866 *
867 * Write len bytes from data to physical block pbn on 867 * Write len bytes from data to physical block pbn on
868 * wl->wl_devvp. The write may not complete until 868 * wl->wl_devvp. The write may not complete until
869 * wapbl_buffered_flush. 869 * wapbl_buffered_flush.
870 */ 870 */
871static int 871static int
872wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn) 872wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
873{ 873{
874 int error; 874 int error;
875 size_t resid; 875 size_t resid;
876 876
877 /* 877 /*
878 * If not adjacent to buffered data flush first. Disk block 878 * If not adjacent to buffered data flush first. Disk block
879 * address is always valid for non-empty buffer. 879 * address is always valid for non-empty buffer.
880 */ 880 */
881 if (wl->wl_buffer_used > 0 && 881 if (wl->wl_buffer_used > 0 &&
882 pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) { 882 pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) {
883 error = wapbl_buffered_flush(wl); 883 error = wapbl_buffered_flush(wl);
884 if (error) 884 if (error)
885 return error; 885 return error;
886 } 886 }
887 /* 887 /*
888 * If this write goes to an empty buffer we have to 888 * If this write goes to an empty buffer we have to
889 * save the disk block address first. 889 * save the disk block address first.
890 */ 890 */
891 if (wl->wl_buffer_used == 0) 891 if (wl->wl_buffer_used == 0)
892 wl->wl_buffer_dblk = pbn; 892 wl->wl_buffer_dblk = pbn;
893 /* 893 /*
894 * Remaining space so this buffer ends on a MAXPHYS boundary. 894 * Remaining space so this buffer ends on a MAXPHYS boundary.
895 * 895 *
896 * Cannot become less or equal zero as the buffer would have been 896 * Cannot become less or equal zero as the buffer would have been
897 * flushed on the last call then. 897 * flushed on the last call then.
898 */ 898 */
899 resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) - 899 resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) -
900 wl->wl_buffer_used; 900 wl->wl_buffer_used;
901 KASSERT(resid > 0); 901 KASSERT(resid > 0);
902 KASSERT(dbtob(btodb(resid)) == resid); 902 KASSERT(dbtob(btodb(resid)) == resid);
903 if (len >= resid) { 903 if (len >= resid) {
904 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid); 904 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid);
905 wl->wl_buffer_used += resid; 905 wl->wl_buffer_used += resid;
906 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, 906 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
907 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); 907 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
908 data = (uint8_t *)data + resid; 908 data = (uint8_t *)data + resid;
909 len -= resid; 909 len -= resid;
910 wl->wl_buffer_dblk = pbn + btodb(resid); 910 wl->wl_buffer_dblk = pbn + btodb(resid);
911 wl->wl_buffer_used = 0; 911 wl->wl_buffer_used = 0;
912 if (error) 912 if (error)
913 return error; 913 return error;
914 } 914 }
915 KASSERT(len < MAXPHYS); 915 KASSERT(len < MAXPHYS);
916 if (len > 0) { 916 if (len > 0) {
917 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len); 917 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len);
918 wl->wl_buffer_used += len; 918 wl->wl_buffer_used += len;
919 } 919 }
920 920
921 return 0; 921 return 0;
922} 922}
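The MAXPHYS-boundary arithmetic above is easier to follow with concrete numbers. The following is a minimal user-space sketch, not part of this change: MAXPHYS, DEV_BSIZE and the btodb()/dbtob() conversions are assumed stand-ins for the kernel's <sys/param.h> definitions, and the block/offset values are hypothetical.

#include <stdio.h>
#include <stddef.h>

#define MAXPHYS   (64 * 1024)          /* assumed maximum transfer size */
#define DEV_BSIZE 512                  /* assumed disk block size */
#define btodb(x)  ((x) / DEV_BSIZE)    /* bytes -> disk blocks */
#define dbtob(x)  ((x) * DEV_BSIZE)    /* disk blocks -> bytes */

int
main(void)
{
	long long dblk = 130;	/* hypothetical start block of the buffer */
	size_t used = 4096;	/* bytes already buffered */

	/* Space left before the buffer would cross a MAXPHYS boundary. */
	size_t resid = MAXPHYS - dbtob(dblk % btodb(MAXPHYS)) - used;

	printf("room before the MAXPHYS boundary: %zu bytes\n", resid);
	return 0;
}

With these values resid is 60416 bytes; a write longer than that is split, exactly as in the len >= resid branch above.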
923 923
924/* 924/*
925 * wapbl_circ_write(wl, data, len, offp) 925 * wapbl_circ_write(wl, data, len, offp)
926 * 926 *
927 * Write len bytes from data to the circular queue of wl, starting 927 * Write len bytes from data to the circular queue of wl, starting
928 * at linear byte offset *offp, and returning the new linear byte 928 * at linear byte offset *offp, and returning the new linear byte
929 * offset in *offp. 929 * offset in *offp.
930 * 930 *
931 * If the starting linear byte offset precedes wl->wl_circ_off, 931 * If the starting linear byte offset precedes wl->wl_circ_off,
932 * the write instead begins at wl->wl_circ_off. XXX WTF? This 932 * the write instead begins at wl->wl_circ_off. XXX WTF? This
933 * should be a KASSERT, not a conditional. 933 * should be a KASSERT, not a conditional.
934 * 934 *
935 * The write is buffered in wl and must be flushed with 935 * The write is buffered in wl and must be flushed with
936 * wapbl_buffered_flush before it will be submitted to the disk. 936 * wapbl_buffered_flush before it will be submitted to the disk.
937 */ 937 */
938static int 938static int
939wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp) 939wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp)
940{ 940{
941 size_t slen; 941 size_t slen;
942 off_t off = *offp; 942 off_t off = *offp;
943 int error; 943 int error;
944 daddr_t pbn; 944 daddr_t pbn;
945 945
946 KDASSERT(((len >> wl->wl_log_dev_bshift) << 946 KDASSERT(((len >> wl->wl_log_dev_bshift) <<
947 wl->wl_log_dev_bshift) == len); 947 wl->wl_log_dev_bshift) == len);
948 948
949 if (off < wl->wl_circ_off) 949 if (off < wl->wl_circ_off)
950 off = wl->wl_circ_off; 950 off = wl->wl_circ_off;
951 slen = wl->wl_circ_off + wl->wl_circ_size - off; 951 slen = wl->wl_circ_off + wl->wl_circ_size - off;
952 if (slen < len) { 952 if (slen < len) {
953 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift); 953 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
954#ifdef _KERNEL 954#ifdef _KERNEL
955 pbn = btodb(pbn << wl->wl_log_dev_bshift); 955 pbn = btodb(pbn << wl->wl_log_dev_bshift);
956#endif 956#endif
957 error = wapbl_buffered_write(data, slen, wl, pbn); 957 error = wapbl_buffered_write(data, slen, wl, pbn);
958 if (error) 958 if (error)
959 return error; 959 return error;
960 data = (uint8_t *)data + slen; 960 data = (uint8_t *)data + slen;
961 len -= slen; 961 len -= slen;
962 off = wl->wl_circ_off; 962 off = wl->wl_circ_off;
963 } 963 }
964 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift); 964 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
965#ifdef _KERNEL 965#ifdef _KERNEL
966 pbn = btodb(pbn << wl->wl_log_dev_bshift); 966 pbn = btodb(pbn << wl->wl_log_dev_bshift);
967#endif 967#endif
968 error = wapbl_buffered_write(data, len, wl, pbn); 968 error = wapbl_buffered_write(data, len, wl, pbn);
969 if (error) 969 if (error)
970 return error; 970 return error;
971 off += len; 971 off += len;
972 if (off >= wl->wl_circ_off + wl->wl_circ_size) 972 if (off >= wl->wl_circ_off + wl->wl_circ_size)
973 off = wl->wl_circ_off; 973 off = wl->wl_circ_off;
974 *offp = off; 974 *offp = off;
975 return 0; 975 return 0;
976} 976}
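The wrap-around split is the core of wapbl_circ_write(); a minimal sketch with hypothetical offsets follows (the real code additionally converts each offset to a physical block number before calling wapbl_buffered_write()).

#include <stdio.h>

int
main(void)
{
	long long circ_off  = 8192;	/* hypothetical start of the circular area */
	long long circ_size = 65536;	/* hypothetical size of the circular area */
	long long off = 70000;		/* current linear offset into the log */
	long long len = 8000;		/* bytes to write */

	long long slen = circ_off + circ_size - off;	/* room before the end */
	if (slen < len) {
		printf("first piece: %lld bytes at offset %lld\n", slen, off);
		len -= slen;
		off = circ_off;		/* wrap back to the start of the area */
	}
	printf("second piece: %lld bytes at offset %lld\n", len, off);
	off += len;
	if (off >= circ_off + circ_size)
		off = circ_off;
	printf("new offset: %lld\n", off);
	return 0;
}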
977 977
978/****************************************************************/ 978/****************************************************************/
979/* 979/*
980 * WAPBL transactions: entering, adding/removing bufs, and exiting 980 * WAPBL transactions: entering, adding/removing bufs, and exiting
981 */ 981 */
982 982
983int 983int
984wapbl_begin(struct wapbl *wl, const char *file, int line) 984wapbl_begin(struct wapbl *wl, const char *file, int line)
985{ 985{
986 int doflush; 986 int doflush;
987 unsigned lockcount; 987 unsigned lockcount;
988 988
989 KDASSERT(wl); 989 KDASSERT(wl);
990 990
991 /* 991 /*
992 * XXX this needs to be made much more sophisticated. 992 * XXX this needs to be made much more sophisticated.
993 * perhaps each wapbl_begin could reserve a specified 993 * perhaps each wapbl_begin could reserve a specified
994 * number of buffers and bytes. 994 * number of buffers and bytes.
995 */ 995 */
996 mutex_enter(&wl->wl_mtx); 996 mutex_enter(&wl->wl_mtx);
997 lockcount = wl->wl_lock_count; 997 lockcount = wl->wl_lock_count;
998 doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) > 998 doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
999 wl->wl_bufbytes_max / 2) || 999 wl->wl_bufbytes_max / 2) ||
1000 ((wl->wl_bufcount + (lockcount * 10)) > 1000 ((wl->wl_bufcount + (lockcount * 10)) >
1001 wl->wl_bufcount_max / 2) || 1001 wl->wl_bufcount_max / 2) ||
1002 (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) || 1002 (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
1003 (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2)); 1003 (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2));
1004 mutex_exit(&wl->wl_mtx); 1004 mutex_exit(&wl->wl_mtx);
1005 1005
1006 if (doflush) { 1006 if (doflush) {
1007 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1007 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1008 ("force flush lockcnt=%d bufbytes=%zu " 1008 ("force flush lockcnt=%d bufbytes=%zu "
1009 "(max=%zu) bufcount=%zu (max=%zu) " 1009 "(max=%zu) bufcount=%zu (max=%zu) "
1010 "dealloccnt %d (lim=%d)\n", 1010 "dealloccnt %d (lim=%d)\n",
1011 lockcount, wl->wl_bufbytes, 1011 lockcount, wl->wl_bufbytes,
1012 wl->wl_bufbytes_max, wl->wl_bufcount, 1012 wl->wl_bufbytes_max, wl->wl_bufcount,
1013 wl->wl_bufcount_max, 1013 wl->wl_bufcount_max,
1014 wl->wl_dealloccnt, wl->wl_dealloclim)); 1014 wl->wl_dealloccnt, wl->wl_dealloclim));
1015 } 1015 }
1016 1016
1017 if (doflush) { 1017 if (doflush) {
1018 int error = wapbl_flush(wl, 0); 1018 int error = wapbl_flush(wl, 0);
1019 if (error) 1019 if (error)
1020 return error; 1020 return error;
1021 } 1021 }
1022 1022
1023 rw_enter(&wl->wl_rwlock, RW_READER); 1023 rw_enter(&wl->wl_rwlock, RW_READER);
1024 mutex_enter(&wl->wl_mtx); 1024 mutex_enter(&wl->wl_mtx);
1025 wl->wl_lock_count++; 1025 wl->wl_lock_count++;
1026 mutex_exit(&wl->wl_mtx); 1026 mutex_exit(&wl->wl_mtx);
1027 1027
1028#if defined(WAPBL_DEBUG_PRINT) 1028#if defined(WAPBL_DEBUG_PRINT)
1029 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION, 1029 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
1030 ("wapbl_begin thread %d.%d with bufcount=%zu " 1030 ("wapbl_begin thread %d.%d with bufcount=%zu "
1031 "bufbytes=%zu bcount=%zu at %s:%d\n", 1031 "bufbytes=%zu bcount=%zu at %s:%d\n",
1032 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1032 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1033 wl->wl_bufbytes, wl->wl_bcount, file, line)); 1033 wl->wl_bufbytes, wl->wl_bcount, file, line));
1034#endif 1034#endif
1035 1035
1036 return 0; 1036 return 0;
1037} 1037}
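The force-flush heuristic above projects worst-case growth of the current transactions; a user-space sketch with hypothetical limits follows, assuming (as the code does) that each open transaction may still add up to MAXPHYS bytes and roughly 10 buffers.

#include <stdio.h>
#include <stddef.h>

#define MAXPHYS (64 * 1024)		/* assumed maximum transfer size */

int
main(void)
{
	size_t bufbytes = 2 * 1024 * 1024;	/* currently buffered bytes */
	size_t bufbytes_max = 8 * 1024 * 1024;	/* hypothetical limit */
	size_t bufcount = 400;			/* currently buffered bufs */
	size_t bufcount_max = 1024;		/* hypothetical limit */
	unsigned lockcount = 16;		/* open transactions */

	int doflush =
	    (bufbytes + lockcount * MAXPHYS > bufbytes_max / 2) ||
	    (bufcount + lockcount * 10 > bufcount_max / 2);

	printf("doflush = %d\n", doflush);	/* 1 here: 400 + 160 > 512 */
	return 0;
}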
1038 1038
1039void 1039void
1040wapbl_end(struct wapbl *wl) 1040wapbl_end(struct wapbl *wl)
1041{ 1041{
1042 1042
1043#if defined(WAPBL_DEBUG_PRINT) 1043#if defined(WAPBL_DEBUG_PRINT)
1044 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION, 1044 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
1045 ("wapbl_end thread %d.%d with bufcount=%zu " 1045 ("wapbl_end thread %d.%d with bufcount=%zu "
1046 "bufbytes=%zu bcount=%zu\n", 1046 "bufbytes=%zu bcount=%zu\n",
1047 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1047 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1048 wl->wl_bufbytes, wl->wl_bcount)); 1048 wl->wl_bufbytes, wl->wl_bcount));
1049#endif 1049#endif
1050 1050
1051 /* 1051 /*
1052 * XXX this could be handled more gracefully, perhaps place 1052 * XXX this could be handled more gracefully, perhaps place
1053 * only a partial transaction in the log and allow the 1053 * only a partial transaction in the log and allow the
1054 * remaining to flush without the protection of the journal. 1054 * remaining to flush without the protection of the journal.
1055 */ 1055 */
1056 KASSERTMSG((wapbl_transaction_len(wl) <= 1056 KASSERTMSG((wapbl_transaction_len(wl) <=
1057 (wl->wl_circ_size - wl->wl_reserved_bytes)), 1057 (wl->wl_circ_size - wl->wl_reserved_bytes)),
1058 "wapbl_end: current transaction too big to flush"); 1058 "wapbl_end: current transaction too big to flush");
1059 1059
1060 mutex_enter(&wl->wl_mtx); 1060 mutex_enter(&wl->wl_mtx);
1061 KASSERT(wl->wl_lock_count > 0); 1061 KASSERT(wl->wl_lock_count > 0);
1062 wl->wl_lock_count--; 1062 wl->wl_lock_count--;
1063 mutex_exit(&wl->wl_mtx); 1063 mutex_exit(&wl->wl_mtx);
1064 1064
1065 rw_exit(&wl->wl_rwlock); 1065 rw_exit(&wl->wl_rwlock);
1066} 1066}
1067 1067
1068void 1068void
1069wapbl_add_buf(struct wapbl *wl, struct buf * bp) 1069wapbl_add_buf(struct wapbl *wl, struct buf * bp)
1070{ 1070{
1071 1071
1072 KASSERT(bp->b_cflags & BC_BUSY); 1072 KASSERT(bp->b_cflags & BC_BUSY);
1073 KASSERT(bp->b_vp); 1073 KASSERT(bp->b_vp);
1074 1074
1075 wapbl_jlock_assert(wl); 1075 wapbl_jlock_assert(wl);
1076 1076
1077#if 0 1077#if 0
1078 /* 1078 /*
1079 * XXX this might be an issue for swapfiles. 1079 * XXX this might be an issue for swapfiles.
1080 * see uvm_swap.c:1702 1080 * see uvm_swap.c:1702
1081 * 1081 *
1082 * XXX2 why require it then? leap of semantics? 1082 * XXX2 why require it then? leap of semantics?
1083 */ 1083 */
1084 KASSERT((bp->b_cflags & BC_NOCACHE) == 0); 1084 KASSERT((bp->b_cflags & BC_NOCACHE) == 0);
1085#endif 1085#endif
1086 1086
1087 mutex_enter(&wl->wl_mtx); 1087 mutex_enter(&wl->wl_mtx);
1088 if (bp->b_flags & B_LOCKED) { 1088 if (bp->b_flags & B_LOCKED) {
1089 LIST_REMOVE(bp, b_wapbllist); 1089 LIST_REMOVE(bp, b_wapbllist);
1090 WAPBL_PRINTF(WAPBL_PRINT_BUFFER2, 1090 WAPBL_PRINTF(WAPBL_PRINT_BUFFER2,
1091 ("wapbl_add_buf thread %d.%d re-adding buf %p " 1091 ("wapbl_add_buf thread %d.%d re-adding buf %p "
1092 "with %d bytes %d bcount\n", 1092 "with %d bytes %d bcount\n",
1093 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, 1093 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
1094 bp->b_bcount)); 1094 bp->b_bcount));
1095 } else { 1095 } else {
 1096 /* unlocked but dirty buffers shouldn't exist */ 1096 /* unlocked but dirty buffers shouldn't exist */
1097 KASSERT(!(bp->b_oflags & BO_DELWRI)); 1097 KASSERT(!(bp->b_oflags & BO_DELWRI));
1098 wl->wl_bufbytes += bp->b_bufsize; 1098 wl->wl_bufbytes += bp->b_bufsize;
1099 wl->wl_bcount += bp->b_bcount; 1099 wl->wl_bcount += bp->b_bcount;
1100 wl->wl_bufcount++; 1100 wl->wl_bufcount++;
1101 WAPBL_PRINTF(WAPBL_PRINT_BUFFER, 1101 WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
1102 ("wapbl_add_buf thread %d.%d adding buf %p " 1102 ("wapbl_add_buf thread %d.%d adding buf %p "
1103 "with %d bytes %d bcount\n", 1103 "with %d bytes %d bcount\n",
1104 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, 1104 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
1105 bp->b_bcount)); 1105 bp->b_bcount));
1106 } 1106 }
1107 LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist); 1107 LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist);
1108 mutex_exit(&wl->wl_mtx); 1108 mutex_exit(&wl->wl_mtx);
1109 1109
1110 bp->b_flags |= B_LOCKED; 1110 bp->b_flags |= B_LOCKED;
1111} 1111}
1112 1112
1113static void 1113static void
1114wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp) 1114wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp)
1115{ 1115{
1116 1116
1117 KASSERT(mutex_owned(&wl->wl_mtx)); 1117 KASSERT(mutex_owned(&wl->wl_mtx));
1118 KASSERT(bp->b_cflags & BC_BUSY); 1118 KASSERT(bp->b_cflags & BC_BUSY);
1119 wapbl_jlock_assert(wl); 1119 wapbl_jlock_assert(wl);
1120 1120
1121#if 0 1121#if 0
1122 /* 1122 /*
1123 * XXX this might be an issue for swapfiles. 1123 * XXX this might be an issue for swapfiles.
1124 * see uvm_swap.c:1725 1124 * see uvm_swap.c:1725
1125 * 1125 *
1126 * XXXdeux: see above 1126 * XXXdeux: see above
1127 */ 1127 */
1128 KASSERT((bp->b_flags & BC_NOCACHE) == 0); 1128 KASSERT((bp->b_flags & BC_NOCACHE) == 0);
1129#endif 1129#endif
1130 KASSERT(bp->b_flags & B_LOCKED); 1130 KASSERT(bp->b_flags & B_LOCKED);
1131 1131
1132 WAPBL_PRINTF(WAPBL_PRINT_BUFFER, 1132 WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
1133 ("wapbl_remove_buf thread %d.%d removing buf %p with " 1133 ("wapbl_remove_buf thread %d.%d removing buf %p with "
1134 "%d bytes %d bcount\n", 1134 "%d bytes %d bcount\n",
1135 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount)); 1135 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount));
1136 1136
1137 KASSERT(wl->wl_bufbytes >= bp->b_bufsize); 1137 KASSERT(wl->wl_bufbytes >= bp->b_bufsize);
1138 wl->wl_bufbytes -= bp->b_bufsize; 1138 wl->wl_bufbytes -= bp->b_bufsize;
1139 KASSERT(wl->wl_bcount >= bp->b_bcount); 1139 KASSERT(wl->wl_bcount >= bp->b_bcount);
1140 wl->wl_bcount -= bp->b_bcount; 1140 wl->wl_bcount -= bp->b_bcount;
1141 KASSERT(wl->wl_bufcount > 0); 1141 KASSERT(wl->wl_bufcount > 0);
1142 wl->wl_bufcount--; 1142 wl->wl_bufcount--;
1143 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0)); 1143 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
1144 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0)); 1144 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
1145 LIST_REMOVE(bp, b_wapbllist); 1145 LIST_REMOVE(bp, b_wapbllist);
1146 1146
1147 bp->b_flags &= ~B_LOCKED; 1147 bp->b_flags &= ~B_LOCKED;
1148} 1148}
1149 1149
1150/* called from brelsel() in vfs_bio among other places */ 1150/* called from brelsel() in vfs_bio among other places */
1151void 1151void
1152wapbl_remove_buf(struct wapbl * wl, struct buf *bp) 1152wapbl_remove_buf(struct wapbl * wl, struct buf *bp)
1153{ 1153{
1154 1154
1155 mutex_enter(&wl->wl_mtx); 1155 mutex_enter(&wl->wl_mtx);
1156 wapbl_remove_buf_locked(wl, bp); 1156 wapbl_remove_buf_locked(wl, bp);
1157 mutex_exit(&wl->wl_mtx); 1157 mutex_exit(&wl->wl_mtx);
1158} 1158}
1159 1159
1160void 1160void
1161wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt) 1161wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt)
1162{ 1162{
1163 1163
1164 KASSERT(bp->b_cflags & BC_BUSY); 1164 KASSERT(bp->b_cflags & BC_BUSY);
1165 1165
1166 /* 1166 /*
1167 * XXX: why does this depend on B_LOCKED? otherwise the buf 1167 * XXX: why does this depend on B_LOCKED? otherwise the buf
1168 * is not for a transaction? if so, why is this called in the 1168 * is not for a transaction? if so, why is this called in the
1169 * first place? 1169 * first place?
1170 */ 1170 */
1171 if (bp->b_flags & B_LOCKED) { 1171 if (bp->b_flags & B_LOCKED) {
1172 mutex_enter(&wl->wl_mtx); 1172 mutex_enter(&wl->wl_mtx);
1173 wl->wl_bufbytes += bp->b_bufsize - oldsz; 1173 wl->wl_bufbytes += bp->b_bufsize - oldsz;
1174 wl->wl_bcount += bp->b_bcount - oldcnt; 1174 wl->wl_bcount += bp->b_bcount - oldcnt;
1175 mutex_exit(&wl->wl_mtx); 1175 mutex_exit(&wl->wl_mtx);
1176 } 1176 }
1177} 1177}
1178 1178
1179#endif /* _KERNEL */ 1179#endif /* _KERNEL */
1180 1180
1181/****************************************************************/ 1181/****************************************************************/
1182/* Some utility inlines */ 1182/* Some utility inlines */
1183 1183
1184/* 1184/*
1185 * wapbl_space_used(avail, head, tail) 1185 * wapbl_space_used(avail, head, tail)
1186 * 1186 *
1187 * Number of bytes used in a circular queue of avail total bytes, 1187 * Number of bytes used in a circular queue of avail total bytes,
1188 * from tail to head. 1188 * from tail to head.
1189 */ 1189 */
1190static inline size_t 1190static inline size_t
1191wapbl_space_used(size_t avail, off_t head, off_t tail) 1191wapbl_space_used(size_t avail, off_t head, off_t tail)
1192{ 1192{
1193 1193
1194 if (tail == 0) { 1194 if (tail == 0) {
1195 KASSERT(head == 0); 1195 KASSERT(head == 0);
1196 return 0; 1196 return 0;
1197 } 1197 }
1198 return ((head + (avail - 1) - tail) % avail) + 1; 1198 return ((head + (avail - 1) - tail) % avail) + 1;
1199} 1199}
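The used-bytes formula can be sanity-checked in user space; the queue sizes and offsets below are hypothetical, and 0 stands for the empty queue exactly as in the code above.

#include <assert.h>
#include <stddef.h>

static size_t
space_used(size_t avail, long long head, long long tail)
{
	if (tail == 0)
		return 0;
	return ((head + (avail - 1) - tail) % avail) + 1;
}

int
main(void)
{
	assert(space_used(100, 30, 10) == 20);	/* no wrap: 10..30 is used */
	assert(space_used(100, 5, 90) == 15);	/* wraps around the end */
	assert(space_used(100, 0, 0) == 0);	/* empty queue */
	return 0;
}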
1200 1200
1201#ifdef _KERNEL 1201#ifdef _KERNEL
1202/* 1202/*
1203 * wapbl_advance(size, off, oldoff, delta) 1203 * wapbl_advance(size, off, oldoff, delta)
1204 * 1204 *
1205 * Given a byte offset oldoff into a circular queue of size bytes 1205 * Given a byte offset oldoff into a circular queue of size bytes
1206 * starting at off, return a new byte offset oldoff + delta into 1206 * starting at off, return a new byte offset oldoff + delta into
1207 * the circular queue. 1207 * the circular queue.
1208 */ 1208 */
1209static inline off_t 1209static inline off_t
1210wapbl_advance(size_t size, size_t off, off_t oldoff, size_t delta) 1210wapbl_advance(size_t size, size_t off, off_t oldoff, size_t delta)
1211{ 1211{
1212 off_t newoff; 1212 off_t newoff;
1213 1213
1214 /* Define acceptable ranges for inputs. */ 1214 /* Define acceptable ranges for inputs. */
1215 KASSERT(delta <= (size_t)size); 1215 KASSERT(delta <= (size_t)size);
1216 KASSERT((oldoff == 0) || ((size_t)oldoff >= off)); 1216 KASSERT((oldoff == 0) || ((size_t)oldoff >= off));
1217 KASSERT(oldoff < (off_t)(size + off)); 1217 KASSERT(oldoff < (off_t)(size + off));
1218 1218
1219 if ((oldoff == 0) && (delta != 0)) 1219 if ((oldoff == 0) && (delta != 0))
1220 newoff = off + delta; 1220 newoff = off + delta;
1221 else if ((oldoff + delta) < (size + off)) 1221 else if ((oldoff + delta) < (size + off))
1222 newoff = oldoff + delta; 1222 newoff = oldoff + delta;
1223 else 1223 else
1224 newoff = (oldoff + delta) - size; 1224 newoff = (oldoff + delta) - size;
1225 1225
1226 /* Note some interesting axioms */ 1226 /* Note some interesting axioms */
1227 KASSERT((delta != 0) || (newoff == oldoff)); 1227 KASSERT((delta != 0) || (newoff == oldoff));
1228 KASSERT((delta == 0) || (newoff != 0)); 1228 KASSERT((delta == 0) || (newoff != 0));
1229 KASSERT((delta != (size)) || (newoff == oldoff)); 1229 KASSERT((delta != (size)) || (newoff == oldoff));
1230 1230
1231 /* Define acceptable ranges for output. */ 1231 /* Define acceptable ranges for output. */
1232 KASSERT((newoff == 0) || ((size_t)newoff >= off)); 1232 KASSERT((newoff == 0) || ((size_t)newoff >= off));
1233 KASSERT((size_t)newoff < (size + off)); 1233 KASSERT((size_t)newoff < (size + off));
1234 return newoff; 1234 return newoff;
1235} 1235}
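A user-space sketch of the same advance rule with hypothetical offsets: 0 is the "empty" sentinel, valid offsets live in [off, off + size), and advancing past the end wraps back to off.

#include <assert.h>
#include <stddef.h>

static long long
advance(size_t size, size_t off, long long oldoff, size_t delta)
{
	long long newoff;

	if (oldoff == 0 && delta != 0)
		newoff = off + delta;		/* leaving the "empty" state */
	else if (oldoff + (long long)delta < (long long)(size + off))
		newoff = oldoff + delta;	/* no wrap needed */
	else
		newoff = oldoff + delta - size;	/* wrap around the end */
	return newoff;
}

int
main(void)
{
	/* A circular area of 100 bytes starting at byte offset 10. */
	assert(advance(100, 10, 0, 25) == 35);	/* advance out of "empty" */
	assert(advance(100, 10, 35, 50) == 85);	/* plain advance */
	assert(advance(100, 10, 85, 40) == 25);	/* wraps past offset 110 */
	return 0;
}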
1236 1236
1237/* 1237/*
1238 * wapbl_space_free(avail, head, tail) 1238 * wapbl_space_free(avail, head, tail)
1239 * 1239 *
1240 * Number of bytes free in a circular queue of avail total bytes, 1240 * Number of bytes free in a circular queue of avail total bytes,
1241 * in which everything from tail to head is used. 1241 * in which everything from tail to head is used.
1242 */ 1242 */
1243static inline size_t 1243static inline size_t
1244wapbl_space_free(size_t avail, off_t head, off_t tail) 1244wapbl_space_free(size_t avail, off_t head, off_t tail)
1245{ 1245{
1246 1246
1247 return avail - wapbl_space_used(avail, head, tail); 1247 return avail - wapbl_space_used(avail, head, tail);
1248} 1248}
1249 1249
1250/* 1250/*
1251 * wapbl_advance_head(size, off, delta, headp, tailp) 1251 * wapbl_advance_head(size, off, delta, headp, tailp)
1252 * 1252 *
1253 * In a circular queue of size bytes starting at off, given the 1253 * In a circular queue of size bytes starting at off, given the
1254 * old head and tail offsets *headp and *tailp, store the new head 1254 * old head and tail offsets *headp and *tailp, store the new head
1255 * and tail offsets in *headp and *tailp resulting from adding 1255 * and tail offsets in *headp and *tailp resulting from adding
1256 * delta bytes of data to the head. 1256 * delta bytes of data to the head.
1257 */ 1257 */
1258static inline void 1258static inline void
1259wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp, 1259wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp,
1260 off_t *tailp) 1260 off_t *tailp)
1261{ 1261{
1262 off_t head = *headp; 1262 off_t head = *headp;
1263 off_t tail = *tailp; 1263 off_t tail = *tailp;
1264 1264
1265 KASSERT(delta <= wapbl_space_free(size, head, tail)); 1265 KASSERT(delta <= wapbl_space_free(size, head, tail));
1266 head = wapbl_advance(size, off, head, delta); 1266 head = wapbl_advance(size, off, head, delta);
1267 if ((tail == 0) && (head != 0)) 1267 if ((tail == 0) && (head != 0))
1268 tail = off; 1268 tail = off;
1269 *headp = head; 1269 *headp = head;
1270 *tailp = tail; 1270 *tailp = tail;
1271} 1271}
1272 1272
1273/* 1273/*
1274 * wapbl_advance_tail(size, off, delta, headp, tailp) 1274 * wapbl_advance_tail(size, off, delta, headp, tailp)
1275 * 1275 *
1276 * In a circular queue of size bytes starting at off, given the 1276 * In a circular queue of size bytes starting at off, given the
1277 * old head and tail offsets *headp and *tailp, store the new head 1277 * old head and tail offsets *headp and *tailp, store the new head
1278 * and tail offsets in *headp and *tailp resulting from removing 1278 * and tail offsets in *headp and *tailp resulting from removing
1279 * delta bytes of data from the tail. 1279 * delta bytes of data from the tail.
1280 */ 1280 */
1281static inline void 1281static inline void
1282wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp, 1282wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp,
1283 off_t *tailp) 1283 off_t *tailp)
1284{ 1284{
1285 off_t head = *headp; 1285 off_t head = *headp;
1286 off_t tail = *tailp; 1286 off_t tail = *tailp;
1287 1287
1288 KASSERT(delta <= wapbl_space_used(size, head, tail)); 1288 KASSERT(delta <= wapbl_space_used(size, head, tail));
1289 tail = wapbl_advance(size, off, tail, delta); 1289 tail = wapbl_advance(size, off, tail, delta);
1290 if (head == tail) { 1290 if (head == tail) {
1291 head = tail = 0; 1291 head = tail = 0;
1292 } 1292 }
1293 *headp = head; 1293 *headp = head;
1294 *tailp = tail; 1294 *tailp = tail;
1295} 1295}
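Taken together, the two helpers above maintain the 0-means-empty invariant: adding the first data moves the tail off 0, and draining the queue resets both offsets to 0. A sketch with hypothetical offsets:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	/* A circular area of 100 bytes starting at byte offset 10. */
	long long head = 0, tail = 0;

	/* Add 30 bytes at the head of an empty queue. */
	head = 10 + 30;			/* wapbl_advance(100, 10, 0, 30) */
	if (tail == 0 && head != 0)
		tail = 10;		/* first data: tail leaves "empty" */

	/* Remove those 30 bytes from the tail. */
	tail = 10 + 30;			/* wapbl_advance(100, 10, 10, 30) */
	if (head == tail)
		head = tail = 0;	/* queue is empty again */

	assert(head == 0 && tail == 0);
	printf("head=%lld tail=%lld\n", head, tail);
	return 0;
}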
1296 1296
1297 1297
1298/****************************************************************/ 1298/****************************************************************/
1299 1299
1300/* 1300/*
1301 * wapbl_truncate(wl, minfree) 1301 * wapbl_truncate(wl, minfree)
1302 * 1302 *
1303 * Wait until at least minfree bytes are available in the log. 1303 * Wait until at least minfree bytes are available in the log.
1304 * 1304 *
1305 * If it was necessary to wait for writes to complete, 1305 * If it was necessary to wait for writes to complete,
1306 * advance the circular queue tail to reflect the new write 1306 * advance the circular queue tail to reflect the new write
1307 * completions and issue a write commit to the log. 1307 * completions and issue a write commit to the log.
1308 * 1308 *
1309 * => Caller must hold wl->wl_rwlock writer lock. 1309 * => Caller must hold wl->wl_rwlock writer lock.
1310 */ 1310 */
1311static int 1311static int
1312wapbl_truncate(struct wapbl *wl, size_t minfree) 1312wapbl_truncate(struct wapbl *wl, size_t minfree)
1313{ 1313{
1314 size_t delta; 1314 size_t delta;
1315 size_t avail; 1315 size_t avail;
1316 off_t head; 1316 off_t head;
1317 off_t tail; 1317 off_t tail;
1318 int error = 0; 1318 int error = 0;
1319 1319
1320 KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes)); 1320 KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes));
1321 KASSERT(rw_write_held(&wl->wl_rwlock)); 1321 KASSERT(rw_write_held(&wl->wl_rwlock));
1322 1322
1323 mutex_enter(&wl->wl_mtx); 1323 mutex_enter(&wl->wl_mtx);
1324 1324
1325 /* 1325 /*
1326 * First check to see if we have to do a commit 1326 * First check to see if we have to do a commit
1327 * at all. 1327 * at all.
1328 */ 1328 */
1329 avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail); 1329 avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail);
1330 if (minfree < avail) { 1330 if (minfree < avail) {
1331 mutex_exit(&wl->wl_mtx); 1331 mutex_exit(&wl->wl_mtx);
1332 return 0; 1332 return 0;
1333 } 1333 }
1334 minfree -= avail; 1334 minfree -= avail;
1335 while ((wl->wl_error_count == 0) && 1335 while ((wl->wl_error_count == 0) &&
1336 (wl->wl_reclaimable_bytes < minfree)) { 1336 (wl->wl_reclaimable_bytes < minfree)) {
1337 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE, 1337 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1338 ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd " 1338 ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd "
1339 "minfree=%zd\n", 1339 "minfree=%zd\n",
1340 &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes, 1340 &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes,
1341 minfree)); 1341 minfree));
1342 1342
1343 cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx); 1343 cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx);
1344 } 1344 }
1345 if (wl->wl_reclaimable_bytes < minfree) { 1345 if (wl->wl_reclaimable_bytes < minfree) {
1346 KASSERT(wl->wl_error_count); 1346 KASSERT(wl->wl_error_count);
1347 /* XXX maybe get actual error from buffer instead someday? */ 1347 /* XXX maybe get actual error from buffer instead someday? */
1348 error = EIO; 1348 error = EIO;
1349 } 1349 }
1350 head = wl->wl_head; 1350 head = wl->wl_head;
1351 tail = wl->wl_tail; 1351 tail = wl->wl_tail;
1352 delta = wl->wl_reclaimable_bytes; 1352 delta = wl->wl_reclaimable_bytes;
1353 1353
 1354 /* If all of the entries are flushed, then be sure to keep 1354 /* If all of the entries are flushed, then be sure to keep
1355 * the reserved bytes reserved. Watch out for discarded transactions, 1355 * the reserved bytes reserved. Watch out for discarded transactions,
1356 * which could leave more bytes reserved than are reclaimable. 1356 * which could leave more bytes reserved than are reclaimable.
1357 */ 1357 */
1358 if (SIMPLEQ_EMPTY(&wl->wl_entries) &&  1358 if (SIMPLEQ_EMPTY(&wl->wl_entries) &&
1359 (delta >= wl->wl_reserved_bytes)) { 1359 (delta >= wl->wl_reserved_bytes)) {
1360 delta -= wl->wl_reserved_bytes; 1360 delta -= wl->wl_reserved_bytes;
1361 } 1361 }
1362 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head, 1362 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head,
1363 &tail); 1363 &tail);
1364 KDASSERT(wl->wl_reserved_bytes <= 1364 KDASSERT(wl->wl_reserved_bytes <=
1365 wapbl_space_used(wl->wl_circ_size, head, tail)); 1365 wapbl_space_used(wl->wl_circ_size, head, tail));
1366 mutex_exit(&wl->wl_mtx); 1366 mutex_exit(&wl->wl_mtx);
1367 1367
1368 if (error) 1368 if (error)
1369 return error; 1369 return error;
1370 1370
1371 /* 1371 /*
 1372 * This is where head, tail and delta are unprotected from 1372 * This is where head, tail and delta are unprotected from
 1373 * races against this routine or against flush. This is OK since 1373 * races against this routine or against flush. This is OK since
 1374 * we only call this routine from inside flush itself. 1374 * we only call this routine from inside flush itself.
1375 * 1375 *
1376 * XXX: how can it race against itself when accessed only 1376 * XXX: how can it race against itself when accessed only
1377 * from behind the write-locked rwlock? 1377 * from behind the write-locked rwlock?
1378 */ 1378 */
1379 error = wapbl_write_commit(wl, head, tail); 1379 error = wapbl_write_commit(wl, head, tail);
1380 if (error) 1380 if (error)
1381 return error; 1381 return error;
1382 1382
1383 wl->wl_head = head; 1383 wl->wl_head = head;
1384 wl->wl_tail = tail; 1384 wl->wl_tail = tail;
1385 1385
1386 mutex_enter(&wl->wl_mtx); 1386 mutex_enter(&wl->wl_mtx);
1387 KASSERT(wl->wl_reclaimable_bytes >= delta); 1387 KASSERT(wl->wl_reclaimable_bytes >= delta);
1388 wl->wl_reclaimable_bytes -= delta; 1388 wl->wl_reclaimable_bytes -= delta;
1389 mutex_exit(&wl->wl_mtx); 1389 mutex_exit(&wl->wl_mtx);
1390 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE, 1390 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1391 ("wapbl_truncate thread %d.%d truncating %zu bytes\n", 1391 ("wapbl_truncate thread %d.%d truncating %zu bytes\n",
1392 curproc->p_pid, curlwp->l_lid, delta)); 1392 curproc->p_pid, curlwp->l_lid, delta));
1393 1393
1394 return 0; 1394 return 0;
1395} 1395}
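A sketch, with hypothetical byte counts, of the reserve handling above: when the entry list is empty, the reclaimable delta is trimmed by wl_reserved_bytes so the commit-header reserve is never handed back to the log as free space.

#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	size_t reclaimable = 40960;	/* hypothetical completed bytes */
	size_t reserved = 8192;		/* hypothetical wl_reserved_bytes */
	int entries_empty = 1;		/* every transaction has flushed */

	size_t delta = reclaimable;
	if (entries_empty && delta >= reserved)
		delta -= reserved;	/* keep the reserve reserved */

	printf("advance the tail by %zu bytes\n", delta);
	return 0;
}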
1396 1396
1397/****************************************************************/ 1397/****************************************************************/
1398 1398
1399void 1399void
1400wapbl_biodone(struct buf *bp) 1400wapbl_biodone(struct buf *bp)
1401{ 1401{
1402 struct wapbl_entry *we = bp->b_private; 1402 struct wapbl_entry *we = bp->b_private;
1403 struct wapbl *wl = we->we_wapbl; 1403 struct wapbl *wl = we->we_wapbl;
1404#ifdef WAPBL_DEBUG_BUFBYTES 1404#ifdef WAPBL_DEBUG_BUFBYTES
1405 const int bufsize = bp->b_bufsize; 1405 const int bufsize = bp->b_bufsize;
1406#endif 1406#endif
1407 1407
1408 /* 1408 /*
 1409 * Handle possible flushing of buffers after the log has been 1409 * Handle possible flushing of buffers after the log has been
 1410 * decommissioned. 1410 * decommissioned.
1411 */ 1411 */
1412 if (!wl) { 1412 if (!wl) {
1413 KASSERT(we->we_bufcount > 0); 1413 KASSERT(we->we_bufcount > 0);
1414 we->we_bufcount--; 1414 we->we_bufcount--;
1415#ifdef WAPBL_DEBUG_BUFBYTES 1415#ifdef WAPBL_DEBUG_BUFBYTES
1416 KASSERT(we->we_unsynced_bufbytes >= bufsize); 1416 KASSERT(we->we_unsynced_bufbytes >= bufsize);
1417 we->we_unsynced_bufbytes -= bufsize; 1417 we->we_unsynced_bufbytes -= bufsize;
1418#endif 1418#endif
1419 1419
1420 if (we->we_bufcount == 0) { 1420 if (we->we_bufcount == 0) {
1421#ifdef WAPBL_DEBUG_BUFBYTES 1421#ifdef WAPBL_DEBUG_BUFBYTES
1422 KASSERT(we->we_unsynced_bufbytes == 0); 1422 KASSERT(we->we_unsynced_bufbytes == 0);
1423#endif 1423#endif
1424 pool_put(&wapbl_entry_pool, we); 1424 pool_put(&wapbl_entry_pool, we);
1425 } 1425 }
1426 1426
1427 brelse(bp, 0); 1427 brelse(bp, 0);
1428 return; 1428 return;
1429 } 1429 }
1430 1430
1431#ifdef ohbother 1431#ifdef ohbother
1432 KDASSERT(bp->b_oflags & BO_DONE); 1432 KDASSERT(bp->b_oflags & BO_DONE);
1433 KDASSERT(!(bp->b_oflags & BO_DELWRI)); 1433 KDASSERT(!(bp->b_oflags & BO_DELWRI));
1434 KDASSERT(bp->b_flags & B_ASYNC); 1434 KDASSERT(bp->b_flags & B_ASYNC);
1435 KDASSERT(bp->b_cflags & BC_BUSY); 1435 KDASSERT(bp->b_cflags & BC_BUSY);
1436 KDASSERT(!(bp->b_flags & B_LOCKED)); 1436 KDASSERT(!(bp->b_flags & B_LOCKED));
1437 KDASSERT(!(bp->b_flags & B_READ)); 1437 KDASSERT(!(bp->b_flags & B_READ));
1438 KDASSERT(!(bp->b_cflags & BC_INVAL)); 1438 KDASSERT(!(bp->b_cflags & BC_INVAL));
1439 KDASSERT(!(bp->b_cflags & BC_NOCACHE)); 1439 KDASSERT(!(bp->b_cflags & BC_NOCACHE));
1440#endif 1440#endif
1441 1441
1442 if (bp->b_error) { 1442 if (bp->b_error) {
1443 /* 1443 /*
1444 * If an error occurs, it would be nice to leave the buffer 1444 * If an error occurs, it would be nice to leave the buffer
1445 * as a delayed write on the LRU queue so that we can retry 1445 * as a delayed write on the LRU queue so that we can retry
1446 * it later. But buffercache(9) can't handle dirty buffer 1446 * it later. But buffercache(9) can't handle dirty buffer
1447 * reuse, so just mark the log permanently errored out. 1447 * reuse, so just mark the log permanently errored out.
1448 */ 1448 */
1449 mutex_enter(&wl->wl_mtx); 1449 mutex_enter(&wl->wl_mtx);
1450 if (wl->wl_error_count == 0) { 1450 if (wl->wl_error_count == 0) {
1451 wl->wl_error_count++; 1451 wl->wl_error_count++;
1452 cv_broadcast(&wl->wl_reclaimable_cv); 1452 cv_broadcast(&wl->wl_reclaimable_cv);
1453 } 1453 }
1454 mutex_exit(&wl->wl_mtx); 1454 mutex_exit(&wl->wl_mtx);
1455 } 1455 }
1456 1456
1457 /* 1457 /*
1458 * Release the buffer here. wapbl_flush() may wait for the 1458 * Release the buffer here. wapbl_flush() may wait for the
 1459 * log to become empty, and we had better unbusy the buffer before 1459 * log to become empty, and we had better unbusy the buffer before
1460 * wapbl_flush() returns. 1460 * wapbl_flush() returns.
1461 */ 1461 */
1462 brelse(bp, 0); 1462 brelse(bp, 0);
1463 1463
1464 mutex_enter(&wl->wl_mtx); 1464 mutex_enter(&wl->wl_mtx);
1465 1465
1466 KASSERT(we->we_bufcount > 0); 1466 KASSERT(we->we_bufcount > 0);
1467 we->we_bufcount--; 1467 we->we_bufcount--;
1468#ifdef WAPBL_DEBUG_BUFBYTES 1468#ifdef WAPBL_DEBUG_BUFBYTES
1469 KASSERT(we->we_unsynced_bufbytes >= bufsize); 1469 KASSERT(we->we_unsynced_bufbytes >= bufsize);
1470 we->we_unsynced_bufbytes -= bufsize; 1470 we->we_unsynced_bufbytes -= bufsize;
1471 KASSERT(wl->wl_unsynced_bufbytes >= bufsize); 1471 KASSERT(wl->wl_unsynced_bufbytes >= bufsize);
1472 wl->wl_unsynced_bufbytes -= bufsize; 1472 wl->wl_unsynced_bufbytes -= bufsize;
1473#endif 1473#endif
1474 1474
1475 /* 1475 /*
1476 * If the current transaction can be reclaimed, start 1476 * If the current transaction can be reclaimed, start
1477 * at the beginning and reclaim any consecutive reclaimable 1477 * at the beginning and reclaim any consecutive reclaimable
1478 * transactions. If we successfully reclaim anything, 1478 * transactions. If we successfully reclaim anything,
1479 * then wakeup anyone waiting for the reclaim. 1479 * then wakeup anyone waiting for the reclaim.
1480 */ 1480 */
1481 if (we->we_bufcount == 0) { 1481 if (we->we_bufcount == 0) {
1482 size_t delta = 0; 1482 size_t delta = 0;
1483 int errcnt = 0; 1483 int errcnt = 0;
1484#ifdef WAPBL_DEBUG_BUFBYTES 1484#ifdef WAPBL_DEBUG_BUFBYTES
1485 KDASSERT(we->we_unsynced_bufbytes == 0); 1485 KDASSERT(we->we_unsynced_bufbytes == 0);
1486#endif 1486#endif
1487 /* 1487 /*
1488 * clear any posted error, since the buffer it came from 1488 * clear any posted error, since the buffer it came from
 1489 * has been successfully flushed by now 1489 * has been successfully flushed by now
1490 */ 1490 */
1491 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) && 1491 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) &&
1492 (we->we_bufcount == 0)) { 1492 (we->we_bufcount == 0)) {
1493 delta += we->we_reclaimable_bytes; 1493 delta += we->we_reclaimable_bytes;
1494 if (we->we_error) 1494 if (we->we_error)
1495 errcnt++; 1495 errcnt++;
1496 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries); 1496 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
1497 pool_put(&wapbl_entry_pool, we); 1497 pool_put(&wapbl_entry_pool, we);
1498 } 1498 }
1499 1499
1500 if (delta) { 1500 if (delta) {
1501 wl->wl_reclaimable_bytes += delta; 1501 wl->wl_reclaimable_bytes += delta;
1502 KASSERT(wl->wl_error_count >= errcnt); 1502 KASSERT(wl->wl_error_count >= errcnt);
1503 wl->wl_error_count -= errcnt; 1503 wl->wl_error_count -= errcnt;
1504 cv_broadcast(&wl->wl_reclaimable_cv); 1504 cv_broadcast(&wl->wl_reclaimable_cv);
1505 } 1505 }
1506 } 1506 }
1507 1507
1508 mutex_exit(&wl->wl_mtx); 1508 mutex_exit(&wl->wl_mtx);
1509} 1509}
1510 1510
1511/* 1511/*
1512 * wapbl_flush(wl, wait) 1512 * wapbl_flush(wl, wait)
1513 * 1513 *
1514 * Flush pending block writes, deallocations, and inodes from 1514 * Flush pending block writes, deallocations, and inodes from
1515 * the current transaction in memory to the log on disk: 1515 * the current transaction in memory to the log on disk:
1516 * 1516 *
1517 * 1. Call the file system's wl_flush callback to flush any 1517 * 1. Call the file system's wl_flush callback to flush any
1518 * per-file-system pending updates. 1518 * per-file-system pending updates.
1519 * 2. Wait for enough space in the log for the current transaction. 1519 * 2. Wait for enough space in the log for the current transaction.
1520 * 3. Synchronously write the new log records, advancing the 1520 * 3. Synchronously write the new log records, advancing the
1521 * circular queue head. 1521 * circular queue head.
1522 * 4. Issue the pending block writes asynchronously, now that they 1522 * 4. Issue the pending block writes asynchronously, now that they
1523 * are recorded in the log and can be replayed after crash. 1523 * are recorded in the log and can be replayed after crash.
1524 * 5. If wait is true, wait for all writes to complete and for the 1524 * 5. If wait is true, wait for all writes to complete and for the
1525 * log to become empty. 1525 * log to become empty.
1526 * 1526 *
1527 * On failure, call the file system's wl_flush_abort callback. 1527 * On failure, call the file system's wl_flush_abort callback.
1528 */ 1528 */
1529int 1529int
1530wapbl_flush(struct wapbl *wl, int waitfor) 1530wapbl_flush(struct wapbl *wl, int waitfor)
1531{ 1531{
1532 struct buf *bp; 1532 struct buf *bp;
1533 struct wapbl_entry *we; 1533 struct wapbl_entry *we;
1534 off_t off; 1534 off_t off;
1535 off_t head; 1535 off_t head;
1536 off_t tail; 1536 off_t tail;
1537 size_t delta = 0; 1537 size_t delta = 0;
1538 size_t flushsize; 1538 size_t flushsize;
1539 size_t reserved; 1539 size_t reserved;
1540 int error = 0; 1540 int error = 0;
1541 1541
1542 /* 1542 /*
 1543 * Do a quick check to see if a full flush can be skipped. 1543 * Do a quick check to see if a full flush can be skipped.
1544 * This assumes that the flush callback does not need to be called 1544 * This assumes that the flush callback does not need to be called
1545 * unless there are other outstanding bufs. 1545 * unless there are other outstanding bufs.
1546 */ 1546 */
1547 if (!waitfor) { 1547 if (!waitfor) {
1548 size_t nbufs; 1548 size_t nbufs;
1549 mutex_enter(&wl->wl_mtx); /* XXX need mutex here to 1549 mutex_enter(&wl->wl_mtx); /* XXX need mutex here to
1550 protect the KASSERTS */ 1550 protect the KASSERTS */
1551 nbufs = wl->wl_bufcount; 1551 nbufs = wl->wl_bufcount;
1552 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0)); 1552 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
1553 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0)); 1553 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
1554 mutex_exit(&wl->wl_mtx); 1554 mutex_exit(&wl->wl_mtx);
1555 if (nbufs == 0) 1555 if (nbufs == 0)
1556 return 0; 1556 return 0;
1557 } 1557 }
1558 1558
1559 /* 1559 /*
1560 * XXX we may consider using LK_UPGRADE here 1560 * XXX we may consider using LK_UPGRADE here
1561 * if we want to call flush from inside a transaction 1561 * if we want to call flush from inside a transaction
1562 */ 1562 */
1563 rw_enter(&wl->wl_rwlock, RW_WRITER); 1563 rw_enter(&wl->wl_rwlock, RW_WRITER);
1564 wl->wl_flush(wl->wl_mount, SIMPLEQ_FIRST(&wl->wl_dealloclist)); 1564 wl->wl_flush(wl->wl_mount, SIMPLEQ_FIRST(&wl->wl_dealloclist));
1565 1565
1566 /* 1566 /*
1567 * Now that we are exclusively locked and the file system has 1567 * Now that we are exclusively locked and the file system has
1568 * issued any deferred block writes for this transaction, check 1568 * issued any deferred block writes for this transaction, check
1569 * whether there are any blocks to write to the log. If not, 1569 * whether there are any blocks to write to the log. If not,
1570 * skip waiting for space or writing any log entries. 1570 * skip waiting for space or writing any log entries.
1571 * 1571 *
1572 * XXX Shouldn't this also check wl_dealloccnt and 1572 * XXX Shouldn't this also check wl_dealloccnt and
1573 * wl_inohashcnt? Perhaps wl_dealloccnt doesn't matter if the 1573 * wl_inohashcnt? Perhaps wl_dealloccnt doesn't matter if the
1574 * file system didn't produce any blocks as a consequence of 1574 * file system didn't produce any blocks as a consequence of
1575 * it, but the same does not seem to be so of wl_inohashcnt. 1575 * it, but the same does not seem to be so of wl_inohashcnt.
1576 */ 1576 */
1577 if (wl->wl_bufcount == 0) { 1577 if (wl->wl_bufcount == 0) {
1578 goto wait_out; 1578 goto wait_out;
1579 } 1579 }
1580 1580
1581#if 0 1581#if 0
1582 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1582 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1583 ("wapbl_flush thread %d.%d flushing entries with " 1583 ("wapbl_flush thread %d.%d flushing entries with "
1584 "bufcount=%zu bufbytes=%zu\n", 1584 "bufcount=%zu bufbytes=%zu\n",
1585 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1585 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1586 wl->wl_bufbytes)); 1586 wl->wl_bufbytes));
1587#endif 1587#endif
1588 1588
1589 /* Calculate amount of space needed to flush */ 1589 /* Calculate amount of space needed to flush */
1590 flushsize = wapbl_transaction_len(wl); 1590 flushsize = wapbl_transaction_len(wl);
1591 if (wapbl_verbose_commit) { 1591 if (wapbl_verbose_commit) {
1592 struct timespec ts; 1592 struct timespec ts;
1593 getnanotime(&ts); 1593 getnanotime(&ts);
1594 printf("%s: %lld.%09ld this transaction = %zu bytes\n", 1594 printf("%s: %lld.%09ld this transaction = %zu bytes\n",
1595 __func__, (long long)ts.tv_sec, 1595 __func__, (long long)ts.tv_sec,
1596 (long)ts.tv_nsec, flushsize); 1596 (long)ts.tv_nsec, flushsize);
1597 } 1597 }
1598 1598
1599 if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) { 1599 if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) {
1600 /* 1600 /*
1601 * XXX this could be handled more gracefully, perhaps place 1601 * XXX this could be handled more gracefully, perhaps place
1602 * only a partial transaction in the log and allow the 1602 * only a partial transaction in the log and allow the
1603 * remaining to flush without the protection of the journal. 1603 * remaining to flush without the protection of the journal.
1604 */ 1604 */
1605 panic("wapbl_flush: current transaction too big to flush"); 1605 panic("wapbl_flush: current transaction too big to flush");
1606 } 1606 }
1607 1607
1608 error = wapbl_truncate(wl, flushsize); 1608 error = wapbl_truncate(wl, flushsize);
1609 if (error) 1609 if (error)
1610 goto out; 1610 goto out;
1611 1611
1612 off = wl->wl_head; 1612 off = wl->wl_head;
1613 KASSERT((off == 0) || (off >= wl->wl_circ_off)); 1613 KASSERT((off == 0) || (off >= wl->wl_circ_off));
1614 KASSERT((off == 0) || (off < wl->wl_circ_off + wl->wl_circ_size)); 1614 KASSERT((off == 0) || (off < wl->wl_circ_off + wl->wl_circ_size));
1615 error = wapbl_write_blocks(wl, &off); 1615 error = wapbl_write_blocks(wl, &off);
1616 if (error) 1616 if (error)
1617 goto out; 1617 goto out;
1618 error = wapbl_write_revocations(wl, &off); 1618 error = wapbl_write_revocations(wl, &off);
1619 if (error) 1619 if (error)
1620 goto out; 1620 goto out;
1621 error = wapbl_write_inodes(wl, &off); 1621 error = wapbl_write_inodes(wl, &off);
1622 if (error) 1622 if (error)
1623 goto out; 1623 goto out;
1624 1624
1625 reserved = 0; 1625 reserved = 0;
1626 if (wl->wl_inohashcnt) 1626 if (wl->wl_inohashcnt)
1627 reserved = wapbl_transaction_inodes_len(wl); 1627 reserved = wapbl_transaction_inodes_len(wl);
1628 1628
1629 head = wl->wl_head; 1629 head = wl->wl_head;
1630 tail = wl->wl_tail; 1630 tail = wl->wl_tail;
1631 1631
1632 wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize, 1632 wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize,
1633 &head, &tail); 1633 &head, &tail);
1634 1634
1635 KASSERTMSG(head == off, 1635 KASSERTMSG(head == off,
1636 "lost head! head=%"PRIdMAX" tail=%" PRIdMAX 1636 "lost head! head=%"PRIdMAX" tail=%" PRIdMAX
1637 " off=%"PRIdMAX" flush=%zu", 1637 " off=%"PRIdMAX" flush=%zu",
1638 (intmax_t)head, (intmax_t)tail, (intmax_t)off, 1638 (intmax_t)head, (intmax_t)tail, (intmax_t)off,
1639 flushsize); 1639 flushsize);
1640 1640
1641 /* Opportunistically move the tail forward if we can */ 1641 /* Opportunistically move the tail forward if we can */
1642 mutex_enter(&wl->wl_mtx); 1642 mutex_enter(&wl->wl_mtx);
1643 delta = wl->wl_reclaimable_bytes; 1643 delta = wl->wl_reclaimable_bytes;
1644 mutex_exit(&wl->wl_mtx); 1644 mutex_exit(&wl->wl_mtx);
1645 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, 1645 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta,
1646 &head, &tail); 1646 &head, &tail);
1647 1647
1648 error = wapbl_write_commit(wl, head, tail); 1648 error = wapbl_write_commit(wl, head, tail);
1649 if (error) 1649 if (error)
1650 goto out; 1650 goto out;
1651 1651
1652 we = pool_get(&wapbl_entry_pool, PR_WAITOK); 1652 we = pool_get(&wapbl_entry_pool, PR_WAITOK);
1653 1653
1654#ifdef WAPBL_DEBUG_BUFBYTES 1654#ifdef WAPBL_DEBUG_BUFBYTES
1655 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1655 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1656 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu" 1656 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
1657 " unsynced=%zu" 1657 " unsynced=%zu"
1658 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d " 1658 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
1659 "inodes=%d\n", 1659 "inodes=%d\n",
1660 curproc->p_pid, curlwp->l_lid, flushsize, delta, 1660 curproc->p_pid, curlwp->l_lid, flushsize, delta,
1661 wapbl_space_used(wl->wl_circ_size, head, tail), 1661 wapbl_space_used(wl->wl_circ_size, head, tail),
1662 wl->wl_unsynced_bufbytes, wl->wl_bufcount, 1662 wl->wl_unsynced_bufbytes, wl->wl_bufcount,
1663 wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt, 1663 wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt,
1664 wl->wl_inohashcnt)); 1664 wl->wl_inohashcnt));
1665#else 1665#else
1666 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1666 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1667 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu" 1667 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
1668 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d " 1668 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
1669 "inodes=%d\n", 1669 "inodes=%d\n",
1670 curproc->p_pid, curlwp->l_lid, flushsize, delta, 1670 curproc->p_pid, curlwp->l_lid, flushsize, delta,
1671 wapbl_space_used(wl->wl_circ_size, head, tail), 1671 wapbl_space_used(wl->wl_circ_size, head, tail),
1672 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount, 1672 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
1673 wl->wl_dealloccnt, wl->wl_inohashcnt)); 1673 wl->wl_dealloccnt, wl->wl_inohashcnt));
1674#endif 1674#endif
1675 1675
1676 1676
1677 mutex_enter(&bufcache_lock); 1677 mutex_enter(&bufcache_lock);
1678 mutex_enter(&wl->wl_mtx); 1678 mutex_enter(&wl->wl_mtx);
1679 1679
1680 wl->wl_reserved_bytes = reserved; 1680 wl->wl_reserved_bytes = reserved;
1681 wl->wl_head = head; 1681 wl->wl_head = head;
1682 wl->wl_tail = tail; 1682 wl->wl_tail = tail;
1683 KASSERT(wl->wl_reclaimable_bytes >= delta); 1683 KASSERT(wl->wl_reclaimable_bytes >= delta);
1684 wl->wl_reclaimable_bytes -= delta; 1684 wl->wl_reclaimable_bytes -= delta;
1685 KDASSERT(wl->wl_dealloccnt == 0); 1685 KDASSERT(wl->wl_dealloccnt == 0);
1686#ifdef WAPBL_DEBUG_BUFBYTES 1686#ifdef WAPBL_DEBUG_BUFBYTES
1687 wl->wl_unsynced_bufbytes += wl->wl_bufbytes; 1687 wl->wl_unsynced_bufbytes += wl->wl_bufbytes;
1688#endif 1688#endif
1689 1689
1690 we->we_wapbl = wl; 1690 we->we_wapbl = wl;
1691 we->we_bufcount = wl->wl_bufcount; 1691 we->we_bufcount = wl->wl_bufcount;
1692#ifdef WAPBL_DEBUG_BUFBYTES 1692#ifdef WAPBL_DEBUG_BUFBYTES
1693 we->we_unsynced_bufbytes = wl->wl_bufbytes; 1693 we->we_unsynced_bufbytes = wl->wl_bufbytes;
1694#endif 1694#endif
1695 we->we_reclaimable_bytes = flushsize; 1695 we->we_reclaimable_bytes = flushsize;
1696 we->we_error = 0; 1696 we->we_error = 0;
1697 SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries); 1697 SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries);
1698 1698
1699 /* 1699 /*
 1700 * This flushes bufs in the reverse order from which they were queued. 1700 * This flushes bufs in the reverse order from which they were queued.
 1701 * It shouldn't matter, but if we care we could use a TAILQ instead. 1701 * It shouldn't matter, but if we care we could use a TAILQ instead.
1702 * XXX Note they will get put on the lru queue when they flush 1702 * XXX Note they will get put on the lru queue when they flush
1703 * so we might actually want to change this to preserve order. 1703 * so we might actually want to change this to preserve order.
1704 */ 1704 */
1705 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) { 1705 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
1706 if (bbusy(bp, 0, 0, &wl->wl_mtx)) { 1706 if (bbusy(bp, 0, 0, &wl->wl_mtx)) {
1707 continue; 1707 continue;
1708 } 1708 }
1709 bp->b_iodone = wapbl_biodone; 1709 bp->b_iodone = wapbl_biodone;
1710 bp->b_private = we; 1710 bp->b_private = we;
1711 bremfree(bp); 1711 bremfree(bp);
1712 wapbl_remove_buf_locked(wl, bp); 1712 wapbl_remove_buf_locked(wl, bp);
1713 mutex_exit(&wl->wl_mtx); 1713 mutex_exit(&wl->wl_mtx);
1714 mutex_exit(&bufcache_lock); 1714 mutex_exit(&bufcache_lock);
1715 bawrite(bp); 1715 bawrite(bp);
1716 mutex_enter(&bufcache_lock); 1716 mutex_enter(&bufcache_lock);
1717 mutex_enter(&wl->wl_mtx); 1717 mutex_enter(&wl->wl_mtx);
1718 } 1718 }
1719 mutex_exit(&wl->wl_mtx); 1719 mutex_exit(&wl->wl_mtx);
1720 mutex_exit(&bufcache_lock); 1720 mutex_exit(&bufcache_lock);
1721 1721
1722#if 0 1722#if 0
1723 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1723 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1724 ("wapbl_flush thread %d.%d done flushing entries...\n", 1724 ("wapbl_flush thread %d.%d done flushing entries...\n",
1725 curproc->p_pid, curlwp->l_lid)); 1725 curproc->p_pid, curlwp->l_lid));
1726#endif 1726#endif
1727 1727
1728 wait_out: 1728 wait_out:
1729 1729
1730 /* 1730 /*
1731 * If the waitfor flag is set, don't return until everything is 1731 * If the waitfor flag is set, don't return until everything is
1732 * fully flushed and the on disk log is empty. 1732 * fully flushed and the on disk log is empty.
1733 */ 1733 */
1734 if (waitfor) { 1734 if (waitfor) {
1735 error = wapbl_truncate(wl, wl->wl_circ_size -  1735 error = wapbl_truncate(wl, wl->wl_circ_size -
1736 wl->wl_reserved_bytes); 1736 wl->wl_reserved_bytes);
1737 } 1737 }
1738 1738
1739 out: 1739 out:
1740 if (error) { 1740 if (error) {
1741 wl->wl_flush_abort(wl->wl_mount, 1741 wl->wl_flush_abort(wl->wl_mount,
1742 SIMPLEQ_FIRST(&wl->wl_dealloclist)); 1742 SIMPLEQ_FIRST(&wl->wl_dealloclist));
1743 } 1743 }
1744 1744
1745#ifdef WAPBL_DEBUG_PRINT 1745#ifdef WAPBL_DEBUG_PRINT
1746 if (error) { 1746 if (error) {
1747 pid_t pid = -1; 1747 pid_t pid = -1;
1748 lwpid_t lid = -1; 1748 lwpid_t lid = -1;
1749 if (curproc) 1749 if (curproc)
1750 pid = curproc->p_pid; 1750 pid = curproc->p_pid;
1751 if (curlwp) 1751 if (curlwp)
1752 lid = curlwp->l_lid; 1752 lid = curlwp->l_lid;
1753 mutex_enter(&wl->wl_mtx); 1753 mutex_enter(&wl->wl_mtx);
1754#ifdef WAPBL_DEBUG_BUFBYTES 1754#ifdef WAPBL_DEBUG_BUFBYTES
1755 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1755 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1756 ("wapbl_flush: thread %d.%d aborted flush: " 1756 ("wapbl_flush: thread %d.%d aborted flush: "
1757 "error = %d\n" 1757 "error = %d\n"
1758 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 1758 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
1759 "deallocs=%d inodes=%d\n" 1759 "deallocs=%d inodes=%d\n"
1760 "\terrcnt = %d, reclaimable=%zu reserved=%zu " 1760 "\terrcnt = %d, reclaimable=%zu reserved=%zu "
1761 "unsynced=%zu\n", 1761 "unsynced=%zu\n",
1762 pid, lid, error, wl->wl_bufcount, 1762 pid, lid, error, wl->wl_bufcount,
1763 wl->wl_bufbytes, wl->wl_bcount, 1763 wl->wl_bufbytes, wl->wl_bcount,
1764 wl->wl_dealloccnt, wl->wl_inohashcnt, 1764 wl->wl_dealloccnt, wl->wl_inohashcnt,
1765 wl->wl_error_count, wl->wl_reclaimable_bytes, 1765 wl->wl_error_count, wl->wl_reclaimable_bytes,
1766 wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes)); 1766 wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes));
1767 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1767 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1768 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1768 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1769 ("\tentry: bufcount = %zu, reclaimable = %zu, " 1769 ("\tentry: bufcount = %zu, reclaimable = %zu, "
1770 "error = %d, unsynced = %zu\n", 1770 "error = %d, unsynced = %zu\n",
1771 we->we_bufcount, we->we_reclaimable_bytes, 1771 we->we_bufcount, we->we_reclaimable_bytes,
1772 we->we_error, we->we_unsynced_bufbytes)); 1772 we->we_error, we->we_unsynced_bufbytes));
1773 } 1773 }
1774#else 1774#else
1775 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1775 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1776 ("wapbl_flush: thread %d.%d aborted flush: " 1776 ("wapbl_flush: thread %d.%d aborted flush: "
1777 "error = %d\n" 1777 "error = %d\n"
1778 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 1778 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
1779 "deallocs=%d inodes=%d\n" 1779 "deallocs=%d inodes=%d\n"
1780 "\terrcnt = %d, reclaimable=%zu reserved=%zu\n", 1780 "\terrcnt = %d, reclaimable=%zu reserved=%zu\n",
1781 pid, lid, error, wl->wl_bufcount, 1781 pid, lid, error, wl->wl_bufcount,
1782 wl->wl_bufbytes, wl->wl_bcount, 1782 wl->wl_bufbytes, wl->wl_bcount,
1783 wl->wl_dealloccnt, wl->wl_inohashcnt, 1783 wl->wl_dealloccnt, wl->wl_inohashcnt,
1784 wl->wl_error_count, wl->wl_reclaimable_bytes, 1784 wl->wl_error_count, wl->wl_reclaimable_bytes,
1785 wl->wl_reserved_bytes)); 1785 wl->wl_reserved_bytes));
1786 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1786 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1787 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1787 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1788 ("\tentry: bufcount = %zu, reclaimable = %zu, " 1788 ("\tentry: bufcount = %zu, reclaimable = %zu, "
1789 "error = %d\n", we->we_bufcount, 1789 "error = %d\n", we->we_bufcount,
1790 we->we_reclaimable_bytes, we->we_error)); 1790 we->we_reclaimable_bytes, we->we_error));
1791 } 1791 }
1792#endif 1792#endif
1793 mutex_exit(&wl->wl_mtx); 1793 mutex_exit(&wl->wl_mtx);
1794 } 1794 }
1795#endif 1795#endif
1796 1796
1797 rw_exit(&wl->wl_rwlock); 1797 rw_exit(&wl->wl_rwlock);
1798 return error; 1798 return error;
1799} 1799}
1800 1800
1801/****************************************************************/ 1801/****************************************************************/
1802 1802
1803void 1803void
1804wapbl_jlock_assert(struct wapbl *wl) 1804wapbl_jlock_assert(struct wapbl *wl)
1805{ 1805{
1806 1806
1807 KASSERT(rw_lock_held(&wl->wl_rwlock)); 1807 KASSERT(rw_lock_held(&wl->wl_rwlock));
1808} 1808}
1809 1809
1810void 1810void
1811wapbl_junlock_assert(struct wapbl *wl) 1811wapbl_junlock_assert(struct wapbl *wl)
1812{ 1812{
1813 1813
1814 KASSERT(!rw_write_held(&wl->wl_rwlock)); 1814 KASSERT(!rw_write_held(&wl->wl_rwlock));
1815} 1815}
1816 1816
1817/****************************************************************/ 1817/****************************************************************/
1818 1818
1819/* locks missing */ 1819/* locks missing */
1820void 1820void
1821wapbl_print(struct wapbl *wl, 1821wapbl_print(struct wapbl *wl,
1822 int full, 1822 int full,
1823 void (*pr)(const char *, ...)) 1823 void (*pr)(const char *, ...))
1824{ 1824{
1825 struct buf *bp; 1825 struct buf *bp;
1826 struct wapbl_entry *we; 1826 struct wapbl_entry *we;
1827 (*pr)("wapbl %p", wl); 1827 (*pr)("wapbl %p", wl);
1828 (*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n", 1828 (*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n",
1829 wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn); 1829 wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn);
1830 (*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n", 1830 (*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n",
1831 wl->wl_circ_size, wl->wl_circ_off, 1831 wl->wl_circ_size, wl->wl_circ_off,
1832 (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail); 1832 (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail);
1833 (*pr)("fs_dev_bshift = %d, log_dev_bshift = %d\n", 1833 (*pr)("fs_dev_bshift = %d, log_dev_bshift = %d\n",
1834 wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift); 1834 wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift);
1835#ifdef WAPBL_DEBUG_BUFBYTES 1835#ifdef WAPBL_DEBUG_BUFBYTES
1836 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu " 1836 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
1837 "reserved = %zu errcnt = %d unsynced = %zu\n", 1837 "reserved = %zu errcnt = %d unsynced = %zu\n",
1838 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount, 1838 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
1839 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 1839 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
1840 wl->wl_error_count, wl->wl_unsynced_bufbytes); 1840 wl->wl_error_count, wl->wl_unsynced_bufbytes);
1841#else 1841#else
1842 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu " 1842 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
1843 "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes, 1843 "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes,
1844 wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 1844 wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
1845 wl->wl_error_count); 1845 wl->wl_error_count);
1846#endif 1846#endif
1847 (*pr)("\tdealloccnt = %d, dealloclim = %d\n", 1847 (*pr)("\tdealloccnt = %d, dealloclim = %d\n",
1848 wl->wl_dealloccnt, wl->wl_dealloclim); 1848 wl->wl_dealloccnt, wl->wl_dealloclim);
1849 (*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n", 1849 (*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n",
1850 wl->wl_inohashcnt, wl->wl_inohashmask); 1850 wl->wl_inohashcnt, wl->wl_inohashmask);
1851 (*pr)("entries:\n"); 1851 (*pr)("entries:\n");
1852 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1852 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1853#ifdef WAPBL_DEBUG_BUFBYTES 1853#ifdef WAPBL_DEBUG_BUFBYTES
1854 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, " 1854 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, "
1855 "unsynced = %zu\n", 1855 "unsynced = %zu\n",
1856 we->we_bufcount, we->we_reclaimable_bytes, 1856 we->we_bufcount, we->we_reclaimable_bytes,
1857 we->we_error, we->we_unsynced_bufbytes); 1857 we->we_error, we->we_unsynced_bufbytes);
1858#else 1858#else
1859 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n", 1859 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n",
1860 we->we_bufcount, we->we_reclaimable_bytes, we->we_error); 1860 we->we_bufcount, we->we_reclaimable_bytes, we->we_error);
1861#endif 1861#endif
1862 } 1862 }
1863 if (full) { 1863 if (full) {
1864 int cnt = 0; 1864 int cnt = 0;
1865 (*pr)("bufs ="); 1865 (*pr)("bufs =");
1866 LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) { 1866 LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) {
1867 if (!LIST_NEXT(bp, b_wapbllist)) { 1867 if (!LIST_NEXT(bp, b_wapbllist)) {
1868 (*pr)(" %p", bp); 1868 (*pr)(" %p", bp);
1869 } else if ((++cnt % 6) == 0) { 1869 } else if ((++cnt % 6) == 0) {
1870 (*pr)(" %p,\n\t", bp); 1870 (*pr)(" %p,\n\t", bp);
1871 } else { 1871 } else {
1872 (*pr)(" %p,", bp); 1872 (*pr)(" %p,", bp);
1873 } 1873 }
1874 } 1874 }
1875 (*pr)("\n"); 1875 (*pr)("\n");
1876 1876
1877 (*pr)("dealloced blks = "); 1877 (*pr)("dealloced blks = ");
1878 { 1878 {
1879 struct wapbl_dealloc *wd; 1879 struct wapbl_dealloc *wd;
1880 cnt = 0; 1880 cnt = 0;
1881 SIMPLEQ_FOREACH(wd, &wl->wl_dealloclist, wd_entries) { 1881 SIMPLEQ_FOREACH(wd, &wl->wl_dealloclist, wd_entries) {
1882 (*pr)(" %"PRId64":%d,", 1882 (*pr)(" %"PRId64":%d,",
1883 wd->wd_blkno, 1883 wd->wd_blkno,
1884 wd->wd_len); 1884 wd->wd_len);
1885 if ((++cnt % 4) == 0) { 1885 if ((++cnt % 4) == 0) {
1886 (*pr)("\n\t"); 1886 (*pr)("\n\t");
1887 } 1887 }
1888 } 1888 }
1889 } 1889 }
1890 (*pr)("\n"); 1890 (*pr)("\n");
1891 1891
1892 (*pr)("registered inodes = "); 1892 (*pr)("registered inodes = ");
1893 { 1893 {
1894 int i; 1894 int i;
1895 cnt = 0; 1895 cnt = 0;
1896 for (i = 0; i <= wl->wl_inohashmask; i++) { 1896 for (i = 0; i <= wl->wl_inohashmask; i++) {
1897 struct wapbl_ino_head *wih; 1897 struct wapbl_ino_head *wih;
1898 struct wapbl_ino *wi; 1898 struct wapbl_ino *wi;
1899 1899
1900 wih = &wl->wl_inohash[i]; 1900 wih = &wl->wl_inohash[i];
1901 LIST_FOREACH(wi, wih, wi_hash) { 1901 LIST_FOREACH(wi, wih, wi_hash) {
1902 if (wi->wi_ino == 0) 1902 if (wi->wi_ino == 0)
1903 continue; 1903 continue;
1904 (*pr)(" %"PRIu64"/0%06"PRIo32",", 1904 (*pr)(" %"PRIu64"/0%06"PRIo32",",
1905 wi->wi_ino, wi->wi_mode); 1905 wi->wi_ino, wi->wi_mode);
1906 if ((++cnt % 4) == 0) { 1906 if ((++cnt % 4) == 0) {
1907 (*pr)("\n\t"); 1907 (*pr)("\n\t");
1908 } 1908 }
1909 } 1909 }
1910 } 1910 }
1911 (*pr)("\n"); 1911 (*pr)("\n");
1912 } 1912 }
1913 } 1913 }
1914} 1914}
1915 1915
1916#if defined(WAPBL_DEBUG) || defined(DDB) 1916#if defined(WAPBL_DEBUG) || defined(DDB)
1917void 1917void
1918wapbl_dump(struct wapbl *wl) 1918wapbl_dump(struct wapbl *wl)
1919{ 1919{
1920#if defined(WAPBL_DEBUG) 1920#if defined(WAPBL_DEBUG)
1921 if (!wl) 1921 if (!wl)
1922 wl = wapbl_debug_wl; 1922 wl = wapbl_debug_wl;
1923#endif 1923#endif
1924 if (!wl) 1924 if (!wl)
1925 return; 1925 return;
1926 wapbl_print(wl, 1, printf); 1926 wapbl_print(wl, 1, printf);
1927} 1927}
1928#endif 1928#endif
1929 1929
1930/****************************************************************/ 1930/****************************************************************/
1931 1931
1932void 1932void
1933wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len) 1933wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len)
1934{ 1934{
1935 struct wapbl_dealloc *wd; 1935 struct wapbl_dealloc *wd;
1936 1936
1937 wapbl_jlock_assert(wl); 1937 wapbl_jlock_assert(wl);
1938 1938
1939 mutex_enter(&wl->wl_mtx); 1939 mutex_enter(&wl->wl_mtx);
1940 /* XXX should eventually instead tie this into resource estimation */ 1940 /* XXX should eventually instead tie this into resource estimation */
1941 /* 1941 /*
1942 * XXX this panic needs locking/mutex analysis and the 1942 * XXX this panic needs locking/mutex analysis and the
1943 * ability to cope with the failure. 1943 * ability to cope with the failure.
1944 */ 1944 */
1945 /* XXX this XXX doesn't have enough XXX */ 1945 /* XXX this XXX doesn't have enough XXX */
1946 if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim)) 1946 if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim))
1947 panic("wapbl_register_deallocation: out of resources"); 1947 panic("wapbl_register_deallocation: out of resources");
1948 1948
 1949 wl->wl_dealloccnt++;
 1950 mutex_exit(&wl->wl_mtx);
 1951
1949 wd = pool_get(&wapbl_dealloc_pool, PR_WAITOK); 1952 wd = pool_get(&wapbl_dealloc_pool, PR_WAITOK);
1950 wd->wd_blkno = blk; 1953 wd->wd_blkno = blk;
1951 wd->wd_len = len; 1954 wd->wd_len = len;
1952 1955
 1956 mutex_enter(&wl->wl_mtx);
1953 SIMPLEQ_INSERT_TAIL(&wl->wl_dealloclist, wd, wd_entries); 1957 SIMPLEQ_INSERT_TAIL(&wl->wl_dealloclist, wd, wd_entries);
1954 wl->wl_dealloccnt++; 1958 mutex_exit(&wl->wl_mtx);
1955 1959
1956 WAPBL_PRINTF(WAPBL_PRINT_ALLOC, 1960 WAPBL_PRINTF(WAPBL_PRINT_ALLOC,
1957 ("wapbl_register_deallocation: blk=%"PRId64" len=%d\n", blk, len)); 1961 ("wapbl_register_deallocation: blk=%"PRId64" len=%d\n", blk, len));
1958 mutex_exit(&wl->wl_mtx); 
1959} 1962}
1960 1963
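The hunk above is the substance of this revision: pool_get() with PR_WAITOK may sleep, so wl_mtx may not be held across the call. wl_dealloccnt is now bumped while the mutex is still held (so the wl_dealloclim check stays accurate even if several threads race through here), the mutex is released for the blocking allocation, and it is re-taken only to queue the new entry. Below is a minimal, self-contained sketch of that reserve/drop/allocate/re-lock pattern; example_list, example_item, example_pool and example_register are made-up names rather than wapbl code, and the sketch returns ENOMEM where the function above panics.

	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/mutex.h>
	#include <sys/pool.h>
	#include <sys/queue.h>

	struct example_item {
		SIMPLEQ_ENTRY(example_item) ei_entries;
	};

	struct example_list {
		kmutex_t el_mtx;
		int el_cnt;	/* number of queued items */
		int el_lim;	/* limit, only checked under el_mtx */
		SIMPLEQ_HEAD(, example_item) el_head;
	};

	static struct pool example_pool;	/* assumed initialized elsewhere */

	static int
	example_register(struct example_list *el)
	{
		struct example_item *it;

		mutex_enter(&el->el_mtx);
		if (el->el_cnt >= el->el_lim) {
			mutex_exit(&el->el_mtx);
			return ENOMEM;		/* the wapbl code panics here */
		}
		el->el_cnt++;			/* reserve the slot before unlocking */
		mutex_exit(&el->el_mtx);	/* never sleep while holding el_mtx */

		it = pool_get(&example_pool, PR_WAITOK);	/* may sleep */

		mutex_enter(&el->el_mtx);
		SIMPLEQ_INSERT_TAIL(&el->el_head, it, ei_entries);
		mutex_exit(&el->el_mtx);

		return 0;
	}

Reserving el_cnt before the mutex is dropped is the important detail: if the increment happened only after re-acquiring the lock, two callers could both pass the limit check and overshoot el_lim while neither holds the mutex.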
1961/****************************************************************/ 1964/****************************************************************/
1962 1965
1963static void 1966static void
1964wapbl_inodetrk_init(struct wapbl *wl, u_int size) 1967wapbl_inodetrk_init(struct wapbl *wl, u_int size)
1965{ 1968{
1966 1969
1967 wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask); 1970 wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask);
1968 if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) { 1971 if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) {
1969 pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0, 1972 pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0,
1970 "wapblinopl", &pool_allocator_nointr, IPL_NONE); 1973 "wapblinopl", &pool_allocator_nointr, IPL_NONE);
1971 } 1974 }
1972} 1975}
1973 1976
1974static void 1977static void
1975wapbl_inodetrk_free(struct wapbl *wl) 1978wapbl_inodetrk_free(struct wapbl *wl)
1976{ 1979{
1977 1980
1978 /* XXX this KASSERT needs locking/mutex analysis */ 1981 /* XXX this KASSERT needs locking/mutex analysis */
1979 KASSERT(wl->wl_inohashcnt == 0); 1982 KASSERT(wl->wl_inohashcnt == 0);
1980 hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask); 1983 hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask);
1981 if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) { 1984 if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) {
1982 pool_destroy(&wapbl_ino_pool); 1985 pool_destroy(&wapbl_ino_pool);
1983 } 1986 }
1984} 1987}
1985 1988
1986static struct wapbl_ino * 1989static struct wapbl_ino *
1987wapbl_inodetrk_get(struct wapbl *wl, ino_t ino) 1990wapbl_inodetrk_get(struct wapbl *wl, ino_t ino)
1988{ 1991{
1989 struct wapbl_ino_head *wih; 1992 struct wapbl_ino_head *wih;
1990 struct wapbl_ino *wi; 1993 struct wapbl_ino *wi;
1991 1994
1992 KASSERT(mutex_owned(&wl->wl_mtx)); 1995 KASSERT(mutex_owned(&wl->wl_mtx));
1993 1996
1994 wih = &wl->wl_inohash[ino & wl->wl_inohashmask]; 1997 wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
1995 LIST_FOREACH(wi, wih, wi_hash) { 1998 LIST_FOREACH(wi, wih, wi_hash) {
1996 if (ino == wi->wi_ino) 1999 if (ino == wi->wi_ino)
1997 return wi; 2000 return wi;
1998 } 2001 }
1999 return 0; 2002 return 0;
2000} 2003}
2001 2004
2002void 2005void
2003wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode) 2006wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode)
2004{ 2007{
2005 struct wapbl_ino_head *wih; 2008 struct wapbl_ino_head *wih;
2006 struct wapbl_ino *wi; 2009 struct wapbl_ino *wi;
2007 2010
2008 wi = pool_get(&wapbl_ino_pool, PR_WAITOK); 2011 wi = pool_get(&wapbl_ino_pool, PR_WAITOK);
2009 2012
2010 mutex_enter(&wl->wl_mtx); 2013 mutex_enter(&wl->wl_mtx);
2011 if (wapbl_inodetrk_get(wl, ino) == NULL) { 2014 if (wapbl_inodetrk_get(wl, ino) == NULL) {
2012 wi->wi_ino = ino; 2015 wi->wi_ino = ino;
2013 wi->wi_mode = mode; 2016 wi->wi_mode = mode;
2014 wih = &wl->wl_inohash[ino & wl->wl_inohashmask]; 2017 wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
2015 LIST_INSERT_HEAD(wih, wi, wi_hash); 2018 LIST_INSERT_HEAD(wih, wi, wi_hash);
2016 wl->wl_inohashcnt++; 2019 wl->wl_inohashcnt++;
2017 WAPBL_PRINTF(WAPBL_PRINT_INODE, 2020 WAPBL_PRINTF(WAPBL_PRINT_INODE,
2018 ("wapbl_register_inode: ino=%"PRId64"\n", ino)); 2021 ("wapbl_register_inode: ino=%"PRId64"\n", ino));
2019 mutex_exit(&wl->wl_mtx); 2022 mutex_exit(&wl->wl_mtx);
2020 } else { 2023 } else {
2021 mutex_exit(&wl->wl_mtx); 2024 mutex_exit(&wl->wl_mtx);
2022 pool_put(&wapbl_ino_pool, wi); 2025 pool_put(&wapbl_ino_pool, wi);
2023 } 2026 }
2024} 2027}
2025 2028
2026void 2029void
2027wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode) 2030wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode)
2028{ 2031{
2029 struct wapbl_ino *wi; 2032 struct wapbl_ino *wi;
2030 2033
2031 mutex_enter(&wl->wl_mtx); 2034 mutex_enter(&wl->wl_mtx);
2032 wi = wapbl_inodetrk_get(wl, ino); 2035 wi = wapbl_inodetrk_get(wl, ino);
2033 if (wi) { 2036 if (wi) {
2034 WAPBL_PRINTF(WAPBL_PRINT_INODE, 2037 WAPBL_PRINTF(WAPBL_PRINT_INODE,
2035 ("wapbl_unregister_inode: ino=%"PRId64"\n", ino)); 2038 ("wapbl_unregister_inode: ino=%"PRId64"\n", ino));
2036 KASSERT(wl->wl_inohashcnt > 0); 2039 KASSERT(wl->wl_inohashcnt > 0);
2037 wl->wl_inohashcnt--; 2040 wl->wl_inohashcnt--;
2038 LIST_REMOVE(wi, wi_hash); 2041 LIST_REMOVE(wi, wi_hash);
2039 mutex_exit(&wl->wl_mtx); 2042 mutex_exit(&wl->wl_mtx);
2040 2043
2041 pool_put(&wapbl_ino_pool, wi); 2044 pool_put(&wapbl_ino_pool, wi);
2042 } else { 2045 } else {
2043 mutex_exit(&wl->wl_mtx); 2046 mutex_exit(&wl->wl_mtx);
2044 } 2047 }
2045} 2048}
2046 2049
2047/****************************************************************/ 2050/****************************************************************/
2048 2051
2049/* 2052/*
2050 * wapbl_transaction_inodes_len(wl) 2053 * wapbl_transaction_inodes_len(wl)
2051 * 2054 *
2052 * Calculate the number of bytes required for inode registration 2055 * Calculate the number of bytes required for inode registration
2053 * log records in wl. 2056 * log records in wl.
2054 */ 2057 */
2055static inline size_t 2058static inline size_t
2056wapbl_transaction_inodes_len(struct wapbl *wl) 2059wapbl_transaction_inodes_len(struct wapbl *wl)
2057{ 2060{
2058 int blocklen = 1<<wl->wl_log_dev_bshift; 2061 int blocklen = 1<<wl->wl_log_dev_bshift;
2059 int iph; 2062 int iph;
2060 2063
2061 /* Calculate number of inodes described in an inodelist header */ 2064 /* Calculate number of inodes described in an inodelist header */
2062 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) / 2065 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
2063 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]); 2066 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
2064 2067
2065 KASSERT(iph > 0); 2068 KASSERT(iph > 0);
2066 2069
2067 return MAX(1, howmany(wl->wl_inohashcnt, iph)) * blocklen; 2070 return MAX(1, howmany(wl->wl_inohashcnt, iph)) * blocklen;
2068} 2071}
2069 2072
2070 2073
2071/* 2074/*
2072 * wapbl_transaction_len(wl) 2075 * wapbl_transaction_len(wl)
2073 * 2076 *
2074 * Calculate number of bytes required for all log records in wl. 2077 * Calculate number of bytes required for all log records in wl.
2075 */ 2078 */
2076static size_t 2079static size_t
2077wapbl_transaction_len(struct wapbl *wl) 2080wapbl_transaction_len(struct wapbl *wl)
2078{ 2081{
2079 int blocklen = 1<<wl->wl_log_dev_bshift; 2082 int blocklen = 1<<wl->wl_log_dev_bshift;
2080 size_t len; 2083 size_t len;
2081 2084
2082 /* Calculate number of blocks described in a blocklist header */ 2085 /* Calculate number of blocks described in a blocklist header */
2083 len = wl->wl_bcount; 2086 len = wl->wl_bcount;
2084 len += howmany(wl->wl_bufcount, wl->wl_brperjblock) * blocklen; 2087 len += howmany(wl->wl_bufcount, wl->wl_brperjblock) * blocklen;
2085 len += howmany(wl->wl_dealloccnt, wl->wl_brperjblock) * blocklen; 2088 len += howmany(wl->wl_dealloccnt, wl->wl_brperjblock) * blocklen;
2086 len += wapbl_transaction_inodes_len(wl); 2089 len += wapbl_transaction_inodes_len(wl);
2087 2090
2088 return len; 2091 return len;
2089} 2092}
2090 2093
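To make wapbl_transaction_len() concrete, take purely hypothetical numbers: blocklen = 512 (log_dev_bshift = 9), wl_brperjblock = 30 block records per journal block, wl_bufcount = 100 buffers holding wl_bcount = 409600 bytes of data, wl_dealloccnt = 10, and a single 512-byte inode list block from wapbl_transaction_inodes_len(). The estimate is then 409600 + howmany(100, 30)*512 + howmany(10, 30)*512 + 512 = 409600 + 2048 + 512 + 512 = 412672 bytes: the buffer data is counted exactly, while the blocklist, revocation and inode headers are each rounded up to whole log-device blocks.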
2091/* 2094/*
2092 * wapbl_cache_sync(wl, msg) 2095 * wapbl_cache_sync(wl, msg)
2093 * 2096 *
2094 * Issue DIOCCACHESYNC to wl->wl_devvp. 2097 * Issue DIOCCACHESYNC to wl->wl_devvp.
2095 * 2098 *
2096 * If sysctl(vfs.wapbl.verbose_commit) >= 2, print a message 2099 * If sysctl(vfs.wapbl.verbose_commit) >= 2, print a message
2097 * including msg about the duration of the cache sync. 2100 * including msg about the duration of the cache sync.
2098 */ 2101 */
2099static int 2102static int
2100wapbl_cache_sync(struct wapbl *wl, const char *msg) 2103wapbl_cache_sync(struct wapbl *wl, const char *msg)
2101{ 2104{
2102 const bool verbose = wapbl_verbose_commit >= 2; 2105 const bool verbose = wapbl_verbose_commit >= 2;
2103 struct bintime start_time; 2106 struct bintime start_time;
2104 int force = 1; 2107 int force = 1;
2105 int error; 2108 int error;
2106 2109
2107 if (!wapbl_flush_disk_cache) { 2110 if (!wapbl_flush_disk_cache) {
2108 return 0; 2111 return 0;
2109 } 2112 }
2110 if (verbose) { 2113 if (verbose) {
2111 bintime(&start_time); 2114 bintime(&start_time);
2112 } 2115 }
2113 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force, 2116 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force,
2114 FWRITE, FSCRED); 2117 FWRITE, FSCRED);
2115 if (error) { 2118 if (error) {
2116 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 2119 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
2117 ("wapbl_cache_sync: DIOCCACHESYNC on dev 0x%jx " 2120 ("wapbl_cache_sync: DIOCCACHESYNC on dev 0x%jx "
2118 "returned %d\n", (uintmax_t)wl->wl_devvp->v_rdev, error)); 2121 "returned %d\n", (uintmax_t)wl->wl_devvp->v_rdev, error));
2119 } 2122 }
2120 if (verbose) { 2123 if (verbose) {
2121 struct bintime d; 2124 struct bintime d;
2122 struct timespec ts; 2125 struct timespec ts;
2123 2126
2124 bintime(&d); 2127 bintime(&d);
2125 bintime_sub(&d, &start_time); 2128 bintime_sub(&d, &start_time);
2126 bintime2timespec(&d, &ts); 2129 bintime2timespec(&d, &ts);
2127 printf("wapbl_cache_sync: %s: dev 0x%jx %ju.%09lu\n", 2130 printf("wapbl_cache_sync: %s: dev 0x%jx %ju.%09lu\n",
2128 msg, (uintmax_t)wl->wl_devvp->v_rdev, 2131 msg, (uintmax_t)wl->wl_devvp->v_rdev,
2129 (uintmax_t)ts.tv_sec, ts.tv_nsec); 2132 (uintmax_t)ts.tv_sec, ts.tv_nsec);
2130 } 2133 }
2131 return error; 2134 return error;
2132} 2135}
2133 2136
2134/* 2137/*
2135 * wapbl_write_commit(wl, head, tail) 2138 * wapbl_write_commit(wl, head, tail)
2136 * 2139 *
2137 * Issue a disk cache sync to wait for all pending writes to the 2140 * Issue a disk cache sync to wait for all pending writes to the
2138 * log to complete, and then synchronously commit the current 2141 * log to complete, and then synchronously commit the current
2139 * circular queue head and tail to the log, in the next of two 2142 * circular queue head and tail to the log, in the next of two
2140 * locations for commit headers on disk. 2143 * locations for commit headers on disk.
2141 * 2144 *
2142 * Increment the generation number. If the generation number 2145 * Increment the generation number. If the generation number
2143 * rolls over to zero, then a subsequent commit would appear to 2146 * rolls over to zero, then a subsequent commit would appear to
2144 * have an older generation than this one -- in that case, issue a 2147 * have an older generation than this one -- in that case, issue a
2145 * duplicate commit to avoid this. 2148 * duplicate commit to avoid this.
2146 * 2149 *
2147 * => Caller must have exclusive access to wl, either by holding 2150 * => Caller must have exclusive access to wl, either by holding
2148 * wl->wl_rwlock for writer or by being wapbl_start before anyone 2151 * wl->wl_rwlock for writer or by being wapbl_start before anyone
2149 * else has seen wl. 2152 * else has seen wl.
2150 */ 2153 */
2151static int 2154static int
2152wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail) 2155wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail)
2153{ 2156{
2154 struct wapbl_wc_header *wc = wl->wl_wc_header; 2157 struct wapbl_wc_header *wc = wl->wl_wc_header;
2155 struct timespec ts; 2158 struct timespec ts;
2156 int error; 2159 int error;
2157 daddr_t pbn; 2160 daddr_t pbn;
2158 2161
2159 error = wapbl_buffered_flush(wl); 2162 error = wapbl_buffered_flush(wl);
2160 if (error) 2163 if (error)
2161 return error; 2164 return error;
2162 /* 2165 /*
2163 * flush disk cache to ensure that blocks we've written are actually 2166 * flush disk cache to ensure that blocks we've written are actually
2164 * written to stable storage before the commit header. 2167 * written to stable storage before the commit header.
2165 * 2168 *
2166 * XXX Calc checksum here, instead we do this for now 2169 * XXX Calc checksum here, instead we do this for now
2167 */ 2170 */
2168 wapbl_cache_sync(wl, "1"); 2171 wapbl_cache_sync(wl, "1");
2169 2172
2170 wc->wc_head = head; 2173 wc->wc_head = head;
2171 wc->wc_tail = tail; 2174 wc->wc_tail = tail;
2172 wc->wc_checksum = 0; 2175 wc->wc_checksum = 0;
2173 wc->wc_version = 1; 2176 wc->wc_version = 1;
2174 getnanotime(&ts); 2177 getnanotime(&ts);
2175 wc->wc_time = ts.tv_sec; 2178 wc->wc_time = ts.tv_sec;
2176 wc->wc_timensec = ts.tv_nsec; 2179 wc->wc_timensec = ts.tv_nsec;
2177 2180
2178 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2181 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2179 ("wapbl_write_commit: head = %"PRIdMAX "tail = %"PRIdMAX"\n", 2182 ("wapbl_write_commit: head = %"PRIdMAX "tail = %"PRIdMAX"\n",
2180 (intmax_t)head, (intmax_t)tail)); 2183 (intmax_t)head, (intmax_t)tail));
2181 2184
2182 /* 2185 /*
2183 * write the commit header. 2186 * write the commit header.
2184 * 2187 *
2185 * XXX if generation will rollover, then first zero 2188 * XXX if generation will rollover, then first zero
2186 * over second commit header before trying to write both headers. 2189 * over second commit header before trying to write both headers.
2187 */ 2190 */
2188 2191
2189 pbn = wl->wl_logpbn + (wc->wc_generation % 2); 2192 pbn = wl->wl_logpbn + (wc->wc_generation % 2);
2190#ifdef _KERNEL 2193#ifdef _KERNEL
2191 pbn = btodb(pbn << wc->wc_log_dev_bshift); 2194 pbn = btodb(pbn << wc->wc_log_dev_bshift);
2192#endif 2195#endif
2193 error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn); 2196 error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
2194 if (error) 2197 if (error)
2195 return error; 2198 return error;
2196 error = wapbl_buffered_flush(wl); 2199 error = wapbl_buffered_flush(wl);
2197 if (error) 2200 if (error)
2198 return error; 2201 return error;
2199 2202
2200 /* 2203 /*
2201 * flush disk cache to ensure that the commit header is actually 2204 * flush disk cache to ensure that the commit header is actually
2202 * written before meta data blocks. 2205 * written before meta data blocks.
2203 */ 2206 */
2204 wapbl_cache_sync(wl, "2"); 2207 wapbl_cache_sync(wl, "2");
2205 2208
2206 /* 2209 /*
2207 * If the generation number was zero, write it out a second time. 2210 * If the generation number was zero, write it out a second time.
2208 * This handles initialization and generation number rollover 2211 * This handles initialization and generation number rollover
2209 */ 2212 */
2210 if (wc->wc_generation++ == 0) { 2213 if (wc->wc_generation++ == 0) {
2211 error = wapbl_write_commit(wl, head, tail); 2214 error = wapbl_write_commit(wl, head, tail);
2212 /* 2215 /*
2213 * This panic should be able to be removed if we do the 2216 * This panic should be able to be removed if we do the
2214 * zero'ing mentioned above, and we are certain to roll 2217 * zero'ing mentioned above, and we are certain to roll
2215 * back generation number on failure. 2218 * back generation number on failure.
2216 */ 2219 */
2217 if (error) 2220 if (error)
2218 panic("wapbl_write_commit: error writing duplicate " 2221 panic("wapbl_write_commit: error writing duplicate "
2219 "log header: %d", error); 2222 "log header: %d", error);
2220 } 2223 }
2221 return 0; 2224 return 0;
2222} 2225}
2223 2226
2224/* 2227/*
2225 * wapbl_write_blocks(wl, offp) 2228 * wapbl_write_blocks(wl, offp)
2226 * 2229 *
2227 * Write all pending physical blocks in the current transaction 2230 * Write all pending physical blocks in the current transaction
2228 * from wapbl_add_buf to the log on disk, adding to the circular 2231 * from wapbl_add_buf to the log on disk, adding to the circular
2229 * queue head at byte offset *offp, and returning the new head's 2232 * queue head at byte offset *offp, and returning the new head's
2230 * byte offset in *offp. 2233 * byte offset in *offp.
2231 */ 2234 */
2232static int 2235static int
2233wapbl_write_blocks(struct wapbl *wl, off_t *offp) 2236wapbl_write_blocks(struct wapbl *wl, off_t *offp)
2234{ 2237{
2235 struct wapbl_wc_blocklist *wc = 2238 struct wapbl_wc_blocklist *wc =
2236 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch; 2239 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
2237 int blocklen = 1<<wl->wl_log_dev_bshift; 2240 int blocklen = 1<<wl->wl_log_dev_bshift;
2238 struct buf *bp; 2241 struct buf *bp;
2239 off_t off = *offp; 2242 off_t off = *offp;
2240 int error; 2243 int error;
2241 size_t padding; 2244 size_t padding;
2242 2245
2243 KASSERT(rw_write_held(&wl->wl_rwlock)); 2246 KASSERT(rw_write_held(&wl->wl_rwlock));
2244 2247
2245 bp = LIST_FIRST(&wl->wl_bufs); 2248 bp = LIST_FIRST(&wl->wl_bufs);
2246 2249
2247 while (bp) { 2250 while (bp) {
2248 int cnt; 2251 int cnt;
2249 struct buf *obp = bp; 2252 struct buf *obp = bp;
2250 2253
2251 KASSERT(bp->b_flags & B_LOCKED); 2254 KASSERT(bp->b_flags & B_LOCKED);
2252 2255
2253 wc->wc_type = WAPBL_WC_BLOCKS; 2256 wc->wc_type = WAPBL_WC_BLOCKS;
2254 wc->wc_len = blocklen; 2257 wc->wc_len = blocklen;
2255 wc->wc_blkcount = 0; 2258 wc->wc_blkcount = 0;
2256 while (bp && (wc->wc_blkcount < wl->wl_brperjblock)) { 2259 while (bp && (wc->wc_blkcount < wl->wl_brperjblock)) {
2257 /* 2260 /*
2258 * Make sure all the physical block numbers are up to 2261 * Make sure all the physical block numbers are up to
2259 * date. If this is not always true on a given 2262 * date. If this is not always true on a given
2260 * filesystem, then VOP_BMAP must be called. We 2263 * filesystem, then VOP_BMAP must be called. We
2261 * could call VOP_BMAP here, or else in the filesystem 2264 * could call VOP_BMAP here, or else in the filesystem
2262 * specific flush callback, although neither of those 2265 * specific flush callback, although neither of those
2263 * solutions allow us to take the vnode lock. If a 2266 * solutions allow us to take the vnode lock. If a
2264 * filesystem requires that we must take the vnode lock 2267 * filesystem requires that we must take the vnode lock
2265 * to call VOP_BMAP, then we can probably do it in 2268 * to call VOP_BMAP, then we can probably do it in
2266 * bwrite when the vnode lock should already be held 2269 * bwrite when the vnode lock should already be held
2267 * by the invoking code. 2270 * by the invoking code.
2268 */ 2271 */
2269 KASSERT((bp->b_vp->v_type == VBLK) || 2272 KASSERT((bp->b_vp->v_type == VBLK) ||
2270 (bp->b_blkno != bp->b_lblkno)); 2273 (bp->b_blkno != bp->b_lblkno));
2271 KASSERT(bp->b_blkno > 0); 2274 KASSERT(bp->b_blkno > 0);
2272 2275
2273 wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno; 2276 wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno;
2274 wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount; 2277 wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount;
2275 wc->wc_len += bp->b_bcount; 2278 wc->wc_len += bp->b_bcount;
2276 wc->wc_blkcount++; 2279 wc->wc_blkcount++;
2277 bp = LIST_NEXT(bp, b_wapbllist); 2280 bp = LIST_NEXT(bp, b_wapbllist);
2278 } 2281 }
2279 if (wc->wc_len % blocklen != 0) { 2282 if (wc->wc_len % blocklen != 0) {
2280 padding = blocklen - wc->wc_len % blocklen; 2283 padding = blocklen - wc->wc_len % blocklen;
2281 wc->wc_len += padding; 2284 wc->wc_len += padding;
2282 } else { 2285 } else {
2283 padding = 0; 2286 padding = 0;
2284 } 2287 }
2285 2288
2286 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2289 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2287 ("wapbl_write_blocks: len = %u (padding %zu) off = %"PRIdMAX"\n", 2290 ("wapbl_write_blocks: len = %u (padding %zu) off = %"PRIdMAX"\n",
2288 wc->wc_len, padding, (intmax_t)off)); 2291 wc->wc_len, padding, (intmax_t)off));
2289 2292
2290 error = wapbl_circ_write(wl, wc, blocklen, &off); 2293 error = wapbl_circ_write(wl, wc, blocklen, &off);
2291 if (error) 2294 if (error)
2292 return error; 2295 return error;
2293 bp = obp; 2296 bp = obp;
2294 cnt = 0; 2297 cnt = 0;
2295 while (bp && (cnt++ < wl->wl_brperjblock)) { 2298 while (bp && (cnt++ < wl->wl_brperjblock)) {
2296 error = wapbl_circ_write(wl, bp->b_data, 2299 error = wapbl_circ_write(wl, bp->b_data,
2297 bp->b_bcount, &off); 2300 bp->b_bcount, &off);
2298 if (error) 2301 if (error)
2299 return error; 2302 return error;
2300 bp = LIST_NEXT(bp, b_wapbllist); 2303 bp = LIST_NEXT(bp, b_wapbllist);
2301 } 2304 }
2302 if (padding) { 2305 if (padding) {
2303 void *zero; 2306 void *zero;
2304  2307
2305 zero = wapbl_alloc(padding); 2308 zero = wapbl_alloc(padding);
2306 memset(zero, 0, padding); 2309 memset(zero, 0, padding);
2307 error = wapbl_circ_write(wl, zero, padding, &off); 2310 error = wapbl_circ_write(wl, zero, padding, &off);
2308 wapbl_free(zero, padding); 2311 wapbl_free(zero, padding);
2309 if (error) 2312 if (error)
2310 return error; 2313 return error;
2311 } 2314 }
2312 } 2315 }
2313 *offp = off; 2316 *offp = off;
2314 return 0; 2317 return 0;
2315} 2318}
2316 2319
2317/* 2320/*
2318 * wapbl_write_revocations(wl, offp) 2321 * wapbl_write_revocations(wl, offp)
2319 * 2322 *
2320 * Write all pending deallocations in the current transaction from 2323 * Write all pending deallocations in the current transaction from
2321 * wapbl_register_deallocation to the log on disk, adding to the 2324 * wapbl_register_deallocation to the log on disk, adding to the
2322 * circular queue's head at byte offset *offp, and returning the 2325 * circular queue's head at byte offset *offp, and returning the
2323 * new head's byte offset in *offp. 2326 * new head's byte offset in *offp.
2324 */ 2327 */
2325static int 2328static int
2326wapbl_write_revocations(struct wapbl *wl, off_t *offp) 2329wapbl_write_revocations(struct wapbl *wl, off_t *offp)
2327{ 2330{
2328 struct wapbl_wc_blocklist *wc = 2331 struct wapbl_wc_blocklist *wc =
2329 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch; 2332 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
2330 struct wapbl_dealloc *wd, *lwd; 2333 struct wapbl_dealloc *wd, *lwd;
2331 int blocklen = 1<<wl->wl_log_dev_bshift; 2334 int blocklen = 1<<wl->wl_log_dev_bshift;
2332 off_t off = *offp; 2335 off_t off = *offp;
2333 int error; 2336 int error;
2334 2337
2335 if (wl->wl_dealloccnt == 0) 2338 if (wl->wl_dealloccnt == 0)
2336 return 0; 2339 return 0;
2337 2340
2338 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) { 2341 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) {
2339 wc->wc_type = WAPBL_WC_REVOCATIONS; 2342 wc->wc_type = WAPBL_WC_REVOCATIONS;
2340 wc->wc_len = blocklen; 2343 wc->wc_len = blocklen;
2341 wc->wc_blkcount = 0; 2344 wc->wc_blkcount = 0;
2342 while (wd && (wc->wc_blkcount < wl->wl_brperjblock)) { 2345 while (wd && (wc->wc_blkcount < wl->wl_brperjblock)) {
2343 wc->wc_blocks[wc->wc_blkcount].wc_daddr = 2346 wc->wc_blocks[wc->wc_blkcount].wc_daddr =
2344 wd->wd_blkno; 2347 wd->wd_blkno;
2345 wc->wc_blocks[wc->wc_blkcount].wc_dlen = 2348 wc->wc_blocks[wc->wc_blkcount].wc_dlen =
2346 wd->wd_len; 2349 wd->wd_len;
2347 wc->wc_blkcount++; 2350 wc->wc_blkcount++;
2348 2351
2349 wd = SIMPLEQ_NEXT(wd, wd_entries); 2352 wd = SIMPLEQ_NEXT(wd, wd_entries);
2350 } 2353 }
2351 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2354 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2352 ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n", 2355 ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n",
2353 wc->wc_len, (intmax_t)off)); 2356 wc->wc_len, (intmax_t)off));
2354 error = wapbl_circ_write(wl, wc, blocklen, &off); 2357 error = wapbl_circ_write(wl, wc, blocklen, &off);
2355 if (error) 2358 if (error)
2356 return error; 2359 return error;
2357 2360
2358 /* free all successfully written deallocs */ 2361 /* free all successfully written deallocs */
2359 lwd = wd; 2362 lwd = wd;
2360 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) { 2363 while ((wd = SIMPLEQ_FIRST(&wl->wl_dealloclist)) != NULL) {
2361 if (wd == lwd) 2364 if (wd == lwd)
2362 break; 2365 break;
2363 SIMPLEQ_REMOVE_HEAD(&wl->wl_dealloclist, wd_entries); 2366 SIMPLEQ_REMOVE_HEAD(&wl->wl_dealloclist, wd_entries);
2364 pool_put(&wapbl_dealloc_pool, wd); 2367 pool_put(&wapbl_dealloc_pool, wd);
2365 wl->wl_dealloccnt--; 2368 wl->wl_dealloccnt--;
2366 } 2369 }
2367 } 2370 }
2368 *offp = off; 2371 *offp = off;
2369 return 0; 2372 return 0;
2370} 2373}
2371 2374
2372/* 2375/*
2373 * wapbl_write_inodes(wl, offp) 2376 * wapbl_write_inodes(wl, offp)
2374 * 2377 *
2375 * Write all pending inode allocations in the current transaction 2378 * Write all pending inode allocations in the current transaction
2376 * from wapbl_register_inode to the log on disk, adding to the 2379 * from wapbl_register_inode to the log on disk, adding to the
2377 * circular queue's head at byte offset *offp and returning the 2380 * circular queue's head at byte offset *offp and returning the
2378 * new head's byte offset in *offp. 2381 * new head's byte offset in *offp.
2379 */ 2382 */
2380static int 2383static int
2381wapbl_write_inodes(struct wapbl *wl, off_t *offp) 2384wapbl_write_inodes(struct wapbl *wl, off_t *offp)
2382{ 2385{
2383 struct wapbl_wc_inodelist *wc = 2386 struct wapbl_wc_inodelist *wc =
2384 (struct wapbl_wc_inodelist *)wl->wl_wc_scratch; 2387 (struct wapbl_wc_inodelist *)wl->wl_wc_scratch;
2385 int i; 2388 int i;
2386 int blocklen = 1 << wl->wl_log_dev_bshift; 2389 int blocklen = 1 << wl->wl_log_dev_bshift;
2387 off_t off = *offp; 2390 off_t off = *offp;
2388 int error; 2391 int error;
2389 2392
2390 struct wapbl_ino_head *wih; 2393 struct wapbl_ino_head *wih;
2391 struct wapbl_ino *wi; 2394 struct wapbl_ino *wi;
2392 int iph; 2395 int iph;
2393 2396
2394 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) / 2397 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
2395 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]); 2398 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
2396 2399
2397 i = 0; 2400 i = 0;
2398 wih = &wl->wl_inohash[0]; 2401 wih = &wl->wl_inohash[0];
2399 wi = 0; 2402 wi = 0;
2400 do { 2403 do {
2401 wc->wc_type = WAPBL_WC_INODES; 2404 wc->wc_type = WAPBL_WC_INODES;
2402 wc->wc_len = blocklen; 2405 wc->wc_len = blocklen;
2403 wc->wc_inocnt = 0; 2406 wc->wc_inocnt = 0;
2404 wc->wc_clear = (i == 0); 2407 wc->wc_clear = (i == 0);
2405 while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) { 2408 while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) {
2406 while (!wi) { 2409 while (!wi) {
2407 KASSERT((wih - &wl->wl_inohash[0]) 2410 KASSERT((wih - &wl->wl_inohash[0])
2408 <= wl->wl_inohashmask); 2411 <= wl->wl_inohashmask);
2409 wi = LIST_FIRST(wih++); 2412 wi = LIST_FIRST(wih++);
2410 } 2413 }
2411 wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino; 2414 wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino;
2412 wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode; 2415 wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode;
2413 wc->wc_inocnt++; 2416 wc->wc_inocnt++;
2414 i++; 2417 i++;
2415 wi = LIST_NEXT(wi, wi_hash); 2418 wi = LIST_NEXT(wi, wi_hash);
2416 } 2419 }
2417 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2420 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2418 ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n", 2421 ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n",
2419 wc->wc_len, (intmax_t)off)); 2422 wc->wc_len, (intmax_t)off));
2420 error = wapbl_circ_write(wl, wc, blocklen, &off); 2423 error = wapbl_circ_write(wl, wc, blocklen, &off);
2421 if (error) 2424 if (error)
2422 return error; 2425 return error;
2423 } while (i < wl->wl_inohashcnt); 2426 } while (i < wl->wl_inohashcnt);
2424  2427
2425 *offp = off; 2428 *offp = off;
2426 return 0; 2429 return 0;
2427} 2430}
2428 2431
2429#endif /* _KERNEL */ 2432#endif /* _KERNEL */
2430 2433
2431/****************************************************************/ 2434/****************************************************************/
2432 2435
2433struct wapbl_blk { 2436struct wapbl_blk {
2434 LIST_ENTRY(wapbl_blk) wb_hash; 2437 LIST_ENTRY(wapbl_blk) wb_hash;
2435 daddr_t wb_blk; 2438 daddr_t wb_blk;
2436 off_t wb_off; /* Offset of this block in the log */ 2439 off_t wb_off; /* Offset of this block in the log */
2437}; 2440};
2438#define WAPBL_BLKPOOL_MIN 83 2441#define WAPBL_BLKPOOL_MIN 83
2439 2442
2440static void 2443static void
2441wapbl_blkhash_init(struct wapbl_replay *wr, u_int size) 2444wapbl_blkhash_init(struct wapbl_replay *wr, u_int size)
2442{ 2445{
2443 if (size < WAPBL_BLKPOOL_MIN) 2446 if (size < WAPBL_BLKPOOL_MIN)
2444 size = WAPBL_BLKPOOL_MIN; 2447 size = WAPBL_BLKPOOL_MIN;
2445 KASSERT(wr->wr_blkhash == 0); 2448 KASSERT(wr->wr_blkhash == 0);
2446#ifdef _KERNEL 2449#ifdef _KERNEL
2447 wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask); 2450 wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask);
2448#else /* ! _KERNEL */ 2451#else /* ! _KERNEL */
2449 /* Manually implement hashinit */ 2452 /* Manually implement hashinit */
2450 { 2453 {
2451 unsigned long i, hashsize; 2454 unsigned long i, hashsize;
2452 for (hashsize = 1; hashsize < size; hashsize <<= 1) 2455 for (hashsize = 1; hashsize < size; hashsize <<= 1)
2453 continue; 2456 continue;
2454 wr->wr_blkhash = wapbl_alloc(hashsize * sizeof(*wr->wr_blkhash)); 2457 wr->wr_blkhash = wapbl_alloc(hashsize * sizeof(*wr->wr_blkhash));
2455 for (i = 0; i < hashsize; i++) 2458 for (i = 0; i < hashsize; i++)
2456 LIST_INIT(&wr->wr_blkhash[i]); 2459 LIST_INIT(&wr->wr_blkhash[i]);
2457 wr->wr_blkhashmask = hashsize - 1; 2460 wr->wr_blkhashmask = hashsize - 1;
2458 } 2461 }
2459#endif /* ! _KERNEL */ 2462#endif /* ! _KERNEL */
2460} 2463}
2461 2464
2462static void 2465static void
2463wapbl_blkhash_free(struct wapbl_replay *wr) 2466wapbl_blkhash_free(struct wapbl_replay *wr)
2464{ 2467{
2465 KASSERT(wr->wr_blkhashcnt == 0); 2468 KASSERT(wr->wr_blkhashcnt == 0);
2466#ifdef _KERNEL 2469#ifdef _KERNEL
2467 hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask); 2470 hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask);
2468#else /* ! _KERNEL */ 2471#else /* ! _KERNEL */
2469 wapbl_free(wr->wr_blkhash, 2472 wapbl_free(wr->wr_blkhash,
2470 (wr->wr_blkhashmask + 1) * sizeof(*wr->wr_blkhash)); 2473 (wr->wr_blkhashmask + 1) * sizeof(*wr->wr_blkhash));
2471#endif /* ! _KERNEL */ 2474#endif /* ! _KERNEL */
2472} 2475}
2473 2476
2474static struct wapbl_blk * 2477static struct wapbl_blk *
2475wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk) 2478wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk)
2476{ 2479{
2477 struct wapbl_blk_head *wbh; 2480 struct wapbl_blk_head *wbh;
2478 struct wapbl_blk *wb; 2481 struct wapbl_blk *wb;
2479 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask]; 2482 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2480 LIST_FOREACH(wb, wbh, wb_hash) { 2483 LIST_FOREACH(wb, wbh, wb_hash) {
2481 if (blk == wb->wb_blk) 2484 if (blk == wb->wb_blk)
2482 return wb; 2485 return wb;
2483 } 2486 }
2484 return 0; 2487 return 0;
2485} 2488}
2486 2489
2487static void 2490static void
2488wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off) 2491wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off)
2489{ 2492{
2490 struct wapbl_blk_head *wbh; 2493 struct wapbl_blk_head *wbh;
2491 struct wapbl_blk *wb; 2494 struct wapbl_blk *wb;
2492 wb = wapbl_blkhash_get(wr, blk); 2495 wb = wapbl_blkhash_get(wr, blk);
2493 if (wb) { 2496 if (wb) {
2494 KASSERT(wb->wb_blk == blk); 2497 KASSERT(wb->wb_blk == blk);
2495 wb->wb_off = off; 2498 wb->wb_off = off;
2496 } else { 2499 } else {
2497 wb = wapbl_alloc(sizeof(*wb)); 2500 wb = wapbl_alloc(sizeof(*wb));
2498 wb->wb_blk = blk; 2501 wb->wb_blk = blk;
2499 wb->wb_off = off; 2502 wb->wb_off = off;
2500 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask]; 2503 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2501 LIST_INSERT_HEAD(wbh, wb, wb_hash); 2504 LIST_INSERT_HEAD(wbh, wb, wb_hash);
2502 wr->wr_blkhashcnt++; 2505 wr->wr_blkhashcnt++;
2503 } 2506 }
2504} 2507}
2505 2508
2506static void 2509static void
2507wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk) 2510wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk)
2508{ 2511{
2509 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk); 2512 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
2510 if (wb) { 2513 if (wb) {
2511 KASSERT(wr->wr_blkhashcnt > 0); 2514 KASSERT(wr->wr_blkhashcnt > 0);
2512 wr->wr_blkhashcnt--; 2515 wr->wr_blkhashcnt--;
2513 LIST_REMOVE(wb, wb_hash); 2516 LIST_REMOVE(wb, wb_hash);
2514 wapbl_free(wb, sizeof(*wb)); 2517 wapbl_free(wb, sizeof(*wb));
2515 } 2518 }
2516} 2519}
2517 2520
2518static void 2521static void
2519wapbl_blkhash_clear(struct wapbl_replay *wr) 2522wapbl_blkhash_clear(struct wapbl_replay *wr)
2520{ 2523{
2521 unsigned long i; 2524 unsigned long i;
2522 for (i = 0; i <= wr->wr_blkhashmask; i++) { 2525 for (i = 0; i <= wr->wr_blkhashmask; i++) {
2523 struct wapbl_blk *wb; 2526 struct wapbl_blk *wb;
2524 2527
2525 while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) { 2528 while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) {
2526 KASSERT(wr->wr_blkhashcnt > 0); 2529 KASSERT(wr->wr_blkhashcnt > 0);
2527 wr->wr_blkhashcnt--; 2530 wr->wr_blkhashcnt--;
2528 LIST_REMOVE(wb, wb_hash); 2531 LIST_REMOVE(wb, wb_hash);
2529 wapbl_free(wb, sizeof(*wb)); 2532 wapbl_free(wb, sizeof(*wb));
2530 } 2533 }
2531 } 2534 }
2532 KASSERT(wr->wr_blkhashcnt == 0); 2535 KASSERT(wr->wr_blkhashcnt == 0);
2533} 2536}
2534 2537
2535/****************************************************************/ 2538/****************************************************************/
2536 2539
2537/* 2540/*
2538 * wapbl_circ_read(wr, data, len, offp) 2541 * wapbl_circ_read(wr, data, len, offp)
2539 * 2542 *
2540 * Read len bytes into data from the circular queue of wr, 2543 * Read len bytes into data from the circular queue of wr,
2541 * starting at the linear byte offset *offp, and returning the new 2544 * starting at the linear byte offset *offp, and returning the new
2542 * linear byte offset in *offp. 2545 * linear byte offset in *offp.
2543 * 2546 *
2544 * If the starting linear byte offset precedes wr->wr_circ_off, 2547 * If the starting linear byte offset precedes wr->wr_circ_off,
2545 * the read instead begins at wr->wr_circ_off. XXX WTF? This 2548 * the read instead begins at wr->wr_circ_off. XXX WTF? This
2546 * should be a KASSERT, not a conditional. 2549 * should be a KASSERT, not a conditional.
2547 */ 2550 */
2548static int 2551static int
2549wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp) 2552wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp)
2550{ 2553{
2551 size_t slen; 2554 size_t slen;
2552 off_t off = *offp; 2555 off_t off = *offp;
2553 int error; 2556 int error;
2554 daddr_t pbn; 2557 daddr_t pbn;
2555 2558
2556 KASSERT(((len >> wr->wr_log_dev_bshift) << 2559 KASSERT(((len >> wr->wr_log_dev_bshift) <<
2557 wr->wr_log_dev_bshift) == len); 2560 wr->wr_log_dev_bshift) == len);
2558 2561
2559 if (off < wr->wr_circ_off) 2562 if (off < wr->wr_circ_off)
2560 off = wr->wr_circ_off; 2563 off = wr->wr_circ_off;
2561 slen = wr->wr_circ_off + wr->wr_circ_size - off; 2564 slen = wr->wr_circ_off + wr->wr_circ_size - off;
2562 if (slen < len) { 2565 if (slen < len) {
2563 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift); 2566 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
2564#ifdef _KERNEL 2567#ifdef _KERNEL
2565 pbn = btodb(pbn << wr->wr_log_dev_bshift); 2568 pbn = btodb(pbn << wr->wr_log_dev_bshift);
2566#endif 2569#endif
2567 error = wapbl_read(data, slen, wr->wr_devvp, pbn); 2570 error = wapbl_read(data, slen, wr->wr_devvp, pbn);
2568 if (error) 2571 if (error)
2569 return error; 2572 return error;
2570 data = (uint8_t *)data + slen; 2573 data = (uint8_t *)data + slen;
2571 len -= slen; 2574 len -= slen;
2572 off = wr->wr_circ_off; 2575 off = wr->wr_circ_off;
2573 } 2576 }
2574 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift); 2577 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
2575#ifdef _KERNEL 2578#ifdef _KERNEL
2576 pbn = btodb(pbn << wr->wr_log_dev_bshift); 2579 pbn = btodb(pbn << wr->wr_log_dev_bshift);
2577#endif 2580#endif
2578 error = wapbl_read(data, len, wr->wr_devvp, pbn); 2581 error = wapbl_read(data, len, wr->wr_devvp, pbn);
2579 if (error) 2582 if (error)
2580 return error; 2583 return error;
2581 off += len; 2584 off += len;
2582 if (off >= wr->wr_circ_off + wr->wr_circ_size) 2585 if (off >= wr->wr_circ_off + wr->wr_circ_size)
2583 off = wr->wr_circ_off; 2586 off = wr->wr_circ_off;
2584 *offp = off; 2587 *offp = off;
2585 return 0; 2588 return 0;
2586} 2589}
2587 2590
2588/* 2591/*
2589 * wapbl_circ_advance(wr, len, offp) 2592 * wapbl_circ_advance(wr, len, offp)
2590 * 2593 *
2591 * Compute the linear byte offset of the circular queue of wr that 2594 * Compute the linear byte offset of the circular queue of wr that
2592 * is len bytes past *offp, and store it in *offp. 2595 * is len bytes past *offp, and store it in *offp.
2593 * 2596 *
2594 * This is as if wapbl_circ_read, but without actually reading 2597 * This is as if wapbl_circ_read, but without actually reading
2595 * anything. 2598 * anything.
2596 * 2599 *
2597 * If the starting linear byte offset precedes wr->wr_circ_off, it 2600 * If the starting linear byte offset precedes wr->wr_circ_off, it
2598 * is taken to be wr->wr_circ_off instead. XXX WTF? This should 2601 * is taken to be wr->wr_circ_off instead. XXX WTF? This should
2599 * be a KASSERT, not a conditional. 2602 * be a KASSERT, not a conditional.
2600 */ 2603 */
2601static void 2604static void
2602wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp) 2605wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp)
2603{ 2606{
2604 size_t slen; 2607 size_t slen;
2605 off_t off = *offp; 2608 off_t off = *offp;
2606 2609
2607 KASSERT(((len >> wr->wr_log_dev_bshift) << 2610 KASSERT(((len >> wr->wr_log_dev_bshift) <<
2608 wr->wr_log_dev_bshift) == len); 2611 wr->wr_log_dev_bshift) == len);
2609 2612
2610 if (off < wr->wr_circ_off) 2613 if (off < wr->wr_circ_off)
2611 off = wr->wr_circ_off; 2614 off = wr->wr_circ_off;
2612 slen = wr->wr_circ_off + wr->wr_circ_size - off; 2615 slen = wr->wr_circ_off + wr->wr_circ_size - off;
2613 if (slen < len) { 2616 if (slen < len) {
2614 len -= slen; 2617 len -= slen;
2615 off = wr->wr_circ_off; 2618 off = wr->wr_circ_off;
2616 } 2619 }
2617 off += len; 2620 off += len;
2618 if (off >= wr->wr_circ_off + wr->wr_circ_size) 2621 if (off >= wr->wr_circ_off + wr->wr_circ_size)
2619 off = wr->wr_circ_off; 2622 off = wr->wr_circ_off;
2620 *offp = off; 2623 *offp = off;
2621} 2624}
2622 2625
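As a worked example of the wrap-around arithmetic in wapbl_circ_advance(), assume hypothetical values wr_circ_off = 1024, wr_circ_size = 8192 (so the usable log region is byte offsets 1024..9215) and *offp = 8704. Advancing by len = 1024: slen = 1024 + 8192 - 8704 = 512, which is less than len, so 512 bytes consume the rest of the region, off resets to 1024, and the remaining 512 bytes leave *offp = 1536. wapbl_circ_read() performs the same traversal, only with the two wapbl_read() calls actually fetching the data on either side of the wrap.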
2623/****************************************************************/ 2626/****************************************************************/
2624 2627
2625int 2628int
2626wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp, 2629wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp,
2627 daddr_t off, size_t count, size_t blksize) 2630 daddr_t off, size_t count, size_t blksize)
2628{ 2631{
2629 struct wapbl_replay *wr; 2632 struct wapbl_replay *wr;
2630 int error; 2633 int error;
2631 struct vnode *devvp; 2634 struct vnode *devvp;
2632 daddr_t logpbn; 2635 daddr_t logpbn;
2633 uint8_t *scratch; 2636 uint8_t *scratch;
2634 struct wapbl_wc_header *wch; 2637 struct wapbl_wc_header *wch;
2635 struct wapbl_wc_header *wch2; 2638 struct wapbl_wc_header *wch2;
2636 /* Use this until we read the actual log header */ 2639 /* Use this until we read the actual log header */
2637 int log_dev_bshift = ilog2(blksize); 2640 int log_dev_bshift = ilog2(blksize);
2638 size_t used; 2641 size_t used;
2639 daddr_t pbn; 2642 daddr_t pbn;
2640 2643
2641 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 2644 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
2642 ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n", 2645 ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n",
2643 vp, off, count, blksize)); 2646 vp, off, count, blksize));
2644 2647
2645 if (off < 0) 2648 if (off < 0)
2646 return EINVAL; 2649 return EINVAL;
2647 2650
2648 if (blksize < DEV_BSIZE) 2651 if (blksize < DEV_BSIZE)
2649 return EINVAL; 2652 return EINVAL;
2650 if (blksize % DEV_BSIZE) 2653 if (blksize % DEV_BSIZE)
2651 return EINVAL; 2654 return EINVAL;
2652 2655
2653#ifdef _KERNEL 2656#ifdef _KERNEL
2654#if 0 2657#if 0
2655 /* XXX vp->v_size isn't reliably set for VBLK devices, 2658 /* XXX vp->v_size isn't reliably set for VBLK devices,
2656 * especially root. However, we might still want to verify 2659 * especially root. However, we might still want to verify
2657 * that the full load is readable */ 2660 * that the full load is readable */
2658 if ((off + count) * blksize > vp->v_size) 2661 if ((off + count) * blksize > vp->v_size)
2659 return EINVAL; 2662 return EINVAL;
2660#endif 2663#endif
2661 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) { 2664 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) {
2662 return error; 2665 return error;
2663 } 2666 }
2664#else /* ! _KERNEL */ 2667#else /* ! _KERNEL */
2665 devvp = vp; 2668 devvp = vp;
2666 logpbn = off; 2669 logpbn = off;
2667#endif /* ! _KERNEL */ 2670#endif /* ! _KERNEL */
2668 2671
2669 scratch = wapbl_alloc(MAXBSIZE); 2672 scratch = wapbl_alloc(MAXBSIZE);
2670 2673
2671 pbn = logpbn; 2674 pbn = logpbn;
2672#ifdef _KERNEL 2675#ifdef _KERNEL
2673 pbn = btodb(pbn << log_dev_bshift); 2676 pbn = btodb(pbn << log_dev_bshift);
2674#endif 2677#endif
2675 error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, pbn); 2678 error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, pbn);
2676 if (error) 2679 if (error)
2677 goto errout; 2680 goto errout;
2678 2681
2679 wch = (struct wapbl_wc_header *)scratch; 2682 wch = (struct wapbl_wc_header *)scratch;
2680 wch2 = 2683 wch2 =
2681 (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift)); 2684 (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift));
2682 /* XXX verify checksums and magic numbers */ 2685 /* XXX verify checksums and magic numbers */
2683 if (wch->wc_type != WAPBL_WC_HEADER) { 2686 if (wch->wc_type != WAPBL_WC_HEADER) {
2684 printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type); 2687 printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type);
2685 error = EFTYPE; 2688 error = EFTYPE;
2686 goto errout; 2689 goto errout;
2687 } 2690 }
2688 2691
2689 if (wch2->wc_generation > wch->wc_generation) 2692 if (wch2->wc_generation > wch->wc_generation)
2690 wch = wch2; 2693 wch = wch2;
2691 2694
2692 wr = wapbl_calloc(1, sizeof(*wr)); 2695 wr = wapbl_calloc(1, sizeof(*wr));
2693 2696
2694 wr->wr_logvp = vp; 2697 wr->wr_logvp = vp;
2695 wr->wr_devvp = devvp; 2698 wr->wr_devvp = devvp;
2696 wr->wr_logpbn = logpbn; 2699 wr->wr_logpbn = logpbn;
2697 2700
2698 wr->wr_scratch = scratch; 2701 wr->wr_scratch = scratch;
2699 2702
2700 wr->wr_log_dev_bshift = wch->wc_log_dev_bshift; 2703 wr->wr_log_dev_bshift = wch->wc_log_dev_bshift;
2701 wr->wr_fs_dev_bshift = wch->wc_fs_dev_bshift; 2704 wr->wr_fs_dev_bshift = wch->wc_fs_dev_bshift;
2702 wr->wr_circ_off = wch->wc_circ_off; 2705 wr->wr_circ_off = wch->wc_circ_off;
2703 wr->wr_circ_size = wch->wc_circ_size; 2706 wr->wr_circ_size = wch->wc_circ_size;
2704 wr->wr_generation = wch->wc_generation; 2707 wr->wr_generation = wch->wc_generation;
2705 2708
2706 used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail); 2709 used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail);
2707 2710
2708 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 2711 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
2709 ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64 2712 ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64
2710 " len=%"PRId64" used=%zu\n", 2713 " len=%"PRId64" used=%zu\n",
2711 wch->wc_head, wch->wc_tail, wch->wc_circ_off, 2714 wch->wc_head, wch->wc_tail, wch->wc_circ_off,
2712 wch->wc_circ_size, used)); 2715 wch->wc_circ_size, used));
2713 2716
2714 wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift)); 2717 wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift));
2715 2718
2716 error = wapbl_replay_process(wr, wch->wc_head, wch->wc_tail); 2719 error = wapbl_replay_process(wr, wch->wc_head, wch->wc_tail);
2717 if (error) { 2720 if (error) {
2718 wapbl_replay_stop(wr); 2721 wapbl_replay_stop(wr);
2719 wapbl_replay_free(wr); 2722 wapbl_replay_free(wr);
2720 return error; 2723 return error;
2721 } 2724 }
2722 2725
2723 *wrp = wr; 2726 *wrp = wr;
2724 return 0; 2727 return 0;
2725 2728
2726 errout: 2729 errout:
2727 wapbl_free(scratch, MAXBSIZE); 2730 wapbl_free(scratch, MAXBSIZE);
2728 return error; 2731 return error;
2729} 2732}
2730 2733
2731void 2734void
2732wapbl_replay_stop(struct wapbl_replay *wr) 2735wapbl_replay_stop(struct wapbl_replay *wr)
2733{ 2736{
2734 2737
2735 if (!wapbl_replay_isopen(wr)) 2738 if (!wapbl_replay_isopen(wr))
2736 return; 2739 return;
2737 2740
2738 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n")); 2741 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n"));
2739 2742
2740 wapbl_free(wr->wr_scratch, MAXBSIZE); 2743 wapbl_free(wr->wr_scratch, MAXBSIZE);
2741 wr->wr_scratch = NULL; 2744 wr->wr_scratch = NULL;
2742 2745
2743 wr->wr_logvp = NULL; 2746 wr->wr_logvp = NULL;
2744 2747
2745 wapbl_blkhash_clear(wr); 2748 wapbl_blkhash_clear(wr);
2746 wapbl_blkhash_free(wr); 2749 wapbl_blkhash_free(wr);
2747} 2750}
2748 2751
2749void 2752void
2750wapbl_replay_free(struct wapbl_replay *wr) 2753wapbl_replay_free(struct wapbl_replay *wr)
2751{ 2754{
2752 2755
2753 KDASSERT(!wapbl_replay_isopen(wr)); 2756 KDASSERT(!wapbl_replay_isopen(wr));
2754 2757
2755 if (wr->wr_inodes) 2758 if (wr->wr_inodes)
2756 wapbl_free(wr->wr_inodes, 2759 wapbl_free(wr->wr_inodes,
2757 wr->wr_inodescnt * sizeof(wr->wr_inodes[0])); 2760 wr->wr_inodescnt * sizeof(wr->wr_inodes[0]));
2758 wapbl_free(wr, sizeof(*wr)); 2761 wapbl_free(wr, sizeof(*wr));
2759} 2762}
2760 2763
2761#ifdef _KERNEL 2764#ifdef _KERNEL
2762int 2765int
2763wapbl_replay_isopen1(struct wapbl_replay *wr) 2766wapbl_replay_isopen1(struct wapbl_replay *wr)
2764{ 2767{
2765 2768
2766 return wapbl_replay_isopen(wr); 2769 return wapbl_replay_isopen(wr);
2767} 2770}
2768#endif 2771#endif
2769 2772
2770/* 2773/*
2771 * calculate the disk address for the i'th block in the wc_blocklist 2774 * calculate the disk address for the i'th block in the wc_blocklist
2772 * offset by j blocks of size blen. 2775 * offset by j blocks of size blen.
2773 * 2776 *
2774 * wc_daddr is always a kernel disk address in DEV_BSIZE units that 2777 * wc_daddr is always a kernel disk address in DEV_BSIZE units that
2775 * was written to the journal. 2778 * was written to the journal.
2776 * 2779 *
2777 * The kernel needs that address plus the offset in DEV_BSIZE units. 2780 * The kernel needs that address plus the offset in DEV_BSIZE units.
2778 * 2781 *
2779 * Userland needs that address plus the offset in blen units. 2782 * Userland needs that address plus the offset in blen units.
2780 * 2783 *
2781 */ 2784 */
2782static daddr_t 2785static daddr_t
2783wapbl_block_daddr(struct wapbl_wc_blocklist *wc, int i, int j, int blen) 2786wapbl_block_daddr(struct wapbl_wc_blocklist *wc, int i, int j, int blen)
2784{ 2787{
2785 daddr_t pbn; 2788 daddr_t pbn;
2786 2789
2787#ifdef _KERNEL 2790#ifdef _KERNEL
2788 pbn = wc->wc_blocks[i].wc_daddr + btodb(j * blen); 2791 pbn = wc->wc_blocks[i].wc_daddr + btodb(j * blen);
2789#else 2792#else
2790 pbn = dbtob(wc->wc_blocks[i].wc_daddr) / blen + j; 2793 pbn = dbtob(wc->wc_blocks[i].wc_daddr) / blen + j;
2791#endif 2794#endif
2792 2795
2793 return pbn; 2796 return pbn;
2794} 2797}
2795 2798
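A worked example of the two unit systems, with assumed values (DEV_BSIZE = 512, fs block size blen = 2048, wc_daddr = 4096, i.e. byte offset 2097152, and j = 3): in the kernel, pbn = 4096 + btodb(3*2048) = 4096 + 12 = 4108 DEV_BSIZE sectors; in userland, pbn = dbtob(4096)/2048 + 3 = 1024 + 3 = 1027 blocks of 2048 bytes. Both name the same byte offset, 2103296, just expressed in the units each consumer of the address works in.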
2796static void 2799static void
2797wapbl_replay_process_blocks(struct wapbl_replay *wr, off_t *offp) 2800wapbl_replay_process_blocks(struct wapbl_replay *wr, off_t *offp)
2798{ 2801{
2799 struct wapbl_wc_blocklist *wc = 2802 struct wapbl_wc_blocklist *wc =
2800 (struct wapbl_wc_blocklist *)wr->wr_scratch; 2803 (struct wapbl_wc_blocklist *)wr->wr_scratch;
2801 int fsblklen = 1 << wr->wr_fs_dev_bshift; 2804 int fsblklen = 1 << wr->wr_fs_dev_bshift;
2802 int i, j, n; 2805 int i, j, n;
2803 2806
2804 for (i = 0; i < wc->wc_blkcount; i++) { 2807 for (i = 0; i < wc->wc_blkcount; i++) {
2805 /* 2808 /*
2806 * Enter each physical block into the hashtable independently. 2809 * Enter each physical block into the hashtable independently.
2807 */ 2810 */
2808 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift; 2811 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
2809 for (j = 0; j < n; j++) { 2812 for (j = 0; j < n; j++) {
2810 wapbl_blkhash_ins(wr, wapbl_block_daddr(wc, i, j, fsblklen), 2813 wapbl_blkhash_ins(wr, wapbl_block_daddr(wc, i, j, fsblklen),
2811 *offp); 2814 *offp);
2812 wapbl_circ_advance(wr, fsblklen, offp); 2815 wapbl_circ_advance(wr, fsblklen, offp);
2813 } 2816 }
2814 } 2817 }
2815} 2818}
2816 2819
2817static void 2820static void
2818wapbl_replay_process_revocations(struct wapbl_replay *wr) 2821wapbl_replay_process_revocations(struct wapbl_replay *wr)
2819{ 2822{
2820 struct wapbl_wc_blocklist *wc = 2823 struct wapbl_wc_blocklist *wc =
2821 (struct wapbl_wc_blocklist *)wr->wr_scratch; 2824 (struct wapbl_wc_blocklist *)wr->wr_scratch;
2822 int fsblklen = 1 << wr->wr_fs_dev_bshift; 2825 int fsblklen = 1 << wr->wr_fs_dev_bshift;
2823 int i, j, n; 2826 int i, j, n;
2824 2827
2825 for (i = 0; i < wc->wc_blkcount; i++) { 2828 for (i = 0; i < wc->wc_blkcount; i++) {
2826 /* 2829 /*
2827 * Remove any blocks found from the hashtable. 2830 * Remove any blocks found from the hashtable.
2828 */ 2831 */
2829 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift; 2832 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
2830 for (j = 0; j < n; j++) 2833 for (j = 0; j < n; j++)
2831 wapbl_blkhash_rem(wr, wapbl_block_daddr(wc, i, j, fsblklen)); 2834 wapbl_blkhash_rem(wr, wapbl_block_daddr(wc, i, j, fsblklen));
2832 } 2835 }
2833} 2836}
2834 2837
2835static void 2838static void
2836wapbl_replay_process_inodes(struct wapbl_replay *wr, off_t oldoff, off_t newoff) 2839wapbl_replay_process_inodes(struct wapbl_replay *wr, off_t oldoff, off_t newoff)
2837{ 2840{
2838 struct wapbl_wc_inodelist *wc = 2841 struct wapbl_wc_inodelist *wc =
2839 (struct wapbl_wc_inodelist *)wr->wr_scratch; 2842 (struct wapbl_wc_inodelist *)wr->wr_scratch;
2840 void *new_inodes; 2843 void *new_inodes;
2841 const size_t oldsize = wr->wr_inodescnt * sizeof(wr->wr_inodes[0]); 2844 const size_t oldsize = wr->wr_inodescnt * sizeof(wr->wr_inodes[0]);
2842 2845
2843 KASSERT(sizeof(wr->wr_inodes[0]) == sizeof(wc->wc_inodes[0])); 2846 KASSERT(sizeof(wr->wr_inodes[0]) == sizeof(wc->wc_inodes[0]));
2844 2847
2845 /* 2848 /*
2846 * Keep track of where we found this so the location won't be 2849 * Keep track of where we found this so the location won't be
2847 * overwritten. 2850 * overwritten.
2848 */ 2851 */
2849 if (wc->wc_clear) { 2852 if (wc->wc_clear) {
2850 wr->wr_inodestail = oldoff; 2853 wr->wr_inodestail = oldoff;
2851 wr->wr_inodescnt = 0; 2854 wr->wr_inodescnt = 0;
2852 if (wr->wr_inodes != NULL) { 2855 if (wr->wr_inodes != NULL) {
2853 wapbl_free(wr->wr_inodes, oldsize); 2856 wapbl_free(wr->wr_inodes, oldsize);
2854 wr->wr_inodes = NULL; 2857 wr->wr_inodes = NULL;
2855 } 2858 }
2856 } 2859 }
2857 wr->wr_inodeshead = newoff; 2860 wr->wr_inodeshead = newoff;
2858 if (wc->wc_inocnt == 0) 2861 if (wc->wc_inocnt == 0)
2859 return; 2862 return;
2860 2863
2861 new_inodes = wapbl_alloc((wr->wr_inodescnt + wc->wc_inocnt) * 2864 new_inodes = wapbl_alloc((wr->wr_inodescnt + wc->wc_inocnt) *
2862 sizeof(wr->wr_inodes[0])); 2865 sizeof(wr->wr_inodes[0]));
2863 if (wr->wr_inodes != NULL) { 2866 if (wr->wr_inodes != NULL) {
2864 memcpy(new_inodes, wr->wr_inodes, oldsize); 2867 memcpy(new_inodes, wr->wr_inodes, oldsize);
2865 wapbl_free(wr->wr_inodes, oldsize); 2868 wapbl_free(wr->wr_inodes, oldsize);
2866 } 2869 }
2867 wr->wr_inodes = new_inodes; 2870 wr->wr_inodes = new_inodes;
2868 memcpy(&wr->wr_inodes[wr->wr_inodescnt], wc->wc_inodes, 2871 memcpy(&wr->wr_inodes[wr->wr_inodescnt], wc->wc_inodes,
2869 wc->wc_inocnt * sizeof(wr->wr_inodes[0])); 2872 wc->wc_inocnt * sizeof(wr->wr_inodes[0]));
2870 wr->wr_inodescnt += wc->wc_inocnt; 2873 wr->wr_inodescnt += wc->wc_inocnt;
2871} 2874}
2872 2875
static int
wapbl_replay_process(struct wapbl_replay *wr, off_t head, off_t tail)
{
        off_t off;
        int error;

        int logblklen = 1 << wr->wr_log_dev_bshift;

        wapbl_blkhash_clear(wr);

        off = tail;
        while (off != head) {
                struct wapbl_wc_null *wcn;
                off_t saveoff = off;
                error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
                if (error)
                        goto errout;
                wcn = (struct wapbl_wc_null *)wr->wr_scratch;
                switch (wcn->wc_type) {
                case WAPBL_WC_BLOCKS:
                        wapbl_replay_process_blocks(wr, &off);
                        break;

                case WAPBL_WC_REVOCATIONS:
                        wapbl_replay_process_revocations(wr);
                        break;

                case WAPBL_WC_INODES:
                        wapbl_replay_process_inodes(wr, saveoff, off);
                        break;

                default:
                        printf("Unrecognized wapbl type: 0x%08x\n",
                            wcn->wc_type);
                        error = EFTYPE;
                        goto errout;
                }
                wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
                if (off != saveoff) {
                        printf("wapbl_replay: corrupted records\n");
                        error = EFTYPE;
                        goto errout;
                }
        }
        return 0;

 errout:
        wapbl_blkhash_clear(wr);
        return error;
}
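Note: wapbl_replay_process scans the circular log from tail to head, reads each record header into the scratch buffer, dispatches on wc_type, and then checks that the offset the record handler ended at matches the record's own wc_len; a mismatch means the log is self-inconsistent and replay aborts with EFTYPE. Below is an illustrative userland sketch of that scan over a flat buffer (no wrap-around); the record layout and all names are hypothetical, not the on-disk WAPBL format.

/* --- begin illustrative sketch (not part of vfs_wapbl.c) --- */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLKLEN 16                       /* stand-in for 1 << wr_log_dev_bshift */

struct rec_hdr {
        uint32_t type;
        uint32_t len;                   /* record length, multiple of BLKLEN */
};

enum { REC_BLOCKS = 1, REC_REVOCATIONS = 2 };

int
main(void)
{
        unsigned char log[4 * BLKLEN];
        struct rec_hdr h;
        size_t off = 0, head;

        /* Build a tiny "log": one 2-block record, then one 1-block record. */
        memset(log, 0, sizeof(log));
        h = (struct rec_hdr){ REC_BLOCKS, 2 * BLKLEN };
        memcpy(&log[0], &h, sizeof(h));
        h = (struct rec_hdr){ REC_REVOCATIONS, BLKLEN };
        memcpy(&log[2 * BLKLEN], &h, sizeof(h));
        head = 3 * BLKLEN;

        while (off != head) {
                size_t saveoff = off;
                struct rec_hdr cur;

                memcpy(&cur, &log[off], sizeof(cur));   /* "wapbl_circ_read" */
                off += BLKLEN;
                switch (cur.type) {
                case REC_BLOCKS:
                        off += cur.len - BLKLEN;        /* skip payload blocks */
                        printf("blocks record, %u bytes\n", (unsigned)cur.len);
                        break;
                case REC_REVOCATIONS:
                        printf("revocation record, %u bytes\n", (unsigned)cur.len);
                        break;
                default:
                        printf("unrecognized record type 0x%08x\n",
                            (unsigned)cur.type);
                        return 1;
                }
                /* The header's stated length must agree with how far we moved. */
                saveoff += cur.len;
                if (off != saveoff) {
                        printf("corrupted records\n");
                        return 1;
                }
        }
        return 0;
}
/* --- end sketch --- */

Validating the advertised length against the actual advance after each record keeps a single corrupt header from silently desynchronizing the rest of the scan.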
#if 0
int
wapbl_replay_verify(struct wapbl_replay *wr, struct vnode *fsdevvp)
{
        off_t off;
        int mismatchcnt = 0;
        int logblklen = 1 << wr->wr_log_dev_bshift;
        int fsblklen = 1 << wr->wr_fs_dev_bshift;
        void *scratch1 = wapbl_alloc(MAXBSIZE);
        void *scratch2 = wapbl_alloc(MAXBSIZE);
        int error = 0;

        KDASSERT(wapbl_replay_isopen(wr));

        off = wch->wc_tail;
        while (off != wch->wc_head) {
                struct wapbl_wc_null *wcn;
#ifdef DEBUG
                off_t saveoff = off;
#endif
                error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
                if (error)
                        goto out;
                wcn = (struct wapbl_wc_null *)wr->wr_scratch;
                switch (wcn->wc_type) {
                case WAPBL_WC_BLOCKS:
                {
                        struct wapbl_wc_blocklist *wc =
                            (struct wapbl_wc_blocklist *)wr->wr_scratch;
                        int i;
                        for (i = 0; i < wc->wc_blkcount; i++) {
                                int foundcnt = 0;
                                int dirtycnt = 0;
                                int j, n;