Sun Mar 5 13:57:29 2017 UTC ()
Add some event counters for commits, writes, and cache flushes.


(jdolecek)
diff -r1.86 -r1.87 src/sys/kern/vfs_wapbl.c

cvs diff -r1.86 -r1.87 src/sys/kern/vfs_wapbl.c (switch to unified diff)

--- src/sys/kern/vfs_wapbl.c 2016/11/10 20:56:32 1.86
+++ src/sys/kern/vfs_wapbl.c 2017/03/05 13:57:29 1.87
@@ -1,3184 +1,3235 @@ @@ -1,3184 +1,3235 @@
1/* $NetBSD: vfs_wapbl.c,v 1.86 2016/11/10 20:56:32 jdolecek Exp $ */ 1/* $NetBSD: vfs_wapbl.c,v 1.87 2017/03/05 13:57:29 jdolecek Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc. 4 * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc. 8 * by Wasabi Systems, Inc.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * This implements file system independent write ahead filesystem logging. 33 * This implements file system independent write ahead filesystem logging.
34 */ 34 */
35 35
36#define WAPBL_INTERNAL 36#define WAPBL_INTERNAL
37 37
38#include <sys/cdefs.h> 38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.86 2016/11/10 20:56:32 jdolecek Exp $"); 39__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.87 2017/03/05 13:57:29 jdolecek Exp $");
40 40
41#include <sys/param.h> 41#include <sys/param.h>
42#include <sys/bitops.h> 42#include <sys/bitops.h>
43#include <sys/time.h> 43#include <sys/time.h>
44#include <sys/wapbl.h> 44#include <sys/wapbl.h>
45#include <sys/wapbl_replay.h> 45#include <sys/wapbl_replay.h>
46 46
47#ifdef _KERNEL 47#ifdef _KERNEL
48 48
49#include <sys/atomic.h> 49#include <sys/atomic.h>
50#include <sys/conf.h> 50#include <sys/conf.h>
51#include <sys/file.h> 51#include <sys/file.h>
52#include <sys/kauth.h> 52#include <sys/kauth.h>
53#include <sys/kernel.h> 53#include <sys/kernel.h>
54#include <sys/module.h> 54#include <sys/module.h>
55#include <sys/mount.h> 55#include <sys/mount.h>
56#include <sys/mutex.h> 56#include <sys/mutex.h>
57#include <sys/namei.h> 57#include <sys/namei.h>
58#include <sys/proc.h> 58#include <sys/proc.h>
59#include <sys/resourcevar.h> 59#include <sys/resourcevar.h>
60#include <sys/sysctl.h> 60#include <sys/sysctl.h>
61#include <sys/uio.h> 61#include <sys/uio.h>
62#include <sys/vnode.h> 62#include <sys/vnode.h>
63 63
64#include <miscfs/specfs/specdev.h> 64#include <miscfs/specfs/specdev.h>
65 65
66#define wapbl_alloc(s) kmem_alloc((s), KM_SLEEP) 66#define wapbl_alloc(s) kmem_alloc((s), KM_SLEEP)
67#define wapbl_free(a, s) kmem_free((a), (s)) 67#define wapbl_free(a, s) kmem_free((a), (s))
68#define wapbl_calloc(n, s) kmem_zalloc((n)*(s), KM_SLEEP) 68#define wapbl_calloc(n, s) kmem_zalloc((n)*(s), KM_SLEEP)
69 69
70static struct sysctllog *wapbl_sysctl; 70static struct sysctllog *wapbl_sysctl;
71static int wapbl_flush_disk_cache = 1; 71static int wapbl_flush_disk_cache = 1;
72static int wapbl_verbose_commit = 0; 72static int wapbl_verbose_commit = 0;
73 73
74static inline size_t wapbl_space_free(size_t, off_t, off_t); 74static inline size_t wapbl_space_free(size_t, off_t, off_t);
75 75
76#else /* !_KERNEL */ 76#else /* !_KERNEL */
77 77
78#include <assert.h> 78#include <assert.h>
79#include <errno.h> 79#include <errno.h>
80#include <stdbool.h> 80#include <stdbool.h>
81#include <stdio.h> 81#include <stdio.h>
82#include <stdlib.h> 82#include <stdlib.h>
83#include <string.h> 83#include <string.h>
84 84
85#define KDASSERT(x) assert(x) 85#define KDASSERT(x) assert(x)
86#define KASSERT(x) assert(x) 86#define KASSERT(x) assert(x)
87#define wapbl_alloc(s) malloc(s) 87#define wapbl_alloc(s) malloc(s)
88#define wapbl_free(a, s) free(a) 88#define wapbl_free(a, s) free(a)
89#define wapbl_calloc(n, s) calloc((n), (s)) 89#define wapbl_calloc(n, s) calloc((n), (s))
90 90
91#endif /* !_KERNEL */ 91#endif /* !_KERNEL */
92 92
93/* 93/*
94 * INTERNAL DATA STRUCTURES 94 * INTERNAL DATA STRUCTURES
95 */ 95 */
96 96
97/*  97/*
98 * This structure holds per-mount log information. 98 * This structure holds per-mount log information.
99 * 99 *
100 * Legend: a = atomic access only 100 * Legend: a = atomic access only
101 * r = read-only after init 101 * r = read-only after init
102 * l = rwlock held 102 * l = rwlock held
103 * m = mutex held 103 * m = mutex held
104 * lm = rwlock held writing or mutex held 104 * lm = rwlock held writing or mutex held
105 * u = unlocked access ok 105 * u = unlocked access ok
106 * b = bufcache_lock held 106 * b = bufcache_lock held
107 */ 107 */
108LIST_HEAD(wapbl_ino_head, wapbl_ino); 108LIST_HEAD(wapbl_ino_head, wapbl_ino);
109struct wapbl { 109struct wapbl {
110 struct vnode *wl_logvp; /* r: log here */ 110 struct vnode *wl_logvp; /* r: log here */
111 struct vnode *wl_devvp; /* r: log on this device */ 111 struct vnode *wl_devvp; /* r: log on this device */
112 struct mount *wl_mount; /* r: mountpoint wl is associated with */ 112 struct mount *wl_mount; /* r: mountpoint wl is associated with */
113 daddr_t wl_logpbn; /* r: Physical block number of start of log */ 113 daddr_t wl_logpbn; /* r: Physical block number of start of log */
114 int wl_log_dev_bshift; /* r: logarithm of device block size of log 114 int wl_log_dev_bshift; /* r: logarithm of device block size of log
115 device */ 115 device */
116 int wl_fs_dev_bshift; /* r: logarithm of device block size of 116 int wl_fs_dev_bshift; /* r: logarithm of device block size of
117 filesystem device */ 117 filesystem device */
118 118
119 unsigned wl_lock_count; /* m: Count of transactions in progress */ 119 unsigned wl_lock_count; /* m: Count of transactions in progress */
120 120
121 size_t wl_circ_size; /* r: Number of bytes in buffer of log */ 121 size_t wl_circ_size; /* r: Number of bytes in buffer of log */
122 size_t wl_circ_off; /* r: Number of bytes reserved at start */ 122 size_t wl_circ_off; /* r: Number of bytes reserved at start */
123 123
124 size_t wl_bufcount_max; /* r: Number of buffers reserved for log */ 124 size_t wl_bufcount_max; /* r: Number of buffers reserved for log */
125 size_t wl_bufbytes_max; /* r: Number of buf bytes reserved for log */ 125 size_t wl_bufbytes_max; /* r: Number of buf bytes reserved for log */
126 126
127 off_t wl_head; /* l: Byte offset of log head */ 127 off_t wl_head; /* l: Byte offset of log head */
128 off_t wl_tail; /* l: Byte offset of log tail */ 128 off_t wl_tail; /* l: Byte offset of log tail */
129 /* 129 /*
130 * WAPBL log layout, stored on wl_devvp at wl_logpbn: 130 * WAPBL log layout, stored on wl_devvp at wl_logpbn:
131 * 131 *
132 * ___________________ wl_circ_size __________________ 132 * ___________________ wl_circ_size __________________
133 * / \ 133 * / \
134 * +---------+---------+-------+--------------+--------+ 134 * +---------+---------+-------+--------------+--------+
135 * [ commit0 | commit1 | CCWCW | EEEEEEEEEEEE | CCCWCW ] 135 * [ commit0 | commit1 | CCWCW | EEEEEEEEEEEE | CCCWCW ]
136 * +---------+---------+-------+--------------+--------+ 136 * +---------+---------+-------+--------------+--------+
137 * wl_circ_off --^ ^-- wl_head ^-- wl_tail 137 * wl_circ_off --^ ^-- wl_head ^-- wl_tail
138 * 138 *
139 * commit0 and commit1 are commit headers. A commit header has 139 * commit0 and commit1 are commit headers. A commit header has
140 * a generation number, indicating which of the two headers is 140 * a generation number, indicating which of the two headers is
141 * more recent, and an assignment of head and tail pointers. 141 * more recent, and an assignment of head and tail pointers.
142 * The rest is a circular queue of log records, starting at 142 * The rest is a circular queue of log records, starting at
143 * the byte offset wl_circ_off. 143 * the byte offset wl_circ_off.
144 * 144 *
145 * E marks empty space for records. 145 * E marks empty space for records.
146 * W marks records for block writes issued but waiting. 146 * W marks records for block writes issued but waiting.
147 * C marks completed records. 147 * C marks completed records.
148 * 148 *
149 * wapbl_flush writes new records to empty `E' spaces after 149 * wapbl_flush writes new records to empty `E' spaces after
150 * wl_head from the current transaction in memory. 150 * wl_head from the current transaction in memory.
151 * 151 *
152 * wapbl_truncate advances wl_tail past any completed `C' 152 * wapbl_truncate advances wl_tail past any completed `C'
153 * records, freeing them up for use. 153 * records, freeing them up for use.
154 * 154 *
155 * head == tail == 0 means log is empty. 155 * head == tail == 0 means log is empty.
156 * head == tail != 0 means log is full. 156 * head == tail != 0 means log is full.
157 * 157 *
158 * See assertions in wapbl_advance() for other boundary 158 * See assertions in wapbl_advance() for other boundary
159 * conditions. 159 * conditions.
160 * 160 *
161 * Only wapbl_flush moves the head, except when wapbl_truncate 161 * Only wapbl_flush moves the head, except when wapbl_truncate
162 * sets it to 0 to indicate that the log is empty. 162 * sets it to 0 to indicate that the log is empty.
163 * 163 *
164 * Only wapbl_truncate moves the tail, except when wapbl_flush 164 * Only wapbl_truncate moves the tail, except when wapbl_flush
165 * sets it to wl_circ_off to indicate that the log is full. 165 * sets it to wl_circ_off to indicate that the log is full.
166 */ 166 */
167 167
168 struct wapbl_wc_header *wl_wc_header; /* l */ 168 struct wapbl_wc_header *wl_wc_header; /* l */
169 void *wl_wc_scratch; /* l: scratch space (XXX: por que?!?) */ 169 void *wl_wc_scratch; /* l: scratch space (XXX: por que?!?) */
170 170
171 kmutex_t wl_mtx; /* u: short-term lock */ 171 kmutex_t wl_mtx; /* u: short-term lock */
172 krwlock_t wl_rwlock; /* u: File system transaction lock */ 172 krwlock_t wl_rwlock; /* u: File system transaction lock */
173 173
174 /* 174 /*
175 * Must be held while accessing 175 * Must be held while accessing
176 * wl_count or wl_bufs or head or tail 176 * wl_count or wl_bufs or head or tail
177 */ 177 */
178 178
 179#if _KERNEL
179 /* 180 /*
180 * Callback called from within the flush routine to flush any extra 181 * Callback called from within the flush routine to flush any extra
181 * bits. Note that flush may be skipped without calling this if 182 * bits. Note that flush may be skipped without calling this if
182 * there are no outstanding buffers in the transaction. 183 * there are no outstanding buffers in the transaction.
183 */ 184 */
184#if _KERNEL 
185 wapbl_flush_fn_t wl_flush; /* r */ 185 wapbl_flush_fn_t wl_flush; /* r */
186 wapbl_flush_fn_t wl_flush_abort;/* r */ 186 wapbl_flush_fn_t wl_flush_abort;/* r */
 187
 188 /* Event counters */
 189 char wl_ev_group[EVCNT_STRING_MAX]; /* r */
 190 struct evcnt wl_ev_commit; /* l */
 191 struct evcnt wl_ev_journalwrite; /* l */
 192 struct evcnt wl_ev_metawrite; /* lm */
 193 struct evcnt wl_ev_cacheflush; /* l */
187#endif 194#endif
188 195
189 size_t wl_bufbytes; /* m: Byte count of pages in wl_bufs */ 196 size_t wl_bufbytes; /* m: Byte count of pages in wl_bufs */
190 size_t wl_bufcount; /* m: Count of buffers in wl_bufs */ 197 size_t wl_bufcount; /* m: Count of buffers in wl_bufs */
191 size_t wl_bcount; /* m: Total bcount of wl_bufs */ 198 size_t wl_bcount; /* m: Total bcount of wl_bufs */
192 199
193 LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */ 200 LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */
194 201
195 kcondvar_t wl_reclaimable_cv; /* m (obviously) */ 202 kcondvar_t wl_reclaimable_cv; /* m (obviously) */
196 size_t wl_reclaimable_bytes; /* m: Amount of space available for 203 size_t wl_reclaimable_bytes; /* m: Amount of space available for
197 reclamation by truncate */ 204 reclamation by truncate */
198 int wl_error_count; /* m: # of wl_entries with errors */ 205 int wl_error_count; /* m: # of wl_entries with errors */
199 size_t wl_reserved_bytes; /* never truncate log smaller than this */ 206 size_t wl_reserved_bytes; /* never truncate log smaller than this */
200 207
201#ifdef WAPBL_DEBUG_BUFBYTES 208#ifdef WAPBL_DEBUG_BUFBYTES
202 size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */ 209 size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */
203#endif 210#endif
204 211
205#if _KERNEL 212#if _KERNEL
206 int wl_brperjblock; /* r Block records per journal block */ 213 int wl_brperjblock; /* r Block records per journal block */
207#endif 214#endif
208 215
209 TAILQ_HEAD(, wapbl_dealloc) wl_dealloclist; /* lm: list head */ 216 TAILQ_HEAD(, wapbl_dealloc) wl_dealloclist; /* lm: list head */
210 int wl_dealloccnt; /* lm: total count */ 217 int wl_dealloccnt; /* lm: total count */
211 int wl_dealloclim; /* r: max count */ 218 int wl_dealloclim; /* r: max count */
212 219
213 /* hashtable of inode numbers for allocated but unlinked inodes */ 220 /* hashtable of inode numbers for allocated but unlinked inodes */
214 /* synch ??? */ 221 /* synch ??? */
215 struct wapbl_ino_head *wl_inohash; 222 struct wapbl_ino_head *wl_inohash;
216 u_long wl_inohashmask; 223 u_long wl_inohashmask;
217 int wl_inohashcnt; 224 int wl_inohashcnt;
218 225
219 SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction 226 SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
220 accounting */ 227 accounting */
221 228
222 u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */ 229 u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */
223 daddr_t wl_buffer_dblk; /* l: buffer disk block address */ 230 daddr_t wl_buffer_dblk; /* l: buffer disk block address */
224 size_t wl_buffer_used; /* l: buffer current use */ 231 size_t wl_buffer_used; /* l: buffer current use */
225}; 232};
226 233
227#ifdef WAPBL_DEBUG_PRINT 234#ifdef WAPBL_DEBUG_PRINT
228int wapbl_debug_print = WAPBL_DEBUG_PRINT; 235int wapbl_debug_print = WAPBL_DEBUG_PRINT;
229#endif 236#endif
230 237
231/****************************************************************/ 238/****************************************************************/
232#ifdef _KERNEL 239#ifdef _KERNEL
233 240
234#ifdef WAPBL_DEBUG 241#ifdef WAPBL_DEBUG
235struct wapbl *wapbl_debug_wl; 242struct wapbl *wapbl_debug_wl;
236#endif 243#endif
237 244
238static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail); 245static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail);
239static int wapbl_write_blocks(struct wapbl *wl, off_t *offp); 246static int wapbl_write_blocks(struct wapbl *wl, off_t *offp);
240static int wapbl_write_revocations(struct wapbl *wl, off_t *offp); 247static int wapbl_write_revocations(struct wapbl *wl, off_t *offp);
241static int wapbl_write_inodes(struct wapbl *wl, off_t *offp); 248static int wapbl_write_inodes(struct wapbl *wl, off_t *offp);
242#endif /* _KERNEL */ 249#endif /* _KERNEL */
243 250
244static int wapbl_replay_process(struct wapbl_replay *wr, off_t, off_t); 251static int wapbl_replay_process(struct wapbl_replay *wr, off_t, off_t);
245 252
246static inline size_t wapbl_space_used(size_t avail, off_t head, 253static inline size_t wapbl_space_used(size_t avail, off_t head,
247 off_t tail); 254 off_t tail);
248 255
249#ifdef _KERNEL 256#ifdef _KERNEL
250 257
251static struct pool wapbl_entry_pool; 258static struct pool wapbl_entry_pool;
252static struct pool wapbl_dealloc_pool; 259static struct pool wapbl_dealloc_pool;
253 260
254#define WAPBL_INODETRK_SIZE 83 261#define WAPBL_INODETRK_SIZE 83
255static int wapbl_ino_pool_refcount; 262static int wapbl_ino_pool_refcount;
256static struct pool wapbl_ino_pool; 263static struct pool wapbl_ino_pool;
257struct wapbl_ino { 264struct wapbl_ino {
258 LIST_ENTRY(wapbl_ino) wi_hash; 265 LIST_ENTRY(wapbl_ino) wi_hash;
259 ino_t wi_ino; 266 ino_t wi_ino;
260 mode_t wi_mode; 267 mode_t wi_mode;
261}; 268};
262 269
263static void wapbl_inodetrk_init(struct wapbl *wl, u_int size); 270static void wapbl_inodetrk_init(struct wapbl *wl, u_int size);
264static void wapbl_inodetrk_free(struct wapbl *wl); 271static void wapbl_inodetrk_free(struct wapbl *wl);
265static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino); 272static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino);
266 273
267static size_t wapbl_transaction_len(struct wapbl *wl); 274static size_t wapbl_transaction_len(struct wapbl *wl);
268static inline size_t wapbl_transaction_inodes_len(struct wapbl *wl); 275static inline size_t wapbl_transaction_inodes_len(struct wapbl *wl);
269 276
270static void wapbl_deallocation_free(struct wapbl *, struct wapbl_dealloc *, 277static void wapbl_deallocation_free(struct wapbl *, struct wapbl_dealloc *,
271 bool); 278 bool);
272 279
 280static void wapbl_evcnt_init(struct wapbl *);
 281static void wapbl_evcnt_free(struct wapbl *);
 282
273#if 0 283#if 0
274int wapbl_replay_verify(struct wapbl_replay *, struct vnode *); 284int wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
275#endif 285#endif
276 286
277static int wapbl_replay_isopen1(struct wapbl_replay *); 287static int wapbl_replay_isopen1(struct wapbl_replay *);
278 288
279struct wapbl_ops wapbl_ops = { 289struct wapbl_ops wapbl_ops = {
280 .wo_wapbl_discard = wapbl_discard, 290 .wo_wapbl_discard = wapbl_discard,
281 .wo_wapbl_replay_isopen = wapbl_replay_isopen1, 291 .wo_wapbl_replay_isopen = wapbl_replay_isopen1,
282 .wo_wapbl_replay_can_read = wapbl_replay_can_read, 292 .wo_wapbl_replay_can_read = wapbl_replay_can_read,
283 .wo_wapbl_replay_read = wapbl_replay_read, 293 .wo_wapbl_replay_read = wapbl_replay_read,
284 .wo_wapbl_add_buf = wapbl_add_buf, 294 .wo_wapbl_add_buf = wapbl_add_buf,
285 .wo_wapbl_remove_buf = wapbl_remove_buf, 295 .wo_wapbl_remove_buf = wapbl_remove_buf,
286 .wo_wapbl_resize_buf = wapbl_resize_buf, 296 .wo_wapbl_resize_buf = wapbl_resize_buf,
287 .wo_wapbl_begin = wapbl_begin, 297 .wo_wapbl_begin = wapbl_begin,
288 .wo_wapbl_end = wapbl_end, 298 .wo_wapbl_end = wapbl_end,
289 .wo_wapbl_junlock_assert= wapbl_junlock_assert, 299 .wo_wapbl_junlock_assert= wapbl_junlock_assert,
290 300
291 /* XXX: the following is only used to say "this is a wapbl buf" */ 301 /* XXX: the following is only used to say "this is a wapbl buf" */
292 .wo_wapbl_biodone = wapbl_biodone, 302 .wo_wapbl_biodone = wapbl_biodone,
293}; 303};
294 304
295static int 305static int
296wapbl_sysctl_init(void) 306wapbl_sysctl_init(void)
297{ 307{
298 int rv; 308 int rv;
299 const struct sysctlnode *rnode, *cnode; 309 const struct sysctlnode *rnode, *cnode;
300 310
301 wapbl_sysctl = NULL; 311 wapbl_sysctl = NULL;
302 312
303 rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode, 313 rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode,
304 CTLFLAG_PERMANENT, 314 CTLFLAG_PERMANENT,
305 CTLTYPE_NODE, "wapbl", 315 CTLTYPE_NODE, "wapbl",
306 SYSCTL_DESCR("WAPBL journaling options"), 316 SYSCTL_DESCR("WAPBL journaling options"),
307 NULL, 0, NULL, 0, 317 NULL, 0, NULL, 0,
308 CTL_VFS, CTL_CREATE, CTL_EOL); 318 CTL_VFS, CTL_CREATE, CTL_EOL);
309 if (rv) 319 if (rv)
310 return rv; 320 return rv;
311 321
312 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode, 322 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
313 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 323 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
314 CTLTYPE_INT, "flush_disk_cache", 324 CTLTYPE_INT, "flush_disk_cache",
315 SYSCTL_DESCR("flush disk cache"), 325 SYSCTL_DESCR("flush disk cache"),
316 NULL, 0, &wapbl_flush_disk_cache, 0, 326 NULL, 0, &wapbl_flush_disk_cache, 0,
317 CTL_CREATE, CTL_EOL); 327 CTL_CREATE, CTL_EOL);
318 if (rv) 328 if (rv)
319 return rv; 329 return rv;
320 330
321 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode, 331 rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
322 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 332 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
323 CTLTYPE_INT, "verbose_commit", 333 CTLTYPE_INT, "verbose_commit",
324 SYSCTL_DESCR("show time and size of wapbl log commits"), 334 SYSCTL_DESCR("show time and size of wapbl log commits"),
325 NULL, 0, &wapbl_verbose_commit, 0, 335 NULL, 0, &wapbl_verbose_commit, 0,
326 CTL_CREATE, CTL_EOL); 336 CTL_CREATE, CTL_EOL);
327 return rv; 337 return rv;
328} 338}
329 339
330static void 340static void
331wapbl_init(void) 341wapbl_init(void)
332{ 342{
333 343
334 pool_init(&wapbl_entry_pool, sizeof(struct wapbl_entry), 0, 0, 0, 344 pool_init(&wapbl_entry_pool, sizeof(struct wapbl_entry), 0, 0, 0,
335 "wapblentrypl", &pool_allocator_kmem, IPL_VM); 345 "wapblentrypl", &pool_allocator_kmem, IPL_VM);
336 pool_init(&wapbl_dealloc_pool, sizeof(struct wapbl_dealloc), 0, 0, 0, 346 pool_init(&wapbl_dealloc_pool, sizeof(struct wapbl_dealloc), 0, 0, 0,
337 "wapbldealloc", &pool_allocator_nointr, IPL_NONE); 347 "wapbldealloc", &pool_allocator_nointr, IPL_NONE);
338 348
339 wapbl_sysctl_init(); 349 wapbl_sysctl_init();
340} 350}
341 351
342static int 352static int
343wapbl_fini(void) 353wapbl_fini(void)
344{ 354{
345 355
346 if (wapbl_sysctl != NULL) 356 if (wapbl_sysctl != NULL)
347 sysctl_teardown(&wapbl_sysctl); 357 sysctl_teardown(&wapbl_sysctl);
348 358
349 pool_destroy(&wapbl_dealloc_pool); 359 pool_destroy(&wapbl_dealloc_pool);
350 pool_destroy(&wapbl_entry_pool); 360 pool_destroy(&wapbl_entry_pool);
351 361
352 return 0; 362 return 0;
353} 363}
354 364
 365static void
 366wapbl_evcnt_init(struct wapbl *wl)
 367{
 368 snprintf(wl->wl_ev_group, sizeof(wl->wl_ev_group),
 369 "wapbl fsid 0x%x/0x%x",
 370 wl->wl_mount->mnt_stat.f_fsidx.__fsid_val[0],
 371 wl->wl_mount->mnt_stat.f_fsidx.__fsid_val[1]
 372 );
 373
 374 evcnt_attach_dynamic(&wl->wl_ev_commit, EVCNT_TYPE_MISC,
 375 NULL, wl->wl_ev_group, "commit");
 376 evcnt_attach_dynamic(&wl->wl_ev_journalwrite, EVCNT_TYPE_MISC,
 377 NULL, wl->wl_ev_group, "journal sync block write");
 378 evcnt_attach_dynamic(&wl->wl_ev_metawrite, EVCNT_TYPE_MISC,
 379 NULL, wl->wl_ev_group, "metadata finished block write");
 380 evcnt_attach_dynamic(&wl->wl_ev_cacheflush, EVCNT_TYPE_MISC,
 381 NULL, wl->wl_ev_group, "cache flush");
 382}
 383
 384static void
 385wapbl_evcnt_free(struct wapbl *wl)
 386{
 387 evcnt_detach(&wl->wl_ev_commit);
 388 evcnt_detach(&wl->wl_ev_journalwrite);
 389 evcnt_detach(&wl->wl_ev_metawrite);
 390 evcnt_detach(&wl->wl_ev_cacheflush);
 391}
 392
355static int 393static int
356wapbl_start_flush_inodes(struct wapbl *wl, struct wapbl_replay *wr) 394wapbl_start_flush_inodes(struct wapbl *wl, struct wapbl_replay *wr)
357{ 395{
358 int error, i; 396 int error, i;
359 397
360 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 398 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
361 ("wapbl_start: reusing log with %d inodes\n", wr->wr_inodescnt)); 399 ("wapbl_start: reusing log with %d inodes\n", wr->wr_inodescnt));
362 400
363 /* 401 /*
364 * Its only valid to reuse the replay log if its 402 * Its only valid to reuse the replay log if its
365 * the same as the new log we just opened. 403 * the same as the new log we just opened.
366 */ 404 */
367 KDASSERT(!wapbl_replay_isopen(wr)); 405 KDASSERT(!wapbl_replay_isopen(wr));
368 KASSERT(wl->wl_devvp->v_type == VBLK); 406 KASSERT(wl->wl_devvp->v_type == VBLK);
369 KASSERT(wr->wr_devvp->v_type == VBLK); 407 KASSERT(wr->wr_devvp->v_type == VBLK);
370 KASSERT(wl->wl_devvp->v_rdev == wr->wr_devvp->v_rdev); 408 KASSERT(wl->wl_devvp->v_rdev == wr->wr_devvp->v_rdev);
371 KASSERT(wl->wl_logpbn == wr->wr_logpbn); 409 KASSERT(wl->wl_logpbn == wr->wr_logpbn);
372 KASSERT(wl->wl_circ_size == wr->wr_circ_size); 410 KASSERT(wl->wl_circ_size == wr->wr_circ_size);
373 KASSERT(wl->wl_circ_off == wr->wr_circ_off); 411 KASSERT(wl->wl_circ_off == wr->wr_circ_off);
374 KASSERT(wl->wl_log_dev_bshift == wr->wr_log_dev_bshift); 412 KASSERT(wl->wl_log_dev_bshift == wr->wr_log_dev_bshift);
375 KASSERT(wl->wl_fs_dev_bshift == wr->wr_fs_dev_bshift); 413 KASSERT(wl->wl_fs_dev_bshift == wr->wr_fs_dev_bshift);
376 414
377 wl->wl_wc_header->wc_generation = wr->wr_generation + 1; 415 wl->wl_wc_header->wc_generation = wr->wr_generation + 1;
378 416
379 for (i = 0; i < wr->wr_inodescnt; i++) 417 for (i = 0; i < wr->wr_inodescnt; i++)
380 wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber, 418 wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber,
381 wr->wr_inodes[i].wr_imode); 419 wr->wr_inodes[i].wr_imode);
382 420
383 /* Make sure new transaction won't overwrite old inodes list */ 421 /* Make sure new transaction won't overwrite old inodes list */
384 KDASSERT(wapbl_transaction_len(wl) <=  422 KDASSERT(wapbl_transaction_len(wl) <=
385 wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead, 423 wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead,
386 wr->wr_inodestail)); 424 wr->wr_inodestail));
387 425
388 wl->wl_head = wl->wl_tail = wr->wr_inodeshead; 426 wl->wl_head = wl->wl_tail = wr->wr_inodeshead;
389 wl->wl_reclaimable_bytes = wl->wl_reserved_bytes = 427 wl->wl_reclaimable_bytes = wl->wl_reserved_bytes =
390 wapbl_transaction_len(wl); 428 wapbl_transaction_len(wl);
391 429
392 error = wapbl_write_inodes(wl, &wl->wl_head); 430 error = wapbl_write_inodes(wl, &wl->wl_head);
393 if (error) 431 if (error)
394 return error; 432 return error;
395 433
396 KASSERT(wl->wl_head != wl->wl_tail); 434 KASSERT(wl->wl_head != wl->wl_tail);
397 KASSERT(wl->wl_head != 0); 435 KASSERT(wl->wl_head != 0);
398 436
399 return 0; 437 return 0;
400} 438}
401 439
402int 440int
403wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp, 441wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp,
404 daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr, 442 daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr,
405 wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn) 443 wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn)
406{ 444{
407 struct wapbl *wl; 445 struct wapbl *wl;
408 struct vnode *devvp; 446 struct vnode *devvp;
409 daddr_t logpbn; 447 daddr_t logpbn;
410 int error; 448 int error;
411 int log_dev_bshift = ilog2(blksize); 449 int log_dev_bshift = ilog2(blksize);
412 int fs_dev_bshift = log_dev_bshift; 450 int fs_dev_bshift = log_dev_bshift;
413 int run; 451 int run;
414 452
415 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64 453 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64
416 " count=%zu blksize=%zu\n", vp, off, count, blksize)); 454 " count=%zu blksize=%zu\n", vp, off, count, blksize));
417 455
418 if (log_dev_bshift > fs_dev_bshift) { 456 if (log_dev_bshift > fs_dev_bshift) {
419 WAPBL_PRINTF(WAPBL_PRINT_OPEN, 457 WAPBL_PRINTF(WAPBL_PRINT_OPEN,
420 ("wapbl: log device's block size cannot be larger " 458 ("wapbl: log device's block size cannot be larger "
421 "than filesystem's\n")); 459 "than filesystem's\n"));
422 /* 460 /*
423 * Not currently implemented, although it could be if 461 * Not currently implemented, although it could be if
424 * needed someday. 462 * needed someday.
425 */ 463 */
426 return ENOSYS; 464 return ENOSYS;
427 } 465 }
428 466
429 if (off < 0) 467 if (off < 0)
430 return EINVAL; 468 return EINVAL;
431 469
432 if (blksize < DEV_BSIZE) 470 if (blksize < DEV_BSIZE)
433 return EINVAL; 471 return EINVAL;
434 if (blksize % DEV_BSIZE) 472 if (blksize % DEV_BSIZE)
435 return EINVAL; 473 return EINVAL;
436 474
437 /* XXXTODO: verify that the full load is writable */ 475 /* XXXTODO: verify that the full load is writable */
438 476
439 /* 477 /*
440 * XXX check for minimum log size 478 * XXX check for minimum log size
441 * minimum is governed by minimum amount of space 479 * minimum is governed by minimum amount of space
442 * to complete a transaction. (probably truncate) 480 * to complete a transaction. (probably truncate)
443 */ 481 */
444 /* XXX for now pick something minimal */ 482 /* XXX for now pick something minimal */
445 if ((count * blksize) < MAXPHYS) { 483 if ((count * blksize) < MAXPHYS) {
446 return ENOSPC; 484 return ENOSPC;
447 } 485 }
448 486
449 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) { 487 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) {
450 return error; 488 return error;
451 } 489 }
452 490
453 wl = wapbl_calloc(1, sizeof(*wl)); 491 wl = wapbl_calloc(1, sizeof(*wl));
454 rw_init(&wl->wl_rwlock); 492 rw_init(&wl->wl_rwlock);
455 mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE); 493 mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE);
456 cv_init(&wl->wl_reclaimable_cv, "wapblrec"); 494 cv_init(&wl->wl_reclaimable_cv, "wapblrec");
457 LIST_INIT(&wl->wl_bufs); 495 LIST_INIT(&wl->wl_bufs);
458 SIMPLEQ_INIT(&wl->wl_entries); 496 SIMPLEQ_INIT(&wl->wl_entries);
459 497
460 wl->wl_logvp = vp; 498 wl->wl_logvp = vp;
461 wl->wl_devvp = devvp; 499 wl->wl_devvp = devvp;
462 wl->wl_mount = mp; 500 wl->wl_mount = mp;
463 wl->wl_logpbn = logpbn; 501 wl->wl_logpbn = logpbn;
464 wl->wl_log_dev_bshift = log_dev_bshift; 502 wl->wl_log_dev_bshift = log_dev_bshift;
465 wl->wl_fs_dev_bshift = fs_dev_bshift; 503 wl->wl_fs_dev_bshift = fs_dev_bshift;
466 504
467 wl->wl_flush = flushfn; 505 wl->wl_flush = flushfn;
468 wl->wl_flush_abort = flushabortfn; 506 wl->wl_flush_abort = flushabortfn;
469 507
470 /* Reserve two log device blocks for the commit headers */ 508 /* Reserve two log device blocks for the commit headers */
471 wl->wl_circ_off = 2<<wl->wl_log_dev_bshift; 509 wl->wl_circ_off = 2<<wl->wl_log_dev_bshift;
472 wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off); 510 wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off);
473 /* truncate the log usage to a multiple of log_dev_bshift */ 511 /* truncate the log usage to a multiple of log_dev_bshift */
474 wl->wl_circ_size >>= wl->wl_log_dev_bshift; 512 wl->wl_circ_size >>= wl->wl_log_dev_bshift;
475 wl->wl_circ_size <<= wl->wl_log_dev_bshift; 513 wl->wl_circ_size <<= wl->wl_log_dev_bshift;
476 514
477 /* 515 /*
478 * wl_bufbytes_max limits the size of the in memory transaction space. 516 * wl_bufbytes_max limits the size of the in memory transaction space.
479 * - Since buffers are allocated and accounted for in units of 517 * - Since buffers are allocated and accounted for in units of
480 * PAGE_SIZE it is required to be a multiple of PAGE_SIZE 518 * PAGE_SIZE it is required to be a multiple of PAGE_SIZE
481 * (i.e. 1<<PAGE_SHIFT) 519 * (i.e. 1<<PAGE_SHIFT)
482 * - Since the log device has to be written in units of 520 * - Since the log device has to be written in units of
483 * 1<<wl_log_dev_bshift it is required to be a mulitple of 521 * 1<<wl_log_dev_bshift it is required to be a mulitple of
484 * 1<<wl_log_dev_bshift. 522 * 1<<wl_log_dev_bshift.
485 * - Since filesystem will provide data in units of 1<<wl_fs_dev_bshift, 523 * - Since filesystem will provide data in units of 1<<wl_fs_dev_bshift,
486 * it is convenient to be a multiple of 1<<wl_fs_dev_bshift. 524 * it is convenient to be a multiple of 1<<wl_fs_dev_bshift.
487 * Therefore it must be multiple of the least common multiple of those 525 * Therefore it must be multiple of the least common multiple of those
488 * three quantities. Fortunately, all of those quantities are 526 * three quantities. Fortunately, all of those quantities are
489 * guaranteed to be a power of two, and the least common multiple of 527 * guaranteed to be a power of two, and the least common multiple of
490 * a set of numbers which are all powers of two is simply the maximum 528 * a set of numbers which are all powers of two is simply the maximum
491 * of those numbers. Finally, the maximum logarithm of a power of two 529 * of those numbers. Finally, the maximum logarithm of a power of two
492 * is the same as the log of the maximum power of two. So we can do 530 * is the same as the log of the maximum power of two. So we can do
493 * the following operations to size wl_bufbytes_max: 531 * the following operations to size wl_bufbytes_max:
494 */ 532 */
495 533
496 /* XXX fix actual number of pages reserved per filesystem. */ 534 /* XXX fix actual number of pages reserved per filesystem. */
497 wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2); 535 wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2);
498 536
499 /* Round wl_bufbytes_max to the largest power of two constraint */ 537 /* Round wl_bufbytes_max to the largest power of two constraint */
500 wl->wl_bufbytes_max >>= PAGE_SHIFT; 538 wl->wl_bufbytes_max >>= PAGE_SHIFT;
501 wl->wl_bufbytes_max <<= PAGE_SHIFT; 539 wl->wl_bufbytes_max <<= PAGE_SHIFT;
502 wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift; 540 wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift;
503 wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift; 541 wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift;
504 wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift; 542 wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift;
505 wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift; 543 wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift;
506 544
507 /* XXX maybe use filesystem fragment size instead of 1024 */ 545 /* XXX maybe use filesystem fragment size instead of 1024 */
508 /* XXX fix actual number of buffers reserved per filesystem. */ 546 /* XXX fix actual number of buffers reserved per filesystem. */
509 wl->wl_bufcount_max = (nbuf / 2) * 1024; 547 wl->wl_bufcount_max = (nbuf / 2) * 1024;
510 548
511 wl->wl_brperjblock = ((1<<wl->wl_log_dev_bshift) 549 wl->wl_brperjblock = ((1<<wl->wl_log_dev_bshift)
512 - offsetof(struct wapbl_wc_blocklist, wc_blocks)) / 550 - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
513 sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]); 551 sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
514 KASSERT(wl->wl_brperjblock > 0); 552 KASSERT(wl->wl_brperjblock > 0);
515 553
516 /* XXX tie this into resource estimation */ 554 /* XXX tie this into resource estimation */
517 wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2; 555 wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2;
518 TAILQ_INIT(&wl->wl_dealloclist); 556 TAILQ_INIT(&wl->wl_dealloclist);
519  557
520 wl->wl_buffer = wapbl_alloc(MAXPHYS); 558 wl->wl_buffer = wapbl_alloc(MAXPHYS);
521 wl->wl_buffer_used = 0; 559 wl->wl_buffer_used = 0;
522 560
523 wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE); 561 wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
524 562
 563 wapbl_evcnt_init(wl);
 564
525 /* Initialize the commit header */ 565 /* Initialize the commit header */
526 { 566 {
527 struct wapbl_wc_header *wc; 567 struct wapbl_wc_header *wc;
528 size_t len = 1 << wl->wl_log_dev_bshift; 568 size_t len = 1 << wl->wl_log_dev_bshift;
529 wc = wapbl_calloc(1, len); 569 wc = wapbl_calloc(1, len);
530 wc->wc_type = WAPBL_WC_HEADER; 570 wc->wc_type = WAPBL_WC_HEADER;
531 wc->wc_len = len; 571 wc->wc_len = len;
532 wc->wc_circ_off = wl->wl_circ_off; 572 wc->wc_circ_off = wl->wl_circ_off;
533 wc->wc_circ_size = wl->wl_circ_size; 573 wc->wc_circ_size = wl->wl_circ_size;
534 /* XXX wc->wc_fsid */ 574 /* XXX wc->wc_fsid */
535 wc->wc_log_dev_bshift = wl->wl_log_dev_bshift; 575 wc->wc_log_dev_bshift = wl->wl_log_dev_bshift;
536 wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift; 576 wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift;
537 wl->wl_wc_header = wc; 577 wl->wl_wc_header = wc;
538 wl->wl_wc_scratch = wapbl_alloc(len); 578 wl->wl_wc_scratch = wapbl_alloc(len);
539 } 579 }
540 580
541 /* 581 /*
542 * if there was an existing set of unlinked but 582 * if there was an existing set of unlinked but
543 * allocated inodes, preserve it in the new 583 * allocated inodes, preserve it in the new
544 * log. 584 * log.
545 */ 585 */
546 if (wr && wr->wr_inodescnt) { 586 if (wr && wr->wr_inodescnt) {
547 error = wapbl_start_flush_inodes(wl, wr); 587 error = wapbl_start_flush_inodes(wl, wr);
548 if (error) 588 if (error)
549 goto errout; 589 goto errout;
550 } 590 }
551 591
552 error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail); 592 error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail);
553 if (error) { 593 if (error) {
554 goto errout; 594 goto errout;
555 } 595 }
556 596
557 *wlp = wl; 597 *wlp = wl;
558#if defined(WAPBL_DEBUG) 598#if defined(WAPBL_DEBUG)
559 wapbl_debug_wl = wl; 599 wapbl_debug_wl = wl;
560#endif 600#endif
561 601
562 return 0; 602 return 0;
563 errout: 603 errout:
564 wapbl_discard(wl); 604 wapbl_discard(wl);
565 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len); 605 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
566 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len); 606 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
567 wapbl_free(wl->wl_buffer, MAXPHYS); 607 wapbl_free(wl->wl_buffer, MAXPHYS);
568 wapbl_inodetrk_free(wl); 608 wapbl_inodetrk_free(wl);
569 wapbl_free(wl, sizeof(*wl)); 609 wapbl_free(wl, sizeof(*wl));
570 610
571 return error; 611 return error;
572} 612}
573 613
574/* 614/*
575 * Like wapbl_flush, only discards the transaction 615 * Like wapbl_flush, only discards the transaction
576 * completely 616 * completely
577 */ 617 */
578 618
579void 619void
580wapbl_discard(struct wapbl *wl) 620wapbl_discard(struct wapbl *wl)
581{ 621{
582 struct wapbl_entry *we; 622 struct wapbl_entry *we;
583 struct wapbl_dealloc *wd; 623 struct wapbl_dealloc *wd;
584 struct buf *bp; 624 struct buf *bp;
585 int i; 625 int i;
586 626
587 /* 627 /*
588 * XXX we may consider using upgrade here 628 * XXX we may consider using upgrade here
589 * if we want to call flush from inside a transaction 629 * if we want to call flush from inside a transaction
590 */ 630 */
591 rw_enter(&wl->wl_rwlock, RW_WRITER); 631 rw_enter(&wl->wl_rwlock, RW_WRITER);
592 wl->wl_flush(wl->wl_mount, TAILQ_FIRST(&wl->wl_dealloclist)); 632 wl->wl_flush(wl->wl_mount, TAILQ_FIRST(&wl->wl_dealloclist));
593 633
594#ifdef WAPBL_DEBUG_PRINT 634#ifdef WAPBL_DEBUG_PRINT
595 { 635 {
596 pid_t pid = -1; 636 pid_t pid = -1;
597 lwpid_t lid = -1; 637 lwpid_t lid = -1;
598 if (curproc) 638 if (curproc)
599 pid = curproc->p_pid; 639 pid = curproc->p_pid;
600 if (curlwp) 640 if (curlwp)
601 lid = curlwp->l_lid; 641 lid = curlwp->l_lid;
602#ifdef WAPBL_DEBUG_BUFBYTES 642#ifdef WAPBL_DEBUG_BUFBYTES
603 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 643 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
604 ("wapbl_discard: thread %d.%d discarding " 644 ("wapbl_discard: thread %d.%d discarding "
605 "transaction\n" 645 "transaction\n"
606 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 646 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
607 "deallocs=%d inodes=%d\n" 647 "deallocs=%d inodes=%d\n"
608 "\terrcnt = %u, reclaimable=%zu reserved=%zu " 648 "\terrcnt = %u, reclaimable=%zu reserved=%zu "
609 "unsynced=%zu\n", 649 "unsynced=%zu\n",
610 pid, lid, wl->wl_bufcount, wl->wl_bufbytes, 650 pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
611 wl->wl_bcount, wl->wl_dealloccnt, 651 wl->wl_bcount, wl->wl_dealloccnt,
612 wl->wl_inohashcnt, wl->wl_error_count, 652 wl->wl_inohashcnt, wl->wl_error_count,
613 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 653 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
614 wl->wl_unsynced_bufbytes)); 654 wl->wl_unsynced_bufbytes));
615 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 655 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
616 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 656 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
617 ("\tentry: bufcount = %zu, reclaimable = %zu, " 657 ("\tentry: bufcount = %zu, reclaimable = %zu, "
618 "error = %d, unsynced = %zu\n", 658 "error = %d, unsynced = %zu\n",
619 we->we_bufcount, we->we_reclaimable_bytes, 659 we->we_bufcount, we->we_reclaimable_bytes,
620 we->we_error, we->we_unsynced_bufbytes)); 660 we->we_error, we->we_unsynced_bufbytes));
621 } 661 }
622#else /* !WAPBL_DEBUG_BUFBYTES */ 662#else /* !WAPBL_DEBUG_BUFBYTES */
623 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 663 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
624 ("wapbl_discard: thread %d.%d discarding transaction\n" 664 ("wapbl_discard: thread %d.%d discarding transaction\n"
625 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 665 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
626 "deallocs=%d inodes=%d\n" 666 "deallocs=%d inodes=%d\n"
627 "\terrcnt = %u, reclaimable=%zu reserved=%zu\n", 667 "\terrcnt = %u, reclaimable=%zu reserved=%zu\n",
628 pid, lid, wl->wl_bufcount, wl->wl_bufbytes, 668 pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
629 wl->wl_bcount, wl->wl_dealloccnt, 669 wl->wl_bcount, wl->wl_dealloccnt,
630 wl->wl_inohashcnt, wl->wl_error_count, 670 wl->wl_inohashcnt, wl->wl_error_count,
631 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes)); 671 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes));
632 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 672 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
633 WAPBL_PRINTF(WAPBL_PRINT_DISCARD, 673 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
634 ("\tentry: bufcount = %zu, reclaimable = %zu, " 674 ("\tentry: bufcount = %zu, reclaimable = %zu, "
635 "error = %d\n", 675 "error = %d\n",
636 we->we_bufcount, we->we_reclaimable_bytes, 676 we->we_bufcount, we->we_reclaimable_bytes,
637 we->we_error)); 677 we->we_error));
638 } 678 }
639#endif /* !WAPBL_DEBUG_BUFBYTES */ 679#endif /* !WAPBL_DEBUG_BUFBYTES */
640 } 680 }
641#endif /* WAPBL_DEBUG_PRINT */ 681#endif /* WAPBL_DEBUG_PRINT */
642 682
643 for (i = 0; i <= wl->wl_inohashmask; i++) { 683 for (i = 0; i <= wl->wl_inohashmask; i++) {
644 struct wapbl_ino_head *wih; 684 struct wapbl_ino_head *wih;
645 struct wapbl_ino *wi; 685 struct wapbl_ino *wi;
646 686
647 wih = &wl->wl_inohash[i]; 687 wih = &wl->wl_inohash[i];
648 while ((wi = LIST_FIRST(wih)) != NULL) { 688 while ((wi = LIST_FIRST(wih)) != NULL) {
649 LIST_REMOVE(wi, wi_hash); 689 LIST_REMOVE(wi, wi_hash);
650 pool_put(&wapbl_ino_pool, wi); 690 pool_put(&wapbl_ino_pool, wi);
651 KASSERT(wl->wl_inohashcnt > 0); 691 KASSERT(wl->wl_inohashcnt > 0);
652 wl->wl_inohashcnt--; 692 wl->wl_inohashcnt--;
653 } 693 }
654 } 694 }
655 695
656 /* 696 /*
657 * clean buffer list 697 * clean buffer list
658 */ 698 */
659 mutex_enter(&bufcache_lock); 699 mutex_enter(&bufcache_lock);
660 mutex_enter(&wl->wl_mtx); 700 mutex_enter(&wl->wl_mtx);
661 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) { 701 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
662 if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) { 702 if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) {
663 /* 703 /*
664 * The buffer will be unlocked and 704 * The buffer will be unlocked and
665 * removed from the transaction in brelse 705 * removed from the transaction in brelse
666 */ 706 */
667 mutex_exit(&wl->wl_mtx); 707 mutex_exit(&wl->wl_mtx);
668 brelsel(bp, 0); 708 brelsel(bp, 0);
669 mutex_enter(&wl->wl_mtx); 709 mutex_enter(&wl->wl_mtx);
670 } 710 }
671 } 711 }
672 mutex_exit(&wl->wl_mtx); 712 mutex_exit(&wl->wl_mtx);
673 mutex_exit(&bufcache_lock); 713 mutex_exit(&bufcache_lock);
674 714
675 /* 715 /*
676 * Remove references to this wl from wl_entries, free any which 716 * Remove references to this wl from wl_entries, free any which
677 * no longer have buffers, others will be freed in wapbl_biodone 717 * no longer have buffers, others will be freed in wapbl_biodone
678 * when they no longer have any buffers. 718 * when they no longer have any buffers.
679 */ 719 */
680 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) { 720 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) {
681 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries); 721 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
682 /* XXX should we be accumulating wl_error_count 722 /* XXX should we be accumulating wl_error_count
683 * and increasing reclaimable bytes ? */ 723 * and increasing reclaimable bytes ? */
684 we->we_wapbl = NULL; 724 we->we_wapbl = NULL;
685 if (we->we_bufcount == 0) { 725 if (we->we_bufcount == 0) {
686#ifdef WAPBL_DEBUG_BUFBYTES 726#ifdef WAPBL_DEBUG_BUFBYTES
687 KASSERT(we->we_unsynced_bufbytes == 0); 727 KASSERT(we->we_unsynced_bufbytes == 0);
688#endif 728#endif
689 pool_put(&wapbl_entry_pool, we); 729 pool_put(&wapbl_entry_pool, we);
690 } 730 }
691 } 731 }
692 732
693 /* Discard list of deallocs */ 733 /* Discard list of deallocs */
694 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL) 734 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL)
695 wapbl_deallocation_free(wl, wd, true); 735 wapbl_deallocation_free(wl, wd, true);
696 736
697 /* XXX should we clear wl_reserved_bytes? */ 737 /* XXX should we clear wl_reserved_bytes? */
698 738
699 KASSERT(wl->wl_bufbytes == 0); 739 KASSERT(wl->wl_bufbytes == 0);
700 KASSERT(wl->wl_bcount == 0); 740 KASSERT(wl->wl_bcount == 0);
701 KASSERT(wl->wl_bufcount == 0); 741 KASSERT(wl->wl_bufcount == 0);
702 KASSERT(LIST_EMPTY(&wl->wl_bufs)); 742 KASSERT(LIST_EMPTY(&wl->wl_bufs));
703 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries)); 743 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
704 KASSERT(wl->wl_inohashcnt == 0); 744 KASSERT(wl->wl_inohashcnt == 0);
705 KASSERT(TAILQ_EMPTY(&wl->wl_dealloclist)); 745 KASSERT(TAILQ_EMPTY(&wl->wl_dealloclist));
706 KASSERT(wl->wl_dealloccnt == 0); 746 KASSERT(wl->wl_dealloccnt == 0);
707 747
708 rw_exit(&wl->wl_rwlock); 748 rw_exit(&wl->wl_rwlock);
709} 749}
710 750
711int 751int
712wapbl_stop(struct wapbl *wl, int force) 752wapbl_stop(struct wapbl *wl, int force)
713{ 753{
714 int error; 754 int error;
715 755
716 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n")); 756 WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n"));
717 error = wapbl_flush(wl, 1); 757 error = wapbl_flush(wl, 1);
718 if (error) { 758 if (error) {
719 if (force) 759 if (force)
720 wapbl_discard(wl); 760 wapbl_discard(wl);
721 else 761 else
722 return error; 762 return error;
723 } 763 }
724 764
725 /* Unlinked inodes persist after a flush */ 765 /* Unlinked inodes persist after a flush */
726 if (wl->wl_inohashcnt) { 766 if (wl->wl_inohashcnt) {
727 if (force) { 767 if (force) {
728 wapbl_discard(wl); 768 wapbl_discard(wl);
729 } else { 769 } else {
730 return EBUSY; 770 return EBUSY;
731 } 771 }
732 } 772 }
733 773
734 KASSERT(wl->wl_bufbytes == 0); 774 KASSERT(wl->wl_bufbytes == 0);
735 KASSERT(wl->wl_bcount == 0); 775 KASSERT(wl->wl_bcount == 0);
736 KASSERT(wl->wl_bufcount == 0); 776 KASSERT(wl->wl_bufcount == 0);
737 KASSERT(LIST_EMPTY(&wl->wl_bufs)); 777 KASSERT(LIST_EMPTY(&wl->wl_bufs));
738 KASSERT(wl->wl_dealloccnt == 0); 778 KASSERT(wl->wl_dealloccnt == 0);
739 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries)); 779 KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
740 KASSERT(wl->wl_inohashcnt == 0); 780 KASSERT(wl->wl_inohashcnt == 0);
741 KASSERT(TAILQ_EMPTY(&wl->wl_dealloclist)); 781 KASSERT(TAILQ_EMPTY(&wl->wl_dealloclist));
742 KASSERT(wl->wl_dealloccnt == 0); 782 KASSERT(wl->wl_dealloccnt == 0);
743 783
744 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len); 784 wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
745 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len); 785 wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
746 wapbl_free(wl->wl_buffer, MAXPHYS); 786 wapbl_free(wl->wl_buffer, MAXPHYS);
747 wapbl_inodetrk_free(wl); 787 wapbl_inodetrk_free(wl);
748 788
 789 wapbl_evcnt_free(wl);
 790
749 cv_destroy(&wl->wl_reclaimable_cv); 791 cv_destroy(&wl->wl_reclaimable_cv);
750 mutex_destroy(&wl->wl_mtx); 792 mutex_destroy(&wl->wl_mtx);
751 rw_destroy(&wl->wl_rwlock); 793 rw_destroy(&wl->wl_rwlock);
752 wapbl_free(wl, sizeof(*wl)); 794 wapbl_free(wl, sizeof(*wl));
753 795
754 return 0; 796 return 0;
755} 797}
756 798
757/****************************************************************/ 799/****************************************************************/
758/* 800/*
759 * Unbuffered disk I/O 801 * Unbuffered disk I/O
760 */ 802 */
761 803
762static int 804static int
763wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags) 805wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags)
764{ 806{
765 struct pstats *pstats = curlwp->l_proc->p_stats; 807 struct pstats *pstats = curlwp->l_proc->p_stats;
766 struct buf *bp; 808 struct buf *bp;
767 int error; 809 int error;
768 810
769 KASSERT((flags & ~(B_WRITE | B_READ)) == 0); 811 KASSERT((flags & ~(B_WRITE | B_READ)) == 0);
770 KASSERT(devvp->v_type == VBLK); 812 KASSERT(devvp->v_type == VBLK);
771 813
772 if ((flags & (B_WRITE | B_READ)) == B_WRITE) { 814 if ((flags & (B_WRITE | B_READ)) == B_WRITE) {
773 mutex_enter(devvp->v_interlock); 815 mutex_enter(devvp->v_interlock);
774 devvp->v_numoutput++; 816 devvp->v_numoutput++;
775 mutex_exit(devvp->v_interlock); 817 mutex_exit(devvp->v_interlock);
776 pstats->p_ru.ru_oublock++; 818 pstats->p_ru.ru_oublock++;
777 } else { 819 } else {
778 pstats->p_ru.ru_inblock++; 820 pstats->p_ru.ru_inblock++;
779 } 821 }
780 822
781 bp = getiobuf(devvp, true); 823 bp = getiobuf(devvp, true);
782 bp->b_flags = flags; 824 bp->b_flags = flags;
783 bp->b_cflags = BC_BUSY; /* silly & dubious */ 825 bp->b_cflags = BC_BUSY; /* silly & dubious */
784 bp->b_dev = devvp->v_rdev; 826 bp->b_dev = devvp->v_rdev;
785 bp->b_data = data; 827 bp->b_data = data;
786 bp->b_bufsize = bp->b_resid = bp->b_bcount = len; 828 bp->b_bufsize = bp->b_resid = bp->b_bcount = len;
787 bp->b_blkno = pbn; 829 bp->b_blkno = pbn;
788 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 830 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
789 831
790 WAPBL_PRINTF(WAPBL_PRINT_IO, 832 WAPBL_PRINTF(WAPBL_PRINT_IO,
791 ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%"PRIx64"\n", 833 ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%"PRIx64"\n",
792 BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount, 834 BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount,
793 bp->b_blkno, bp->b_dev)); 835 bp->b_blkno, bp->b_dev));
794 836
795 VOP_STRATEGY(devvp, bp); 837 VOP_STRATEGY(devvp, bp);
796 838
797 error = biowait(bp); 839 error = biowait(bp);
798 putiobuf(bp); 840 putiobuf(bp);
799 841
800 if (error) { 842 if (error) {
801 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 843 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
802 ("wapbl_doio: %s %zu bytes at block %" PRId64 844 ("wapbl_doio: %s %zu bytes at block %" PRId64
803 " on dev 0x%"PRIx64" failed with error %d\n", 845 " on dev 0x%"PRIx64" failed with error %d\n",
804 (((flags & (B_WRITE | B_READ)) == B_WRITE) ? 846 (((flags & (B_WRITE | B_READ)) == B_WRITE) ?
805 "write" : "read"), 847 "write" : "read"),
806 len, pbn, devvp->v_rdev, error)); 848 len, pbn, devvp->v_rdev, error));
807 } 849 }
808 850
809 return error; 851 return error;
810} 852}
811 853
812/* 854/*
813 * wapbl_write(data, len, devvp, pbn) 855 * wapbl_write(data, len, devvp, pbn)
814 * 856 *
815 * Synchronously write len bytes from data to physical block pbn 857 * Synchronously write len bytes from data to physical block pbn
816 * on devvp. 858 * on devvp.
817 */ 859 */
818int 860int
819wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn) 861wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
820{ 862{
821 863
822 return wapbl_doio(data, len, devvp, pbn, B_WRITE); 864 return wapbl_doio(data, len, devvp, pbn, B_WRITE);
823} 865}
824 866
825/* 867/*
826 * wapbl_read(data, len, devvp, pbn) 868 * wapbl_read(data, len, devvp, pbn)
827 * 869 *
828 * Synchronously read len bytes into data from physical block pbn 870 * Synchronously read len bytes into data from physical block pbn
829 * on devvp. 871 * on devvp.
830 */ 872 */
831int 873int
832wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn) 874wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
833{ 875{
834 876
835 return wapbl_doio(data, len, devvp, pbn, B_READ); 877 return wapbl_doio(data, len, devvp, pbn, B_READ);
836} 878}
837 879
838/****************************************************************/ 880/****************************************************************/
839/* 881/*
840 * Buffered disk writes -- try to coalesce writes and emit 882 * Buffered disk writes -- try to coalesce writes and emit
841 * MAXPHYS-aligned blocks. 883 * MAXPHYS-aligned blocks.
842 */ 884 */
843 885
844/* 886/*
845 * wapbl_buffered_flush(wl) 887 * wapbl_buffered_flush(wl)
846 * 888 *
847 * Flush any buffered writes from wapbl_buffered_write. 889 * Flush any buffered writes from wapbl_buffered_write.
848 */ 890 */
849static int 891static int
850wapbl_buffered_flush(struct wapbl *wl) 892wapbl_buffered_flush(struct wapbl *wl)
851{ 893{
852 int error; 894 int error;
853 895
854 if (wl->wl_buffer_used == 0) 896 if (wl->wl_buffer_used == 0)
855 return 0; 897 return 0;
856 898
857 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, 899 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
858 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); 900 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
859 wl->wl_buffer_used = 0; 901 wl->wl_buffer_used = 0;
860 902
 903 wl->wl_ev_journalwrite.ev_count++;
 904
861 return error; 905 return error;
862} 906}
863 907
864/* 908/*
865 * wapbl_buffered_write(data, len, wl, pbn) 909 * wapbl_buffered_write(data, len, wl, pbn)
866 * 910 *
867 * Write len bytes from data to physical block pbn on 911 * Write len bytes from data to physical block pbn on
868 * wl->wl_devvp. The write may not complete until 912 * wl->wl_devvp. The write may not complete until
869 * wapbl_buffered_flush. 913 * wapbl_buffered_flush.
870 */ 914 */
871static int 915static int
872wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn) 916wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
873{ 917{
874 int error; 918 int error;
875 size_t resid; 919 size_t resid;
876 920
877 /* 921 /*
878 * If not adjacent to buffered data flush first. Disk block 922 * If not adjacent to buffered data flush first. Disk block
879 * address is always valid for non-empty buffer. 923 * address is always valid for non-empty buffer.
880 */ 924 */
881 if (wl->wl_buffer_used > 0 && 925 if (wl->wl_buffer_used > 0 &&
882 pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) { 926 pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) {
883 error = wapbl_buffered_flush(wl); 927 error = wapbl_buffered_flush(wl);
884 if (error) 928 if (error)
885 return error; 929 return error;
886 } 930 }
887 /* 931 /*
888 * If this write goes to an empty buffer we have to 932 * If this write goes to an empty buffer we have to
889 * save the disk block address first. 933 * save the disk block address first.
890 */ 934 */
891 if (wl->wl_buffer_used == 0) 935 if (wl->wl_buffer_used == 0)
892 wl->wl_buffer_dblk = pbn; 936 wl->wl_buffer_dblk = pbn;
893 /* 937 /*
894 * Remaining space so this buffer ends on a MAXPHYS boundary. 938 * Remaining space so this buffer ends on a MAXPHYS boundary.
895 * 939 *
896 * Cannot become less or equal zero as the buffer would have been 940 * Cannot become less or equal zero as the buffer would have been
897 * flushed on the last call then. 941 * flushed on the last call then.
898 */ 942 */
899 resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) - 943 resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) -
900 wl->wl_buffer_used; 944 wl->wl_buffer_used;
901 KASSERT(resid > 0); 945 KASSERT(resid > 0);
902 KASSERT(dbtob(btodb(resid)) == resid); 946 KASSERT(dbtob(btodb(resid)) == resid);
903 if (len >= resid) { 947 if (len >= resid) {
904 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid); 948 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid);
905 wl->wl_buffer_used += resid; 949 wl->wl_buffer_used += resid;
906 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, 950 error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
907 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); 951 wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
908 data = (uint8_t *)data + resid; 952 data = (uint8_t *)data + resid;
909 len -= resid; 953 len -= resid;
910 wl->wl_buffer_dblk = pbn + btodb(resid); 954 wl->wl_buffer_dblk = pbn + btodb(resid);
911 wl->wl_buffer_used = 0; 955 wl->wl_buffer_used = 0;
912 if (error) 956 if (error)
913 return error; 957 return error;
914 } 958 }
915 KASSERT(len < MAXPHYS); 959 KASSERT(len < MAXPHYS);
916 if (len > 0) { 960 if (len > 0) {
917 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len); 961 memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len);
918 wl->wl_buffer_used += len; 962 wl->wl_buffer_used += len;
919 } 963 }
920 964
921 return 0; 965 return 0;
922} 966}
923 967
924/* 968/*
925 * wapbl_circ_write(wl, data, len, offp) 969 * wapbl_circ_write(wl, data, len, offp)
926 * 970 *
927 * Write len bytes from data to the circular queue of wl, starting 971 * Write len bytes from data to the circular queue of wl, starting
928 * at linear byte offset *offp, and returning the new linear byte 972 * at linear byte offset *offp, and returning the new linear byte
929 * offset in *offp. 973 * offset in *offp.
930 * 974 *
931 * If the starting linear byte offset precedes wl->wl_circ_off, 975 * If the starting linear byte offset precedes wl->wl_circ_off,
932 * the write instead begins at wl->wl_circ_off. XXX WTF? This 976 * the write instead begins at wl->wl_circ_off. XXX WTF? This
933 * should be a KASSERT, not a conditional. 977 * should be a KASSERT, not a conditional.
934 * 978 *
935 * The write is buffered in wl and must be flushed with 979 * The write is buffered in wl and must be flushed with
936 * wapbl_buffered_flush before it will be submitted to the disk. 980 * wapbl_buffered_flush before it will be submitted to the disk.
937 */ 981 */
938static int 982static int
939wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp) 983wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp)
940{ 984{
941 size_t slen; 985 size_t slen;
942 off_t off = *offp; 986 off_t off = *offp;
943 int error; 987 int error;
944 daddr_t pbn; 988 daddr_t pbn;
945 989
946 KDASSERT(((len >> wl->wl_log_dev_bshift) << 990 KDASSERT(((len >> wl->wl_log_dev_bshift) <<
947 wl->wl_log_dev_bshift) == len); 991 wl->wl_log_dev_bshift) == len);
948 992
949 if (off < wl->wl_circ_off) 993 if (off < wl->wl_circ_off)
950 off = wl->wl_circ_off; 994 off = wl->wl_circ_off;
951 slen = wl->wl_circ_off + wl->wl_circ_size - off; 995 slen = wl->wl_circ_off + wl->wl_circ_size - off;
952 if (slen < len) { 996 if (slen < len) {
953 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift); 997 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
954#ifdef _KERNEL 998#ifdef _KERNEL
955 pbn = btodb(pbn << wl->wl_log_dev_bshift); 999 pbn = btodb(pbn << wl->wl_log_dev_bshift);
956#endif 1000#endif
957 error = wapbl_buffered_write(data, slen, wl, pbn); 1001 error = wapbl_buffered_write(data, slen, wl, pbn);
958 if (error) 1002 if (error)
959 return error; 1003 return error;
960 data = (uint8_t *)data + slen; 1004 data = (uint8_t *)data + slen;
961 len -= slen; 1005 len -= slen;
962 off = wl->wl_circ_off; 1006 off = wl->wl_circ_off;
963 } 1007 }
964 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift); 1008 pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
965#ifdef _KERNEL 1009#ifdef _KERNEL
966 pbn = btodb(pbn << wl->wl_log_dev_bshift); 1010 pbn = btodb(pbn << wl->wl_log_dev_bshift);
967#endif 1011#endif
968 error = wapbl_buffered_write(data, len, wl, pbn); 1012 error = wapbl_buffered_write(data, len, wl, pbn);
969 if (error) 1013 if (error)
970 return error; 1014 return error;
971 off += len; 1015 off += len;
972 if (off >= wl->wl_circ_off + wl->wl_circ_size) 1016 if (off >= wl->wl_circ_off + wl->wl_circ_size)
973 off = wl->wl_circ_off; 1017 off = wl->wl_circ_off;
974 *offp = off; 1018 *offp = off;
975 return 0; 1019 return 0;
976} 1020}
977 1021
978/****************************************************************/ 1022/****************************************************************/
979/* 1023/*
980 * WAPBL transactions: entering, adding/removing bufs, and exiting 1024 * WAPBL transactions: entering, adding/removing bufs, and exiting
981 */ 1025 */
982 1026
983int 1027int
984wapbl_begin(struct wapbl *wl, const char *file, int line) 1028wapbl_begin(struct wapbl *wl, const char *file, int line)
985{ 1029{
986 int doflush; 1030 int doflush;
987 unsigned lockcount; 1031 unsigned lockcount;
988 1032
989 KDASSERT(wl); 1033 KDASSERT(wl);
990 1034
991 /* 1035 /*
992 * XXX this needs to be made much more sophisticated. 1036 * XXX this needs to be made much more sophisticated.
993 * perhaps each wapbl_begin could reserve a specified 1037 * perhaps each wapbl_begin could reserve a specified
994 * number of buffers and bytes. 1038 * number of buffers and bytes.
995 */ 1039 */
996 mutex_enter(&wl->wl_mtx); 1040 mutex_enter(&wl->wl_mtx);
997 lockcount = wl->wl_lock_count; 1041 lockcount = wl->wl_lock_count;
998 doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) > 1042 doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
999 wl->wl_bufbytes_max / 2) || 1043 wl->wl_bufbytes_max / 2) ||
1000 ((wl->wl_bufcount + (lockcount * 10)) > 1044 ((wl->wl_bufcount + (lockcount * 10)) >
1001 wl->wl_bufcount_max / 2) || 1045 wl->wl_bufcount_max / 2) ||
1002 (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) || 1046 (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
1003 (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2)); 1047 (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2));
1004 mutex_exit(&wl->wl_mtx); 1048 mutex_exit(&wl->wl_mtx);
1005 1049
1006 if (doflush) { 1050 if (doflush) {
1007 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1051 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1008 ("force flush lockcnt=%d bufbytes=%zu " 1052 ("force flush lockcnt=%d bufbytes=%zu "
1009 "(max=%zu) bufcount=%zu (max=%zu) " 1053 "(max=%zu) bufcount=%zu (max=%zu) "
1010 "dealloccnt %d (lim=%d)\n", 1054 "dealloccnt %d (lim=%d)\n",
1011 lockcount, wl->wl_bufbytes, 1055 lockcount, wl->wl_bufbytes,
1012 wl->wl_bufbytes_max, wl->wl_bufcount, 1056 wl->wl_bufbytes_max, wl->wl_bufcount,
1013 wl->wl_bufcount_max, 1057 wl->wl_bufcount_max,
1014 wl->wl_dealloccnt, wl->wl_dealloclim)); 1058 wl->wl_dealloccnt, wl->wl_dealloclim));
1015 } 1059 }
1016 1060
1017 if (doflush) { 1061 if (doflush) {
1018 int error = wapbl_flush(wl, 0); 1062 int error = wapbl_flush(wl, 0);
1019 if (error) 1063 if (error)
1020 return error; 1064 return error;
1021 } 1065 }
1022 1066
1023 rw_enter(&wl->wl_rwlock, RW_READER); 1067 rw_enter(&wl->wl_rwlock, RW_READER);
1024 mutex_enter(&wl->wl_mtx); 1068 mutex_enter(&wl->wl_mtx);
1025 wl->wl_lock_count++; 1069 wl->wl_lock_count++;
1026 mutex_exit(&wl->wl_mtx); 1070 mutex_exit(&wl->wl_mtx);
1027 1071
1028#if defined(WAPBL_DEBUG_PRINT) 1072#if defined(WAPBL_DEBUG_PRINT)
1029 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION, 1073 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
1030 ("wapbl_begin thread %d.%d with bufcount=%zu " 1074 ("wapbl_begin thread %d.%d with bufcount=%zu "
1031 "bufbytes=%zu bcount=%zu at %s:%d\n", 1075 "bufbytes=%zu bcount=%zu at %s:%d\n",
1032 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1076 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1033 wl->wl_bufbytes, wl->wl_bcount, file, line)); 1077 wl->wl_bufbytes, wl->wl_bcount, file, line));
1034#endif 1078#endif
1035 1079
1036 return 0; 1080 return 0;
1037} 1081}
1038 1082
1039void 1083void
1040wapbl_end(struct wapbl *wl) 1084wapbl_end(struct wapbl *wl)
1041{ 1085{
1042 1086
1043#if defined(WAPBL_DEBUG_PRINT) 1087#if defined(WAPBL_DEBUG_PRINT)
1044 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION, 1088 WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
1045 ("wapbl_end thread %d.%d with bufcount=%zu " 1089 ("wapbl_end thread %d.%d with bufcount=%zu "
1046 "bufbytes=%zu bcount=%zu\n", 1090 "bufbytes=%zu bcount=%zu\n",
1047 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1091 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1048 wl->wl_bufbytes, wl->wl_bcount)); 1092 wl->wl_bufbytes, wl->wl_bcount));
1049#endif 1093#endif
1050 1094
1051 /* 1095 /*
1052 * XXX this could be handled more gracefully, perhaps place 1096 * XXX this could be handled more gracefully, perhaps place
1053 * only a partial transaction in the log and allow the 1097 * only a partial transaction in the log and allow the
1054 * remaining to flush without the protection of the journal. 1098 * remaining to flush without the protection of the journal.
1055 */ 1099 */
1056 KASSERTMSG((wapbl_transaction_len(wl) <= 1100 KASSERTMSG((wapbl_transaction_len(wl) <=
1057 (wl->wl_circ_size - wl->wl_reserved_bytes)), 1101 (wl->wl_circ_size - wl->wl_reserved_bytes)),
1058 "wapbl_end: current transaction too big to flush"); 1102 "wapbl_end: current transaction too big to flush");
1059 1103
1060 mutex_enter(&wl->wl_mtx); 1104 mutex_enter(&wl->wl_mtx);
1061 KASSERT(wl->wl_lock_count > 0); 1105 KASSERT(wl->wl_lock_count > 0);
1062 wl->wl_lock_count--; 1106 wl->wl_lock_count--;
1063 mutex_exit(&wl->wl_mtx); 1107 mutex_exit(&wl->wl_mtx);
1064 1108
1065 rw_exit(&wl->wl_rwlock); 1109 rw_exit(&wl->wl_rwlock);
1066} 1110}
1067 1111
1068void 1112void
1069wapbl_add_buf(struct wapbl *wl, struct buf * bp) 1113wapbl_add_buf(struct wapbl *wl, struct buf * bp)
1070{ 1114{
1071 1115
1072 KASSERT(bp->b_cflags & BC_BUSY); 1116 KASSERT(bp->b_cflags & BC_BUSY);
1073 KASSERT(bp->b_vp); 1117 KASSERT(bp->b_vp);
1074 1118
1075 wapbl_jlock_assert(wl); 1119 wapbl_jlock_assert(wl);
1076 1120
1077#if 0 1121#if 0
1078 /* 1122 /*
1079 * XXX this might be an issue for swapfiles. 1123 * XXX this might be an issue for swapfiles.
1080 * see uvm_swap.c:1702 1124 * see uvm_swap.c:1702
1081 * 1125 *
1082 * XXX2 why require it then? leap of semantics? 1126 * XXX2 why require it then? leap of semantics?
1083 */ 1127 */
1084 KASSERT((bp->b_cflags & BC_NOCACHE) == 0); 1128 KASSERT((bp->b_cflags & BC_NOCACHE) == 0);
1085#endif 1129#endif
1086 1130
1087 mutex_enter(&wl->wl_mtx); 1131 mutex_enter(&wl->wl_mtx);
1088 if (bp->b_flags & B_LOCKED) { 1132 if (bp->b_flags & B_LOCKED) {
1089 LIST_REMOVE(bp, b_wapbllist); 1133 LIST_REMOVE(bp, b_wapbllist);
1090 WAPBL_PRINTF(WAPBL_PRINT_BUFFER2, 1134 WAPBL_PRINTF(WAPBL_PRINT_BUFFER2,
1091 ("wapbl_add_buf thread %d.%d re-adding buf %p " 1135 ("wapbl_add_buf thread %d.%d re-adding buf %p "
1092 "with %d bytes %d bcount\n", 1136 "with %d bytes %d bcount\n",
1093 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, 1137 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
1094 bp->b_bcount)); 1138 bp->b_bcount));
1095 } else { 1139 } else {
1096 /* unlocked but dirty buffers shouldn't exist */ 1140 /* unlocked but dirty buffers shouldn't exist */
1097 KASSERT(!(bp->b_oflags & BO_DELWRI)); 1141 KASSERT(!(bp->b_oflags & BO_DELWRI));
1098 wl->wl_bufbytes += bp->b_bufsize; 1142 wl->wl_bufbytes += bp->b_bufsize;
1099 wl->wl_bcount += bp->b_bcount; 1143 wl->wl_bcount += bp->b_bcount;
1100 wl->wl_bufcount++; 1144 wl->wl_bufcount++;
1101 WAPBL_PRINTF(WAPBL_PRINT_BUFFER, 1145 WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
1102 ("wapbl_add_buf thread %d.%d adding buf %p " 1146 ("wapbl_add_buf thread %d.%d adding buf %p "
1103 "with %d bytes %d bcount\n", 1147 "with %d bytes %d bcount\n",
1104 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, 1148 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
1105 bp->b_bcount)); 1149 bp->b_bcount));
1106 } 1150 }
1107 LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist); 1151 LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist);
1108 mutex_exit(&wl->wl_mtx); 1152 mutex_exit(&wl->wl_mtx);
1109 1153
1110 bp->b_flags |= B_LOCKED; 1154 bp->b_flags |= B_LOCKED;
1111} 1155}
1112 1156
1113static void 1157static void
1114wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp) 1158wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp)
1115{ 1159{
1116 1160
1117 KASSERT(mutex_owned(&wl->wl_mtx)); 1161 KASSERT(mutex_owned(&wl->wl_mtx));
1118 KASSERT(bp->b_cflags & BC_BUSY); 1162 KASSERT(bp->b_cflags & BC_BUSY);
1119 wapbl_jlock_assert(wl); 1163 wapbl_jlock_assert(wl);
1120 1164
1121#if 0 1165#if 0
1122 /* 1166 /*
1123 * XXX this might be an issue for swapfiles. 1167 * XXX this might be an issue for swapfiles.
1124 * see uvm_swap.c:1725 1168 * see uvm_swap.c:1725
1125 * 1169 *
1126 * XXXdeux: see above 1170 * XXXdeux: see above
1127 */ 1171 */
1128 KASSERT((bp->b_flags & BC_NOCACHE) == 0); 1172 KASSERT((bp->b_flags & BC_NOCACHE) == 0);
1129#endif 1173#endif
1130 KASSERT(bp->b_flags & B_LOCKED); 1174 KASSERT(bp->b_flags & B_LOCKED);
1131 1175
1132 WAPBL_PRINTF(WAPBL_PRINT_BUFFER, 1176 WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
1133 ("wapbl_remove_buf thread %d.%d removing buf %p with " 1177 ("wapbl_remove_buf thread %d.%d removing buf %p with "
1134 "%d bytes %d bcount\n", 1178 "%d bytes %d bcount\n",
1135 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount)); 1179 curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount));
1136 1180
1137 KASSERT(wl->wl_bufbytes >= bp->b_bufsize); 1181 KASSERT(wl->wl_bufbytes >= bp->b_bufsize);
1138 wl->wl_bufbytes -= bp->b_bufsize; 1182 wl->wl_bufbytes -= bp->b_bufsize;
1139 KASSERT(wl->wl_bcount >= bp->b_bcount); 1183 KASSERT(wl->wl_bcount >= bp->b_bcount);
1140 wl->wl_bcount -= bp->b_bcount; 1184 wl->wl_bcount -= bp->b_bcount;
1141 KASSERT(wl->wl_bufcount > 0); 1185 KASSERT(wl->wl_bufcount > 0);
1142 wl->wl_bufcount--; 1186 wl->wl_bufcount--;
1143 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0)); 1187 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
1144 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0)); 1188 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
1145 LIST_REMOVE(bp, b_wapbllist); 1189 LIST_REMOVE(bp, b_wapbllist);
1146 1190
1147 bp->b_flags &= ~B_LOCKED; 1191 bp->b_flags &= ~B_LOCKED;
1148} 1192}
1149 1193
1150/* called from brelsel() in vfs_bio among other places */ 1194/* called from brelsel() in vfs_bio among other places */
1151void 1195void
1152wapbl_remove_buf(struct wapbl * wl, struct buf *bp) 1196wapbl_remove_buf(struct wapbl * wl, struct buf *bp)
1153{ 1197{
1154 1198
1155 mutex_enter(&wl->wl_mtx); 1199 mutex_enter(&wl->wl_mtx);
1156 wapbl_remove_buf_locked(wl, bp); 1200 wapbl_remove_buf_locked(wl, bp);
1157 mutex_exit(&wl->wl_mtx); 1201 mutex_exit(&wl->wl_mtx);
1158} 1202}
1159 1203
1160void 1204void
1161wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt) 1205wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt)
1162{ 1206{
1163 1207
1164 KASSERT(bp->b_cflags & BC_BUSY); 1208 KASSERT(bp->b_cflags & BC_BUSY);
1165 1209
1166 /* 1210 /*
1167 * XXX: why does this depend on B_LOCKED? otherwise the buf 1211 * XXX: why does this depend on B_LOCKED? otherwise the buf
1168 * is not for a transaction? if so, why is this called in the 1212 * is not for a transaction? if so, why is this called in the
1169 * first place? 1213 * first place?
1170 */ 1214 */
1171 if (bp->b_flags & B_LOCKED) { 1215 if (bp->b_flags & B_LOCKED) {
1172 mutex_enter(&wl->wl_mtx); 1216 mutex_enter(&wl->wl_mtx);
1173 wl->wl_bufbytes += bp->b_bufsize - oldsz; 1217 wl->wl_bufbytes += bp->b_bufsize - oldsz;
1174 wl->wl_bcount += bp->b_bcount - oldcnt; 1218 wl->wl_bcount += bp->b_bcount - oldcnt;
1175 mutex_exit(&wl->wl_mtx); 1219 mutex_exit(&wl->wl_mtx);
1176 } 1220 }
1177} 1221}
1178 1222
1179#endif /* _KERNEL */ 1223#endif /* _KERNEL */
1180 1224
1181/****************************************************************/ 1225/****************************************************************/
1182/* Some utility inlines */ 1226/* Some utility inlines */
1183 1227
1184/* 1228/*
1185 * wapbl_space_used(avail, head, tail) 1229 * wapbl_space_used(avail, head, tail)
1186 * 1230 *
1187 * Number of bytes used in a circular queue of avail total bytes, 1231 * Number of bytes used in a circular queue of avail total bytes,
1188 * from tail to head. 1232 * from tail to head.
1189 */ 1233 */
1190static inline size_t 1234static inline size_t
1191wapbl_space_used(size_t avail, off_t head, off_t tail) 1235wapbl_space_used(size_t avail, off_t head, off_t tail)
1192{ 1236{
1193 1237
1194 if (tail == 0) { 1238 if (tail == 0) {
1195 KASSERT(head == 0); 1239 KASSERT(head == 0);
1196 return 0; 1240 return 0;
1197 } 1241 }
1198 return ((head + (avail - 1) - tail) % avail) + 1; 1242 return ((head + (avail - 1) - tail) % avail) + 1;
1199} 1243}
1200 1244
1201#ifdef _KERNEL 1245#ifdef _KERNEL
1202/* 1246/*
1203 * wapbl_advance(size, off, oldoff, delta) 1247 * wapbl_advance(size, off, oldoff, delta)
1204 * 1248 *
1205 * Given a byte offset oldoff into a circular queue of size bytes 1249 * Given a byte offset oldoff into a circular queue of size bytes
1206 * starting at off, return a new byte offset oldoff + delta into 1250 * starting at off, return a new byte offset oldoff + delta into
1207 * the circular queue. 1251 * the circular queue.
1208 */ 1252 */
1209static inline off_t 1253static inline off_t
1210wapbl_advance(size_t size, size_t off, off_t oldoff, size_t delta) 1254wapbl_advance(size_t size, size_t off, off_t oldoff, size_t delta)
1211{ 1255{
1212 off_t newoff; 1256 off_t newoff;
1213 1257
1214 /* Define acceptable ranges for inputs. */ 1258 /* Define acceptable ranges for inputs. */
1215 KASSERT(delta <= (size_t)size); 1259 KASSERT(delta <= (size_t)size);
1216 KASSERT((oldoff == 0) || ((size_t)oldoff >= off)); 1260 KASSERT((oldoff == 0) || ((size_t)oldoff >= off));
1217 KASSERT(oldoff < (off_t)(size + off)); 1261 KASSERT(oldoff < (off_t)(size + off));
1218 1262
1219 if ((oldoff == 0) && (delta != 0)) 1263 if ((oldoff == 0) && (delta != 0))
1220 newoff = off + delta; 1264 newoff = off + delta;
1221 else if ((oldoff + delta) < (size + off)) 1265 else if ((oldoff + delta) < (size + off))
1222 newoff = oldoff + delta; 1266 newoff = oldoff + delta;
1223 else 1267 else
1224 newoff = (oldoff + delta) - size; 1268 newoff = (oldoff + delta) - size;
1225 1269
1226 /* Note some interesting axioms */ 1270 /* Note some interesting axioms */
1227 KASSERT((delta != 0) || (newoff == oldoff)); 1271 KASSERT((delta != 0) || (newoff == oldoff));
1228 KASSERT((delta == 0) || (newoff != 0)); 1272 KASSERT((delta == 0) || (newoff != 0));
1229 KASSERT((delta != (size)) || (newoff == oldoff)); 1273 KASSERT((delta != (size)) || (newoff == oldoff));
1230 1274
1231 /* Define acceptable ranges for output. */ 1275 /* Define acceptable ranges for output. */
1232 KASSERT((newoff == 0) || ((size_t)newoff >= off)); 1276 KASSERT((newoff == 0) || ((size_t)newoff >= off));
1233 KASSERT((size_t)newoff < (size + off)); 1277 KASSERT((size_t)newoff < (size + off));
1234 return newoff; 1278 return newoff;
1235} 1279}
1236 1280
1237/* 1281/*
1238 * wapbl_space_free(avail, head, tail) 1282 * wapbl_space_free(avail, head, tail)
1239 * 1283 *
1240 * Number of bytes free in a circular queue of avail total bytes, 1284 * Number of bytes free in a circular queue of avail total bytes,
1241 * in which everything from tail to head is used. 1285 * in which everything from tail to head is used.
1242 */ 1286 */
1243static inline size_t 1287static inline size_t
1244wapbl_space_free(size_t avail, off_t head, off_t tail) 1288wapbl_space_free(size_t avail, off_t head, off_t tail)
1245{ 1289{
1246 1290
1247 return avail - wapbl_space_used(avail, head, tail); 1291 return avail - wapbl_space_used(avail, head, tail);
1248} 1292}
1249 1293
1250/* 1294/*
1251 * wapbl_advance_head(size, off, delta, headp, tailp) 1295 * wapbl_advance_head(size, off, delta, headp, tailp)
1252 * 1296 *
1253 * In a circular queue of size bytes starting at off, given the 1297 * In a circular queue of size bytes starting at off, given the
1254 * old head and tail offsets *headp and *tailp, store the new head 1298 * old head and tail offsets *headp and *tailp, store the new head
1255 * and tail offsets in *headp and *tailp resulting from adding 1299 * and tail offsets in *headp and *tailp resulting from adding
1256 * delta bytes of data to the head. 1300 * delta bytes of data to the head.
1257 */ 1301 */
1258static inline void 1302static inline void
1259wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp, 1303wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp,
1260 off_t *tailp) 1304 off_t *tailp)
1261{ 1305{
1262 off_t head = *headp; 1306 off_t head = *headp;
1263 off_t tail = *tailp; 1307 off_t tail = *tailp;
1264 1308
1265 KASSERT(delta <= wapbl_space_free(size, head, tail)); 1309 KASSERT(delta <= wapbl_space_free(size, head, tail));
1266 head = wapbl_advance(size, off, head, delta); 1310 head = wapbl_advance(size, off, head, delta);
1267 if ((tail == 0) && (head != 0)) 1311 if ((tail == 0) && (head != 0))
1268 tail = off; 1312 tail = off;
1269 *headp = head; 1313 *headp = head;
1270 *tailp = tail; 1314 *tailp = tail;
1271} 1315}
1272 1316
1273/* 1317/*
1274 * wapbl_advance_tail(size, off, delta, headp, tailp) 1318 * wapbl_advance_tail(size, off, delta, headp, tailp)
1275 * 1319 *
1276 * In a circular queue of size bytes starting at off, given the 1320 * In a circular queue of size bytes starting at off, given the
1277 * old head and tail offsets *headp and *tailp, store the new head 1321 * old head and tail offsets *headp and *tailp, store the new head
1278 * and tail offsets in *headp and *tailp resulting from removing 1322 * and tail offsets in *headp and *tailp resulting from removing
1279 * delta bytes of data from the tail. 1323 * delta bytes of data from the tail.
1280 */ 1324 */
1281static inline void 1325static inline void
1282wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp, 1326wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp,
1283 off_t *tailp) 1327 off_t *tailp)
1284{ 1328{
1285 off_t head = *headp; 1329 off_t head = *headp;
1286 off_t tail = *tailp; 1330 off_t tail = *tailp;
1287 1331
1288 KASSERT(delta <= wapbl_space_used(size, head, tail)); 1332 KASSERT(delta <= wapbl_space_used(size, head, tail));
1289 tail = wapbl_advance(size, off, tail, delta); 1333 tail = wapbl_advance(size, off, tail, delta);
1290 if (head == tail) { 1334 if (head == tail) {
1291 head = tail = 0; 1335 head = tail = 0;
1292 } 1336 }
1293 *headp = head; 1337 *headp = head;
1294 *tailp = tail; 1338 *tailp = tail;
1295} 1339}
1296 1340
1297 1341
1298/****************************************************************/ 1342/****************************************************************/
1299 1343
1300/* 1344/*
1301 * wapbl_truncate(wl, minfree) 1345 * wapbl_truncate(wl, minfree)
1302 * 1346 *
1303 * Wait until at least minfree bytes are available in the log. 1347 * Wait until at least minfree bytes are available in the log.
1304 * 1348 *
1305 * If it was necessary to wait for writes to complete, 1349 * If it was necessary to wait for writes to complete,
1306 * advance the circular queue tail to reflect the new write 1350 * advance the circular queue tail to reflect the new write
1307 * completions and issue a write commit to the log. 1351 * completions and issue a write commit to the log.
1308 * 1352 *
1309 * => Caller must hold wl->wl_rwlock writer lock. 1353 * => Caller must hold wl->wl_rwlock writer lock.
1310 */ 1354 */
1311static int 1355static int
1312wapbl_truncate(struct wapbl *wl, size_t minfree) 1356wapbl_truncate(struct wapbl *wl, size_t minfree)
1313{ 1357{
1314 size_t delta; 1358 size_t delta;
1315 size_t avail; 1359 size_t avail;
1316 off_t head; 1360 off_t head;
1317 off_t tail; 1361 off_t tail;
1318 int error = 0; 1362 int error = 0;
1319 1363
1320 KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes)); 1364 KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes));
1321 KASSERT(rw_write_held(&wl->wl_rwlock)); 1365 KASSERT(rw_write_held(&wl->wl_rwlock));
1322 1366
1323 mutex_enter(&wl->wl_mtx); 1367 mutex_enter(&wl->wl_mtx);
1324 1368
1325 /* 1369 /*
1326 * First check to see if we have to do a commit 1370 * First check to see if we have to do a commit
1327 * at all. 1371 * at all.
1328 */ 1372 */
1329 avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail); 1373 avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail);
1330 if (minfree < avail) { 1374 if (minfree < avail) {
1331 mutex_exit(&wl->wl_mtx); 1375 mutex_exit(&wl->wl_mtx);
1332 return 0; 1376 return 0;
1333 } 1377 }
1334 minfree -= avail; 1378 minfree -= avail;
1335 while ((wl->wl_error_count == 0) && 1379 while ((wl->wl_error_count == 0) &&
1336 (wl->wl_reclaimable_bytes < minfree)) { 1380 (wl->wl_reclaimable_bytes < minfree)) {
1337 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE, 1381 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1338 ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd " 1382 ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd "
1339 "minfree=%zd\n", 1383 "minfree=%zd\n",
1340 &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes, 1384 &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes,
1341 minfree)); 1385 minfree));
1342 1386
1343 cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx); 1387 cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx);
1344 } 1388 }
1345 if (wl->wl_reclaimable_bytes < minfree) { 1389 if (wl->wl_reclaimable_bytes < minfree) {
1346 KASSERT(wl->wl_error_count); 1390 KASSERT(wl->wl_error_count);
1347 /* XXX maybe get actual error from buffer instead someday? */ 1391 /* XXX maybe get actual error from buffer instead someday? */
1348 error = EIO; 1392 error = EIO;
1349 } 1393 }
1350 head = wl->wl_head; 1394 head = wl->wl_head;
1351 tail = wl->wl_tail; 1395 tail = wl->wl_tail;
1352 delta = wl->wl_reclaimable_bytes; 1396 delta = wl->wl_reclaimable_bytes;
1353 1397
1354 /* If all of the entries are flushed, then be sure to keep 1398 /* If all of the entries are flushed, then be sure to keep
1355 * the reserved bytes reserved. Watch out for discarded transactions, 1399 * the reserved bytes reserved. Watch out for discarded transactions,
1356 * which could leave more bytes reserved than are reclaimable. 1400 * which could leave more bytes reserved than are reclaimable.
1357 */ 1401 */
1358 if (SIMPLEQ_EMPTY(&wl->wl_entries) &&  1402 if (SIMPLEQ_EMPTY(&wl->wl_entries) &&
1359 (delta >= wl->wl_reserved_bytes)) { 1403 (delta >= wl->wl_reserved_bytes)) {
1360 delta -= wl->wl_reserved_bytes; 1404 delta -= wl->wl_reserved_bytes;
1361 } 1405 }
1362 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head, 1406 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head,
1363 &tail); 1407 &tail);
1364 KDASSERT(wl->wl_reserved_bytes <= 1408 KDASSERT(wl->wl_reserved_bytes <=
1365 wapbl_space_used(wl->wl_circ_size, head, tail)); 1409 wapbl_space_used(wl->wl_circ_size, head, tail));
1366 mutex_exit(&wl->wl_mtx); 1410 mutex_exit(&wl->wl_mtx);
1367 1411
1368 if (error) 1412 if (error)
1369 return error; 1413 return error;
1370 1414
1371 /* 1415 /*
1372 * This is where head, tail and delta are unprotected 1416 * This is where head, tail and delta are unprotected
1373 * from races against itself or flush. This is ok since 1417 * from races against itself or flush. This is ok since
1374 * we only call this routine from inside flush itself. 1418 * we only call this routine from inside flush itself.
1375 * 1419 *
1376 * XXX: how can it race against itself when accessed only 1420 * XXX: how can it race against itself when accessed only
1377 * from behind the write-locked rwlock? 1421 * from behind the write-locked rwlock?
1378 */ 1422 */
1379 error = wapbl_write_commit(wl, head, tail); 1423 error = wapbl_write_commit(wl, head, tail);
1380 if (error) 1424 if (error)
1381 return error; 1425 return error;
1382 1426
1383 wl->wl_head = head; 1427 wl->wl_head = head;
1384 wl->wl_tail = tail; 1428 wl->wl_tail = tail;
1385 1429
1386 mutex_enter(&wl->wl_mtx); 1430 mutex_enter(&wl->wl_mtx);
1387 KASSERT(wl->wl_reclaimable_bytes >= delta); 1431 KASSERT(wl->wl_reclaimable_bytes >= delta);
1388 wl->wl_reclaimable_bytes -= delta; 1432 wl->wl_reclaimable_bytes -= delta;
1389 mutex_exit(&wl->wl_mtx); 1433 mutex_exit(&wl->wl_mtx);
1390 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE, 1434 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1391 ("wapbl_truncate thread %d.%d truncating %zu bytes\n", 1435 ("wapbl_truncate thread %d.%d truncating %zu bytes\n",
1392 curproc->p_pid, curlwp->l_lid, delta)); 1436 curproc->p_pid, curlwp->l_lid, delta));
1393 1437
1394 return 0; 1438 return 0;
1395} 1439}
1396 1440
1397/****************************************************************/ 1441/****************************************************************/
1398 1442
1399void 1443void
1400wapbl_biodone(struct buf *bp) 1444wapbl_biodone(struct buf *bp)
1401{ 1445{
1402 struct wapbl_entry *we = bp->b_private; 1446 struct wapbl_entry *we = bp->b_private;
1403 struct wapbl *wl = we->we_wapbl; 1447 struct wapbl *wl = we->we_wapbl;
1404#ifdef WAPBL_DEBUG_BUFBYTES 1448#ifdef WAPBL_DEBUG_BUFBYTES
1405 const int bufsize = bp->b_bufsize; 1449 const int bufsize = bp->b_bufsize;
1406#endif 1450#endif
1407 1451
1408 /* 1452 /*
1409 * Handle possible flushing of buffers after log has been 1453 * Handle possible flushing of buffers after log has been
1410 * decommissioned. 1454 * decommissioned.
1411 */ 1455 */
1412 if (!wl) { 1456 if (!wl) {
1413 KASSERT(we->we_bufcount > 0); 1457 KASSERT(we->we_bufcount > 0);
1414 we->we_bufcount--; 1458 we->we_bufcount--;
1415#ifdef WAPBL_DEBUG_BUFBYTES 1459#ifdef WAPBL_DEBUG_BUFBYTES
1416 KASSERT(we->we_unsynced_bufbytes >= bufsize); 1460 KASSERT(we->we_unsynced_bufbytes >= bufsize);
1417 we->we_unsynced_bufbytes -= bufsize; 1461 we->we_unsynced_bufbytes -= bufsize;
1418#endif 1462#endif
1419 1463
1420 if (we->we_bufcount == 0) { 1464 if (we->we_bufcount == 0) {
1421#ifdef WAPBL_DEBUG_BUFBYTES 1465#ifdef WAPBL_DEBUG_BUFBYTES
1422 KASSERT(we->we_unsynced_bufbytes == 0); 1466 KASSERT(we->we_unsynced_bufbytes == 0);
1423#endif 1467#endif
1424 pool_put(&wapbl_entry_pool, we); 1468 pool_put(&wapbl_entry_pool, we);
1425 } 1469 }
1426 1470
1427 brelse(bp, 0); 1471 brelse(bp, 0);
1428 return; 1472 return;
1429 } 1473 }
1430 1474
1431#ifdef ohbother 1475#ifdef ohbother
1432 KDASSERT(bp->b_oflags & BO_DONE); 1476 KDASSERT(bp->b_oflags & BO_DONE);
1433 KDASSERT(!(bp->b_oflags & BO_DELWRI)); 1477 KDASSERT(!(bp->b_oflags & BO_DELWRI));
1434 KDASSERT(bp->b_flags & B_ASYNC); 1478 KDASSERT(bp->b_flags & B_ASYNC);
1435 KDASSERT(bp->b_cflags & BC_BUSY); 1479 KDASSERT(bp->b_cflags & BC_BUSY);
1436 KDASSERT(!(bp->b_flags & B_LOCKED)); 1480 KDASSERT(!(bp->b_flags & B_LOCKED));
1437 KDASSERT(!(bp->b_flags & B_READ)); 1481 KDASSERT(!(bp->b_flags & B_READ));
1438 KDASSERT(!(bp->b_cflags & BC_INVAL)); 1482 KDASSERT(!(bp->b_cflags & BC_INVAL));
1439 KDASSERT(!(bp->b_cflags & BC_NOCACHE)); 1483 KDASSERT(!(bp->b_cflags & BC_NOCACHE));
1440#endif 1484#endif
1441 1485
1442 if (bp->b_error) { 1486 if (bp->b_error) {
1443 /* 1487 /*
1444 * If an error occurs, it would be nice to leave the buffer 1488 * If an error occurs, it would be nice to leave the buffer
1445 * as a delayed write on the LRU queue so that we can retry 1489 * as a delayed write on the LRU queue so that we can retry
1446 * it later. But buffercache(9) can't handle dirty buffer 1490 * it later. But buffercache(9) can't handle dirty buffer
1447 * reuse, so just mark the log permanently errored out. 1491 * reuse, so just mark the log permanently errored out.
1448 */ 1492 */
1449 mutex_enter(&wl->wl_mtx); 1493 mutex_enter(&wl->wl_mtx);
1450 if (wl->wl_error_count == 0) { 1494 if (wl->wl_error_count == 0) {
1451 wl->wl_error_count++; 1495 wl->wl_error_count++;
1452 cv_broadcast(&wl->wl_reclaimable_cv); 1496 cv_broadcast(&wl->wl_reclaimable_cv);
1453 } 1497 }
1454 mutex_exit(&wl->wl_mtx); 1498 mutex_exit(&wl->wl_mtx);
1455 } 1499 }
1456 1500
1457 /* 1501 /*
1458 * Release the buffer here. wapbl_flush() may wait for the 1502 * Release the buffer here. wapbl_flush() may wait for the
1459 * log to become empty and we better unbusy the buffer before 1503 * log to become empty and we better unbusy the buffer before
1460 * wapbl_flush() returns. 1504 * wapbl_flush() returns.
1461 */ 1505 */
1462 brelse(bp, 0); 1506 brelse(bp, 0);
1463 1507
1464 mutex_enter(&wl->wl_mtx); 1508 mutex_enter(&wl->wl_mtx);
1465 1509
1466 KASSERT(we->we_bufcount > 0); 1510 KASSERT(we->we_bufcount > 0);
1467 we->we_bufcount--; 1511 we->we_bufcount--;
1468#ifdef WAPBL_DEBUG_BUFBYTES 1512#ifdef WAPBL_DEBUG_BUFBYTES
1469 KASSERT(we->we_unsynced_bufbytes >= bufsize); 1513 KASSERT(we->we_unsynced_bufbytes >= bufsize);
1470 we->we_unsynced_bufbytes -= bufsize; 1514 we->we_unsynced_bufbytes -= bufsize;
1471 KASSERT(wl->wl_unsynced_bufbytes >= bufsize); 1515 KASSERT(wl->wl_unsynced_bufbytes >= bufsize);
1472 wl->wl_unsynced_bufbytes -= bufsize; 1516 wl->wl_unsynced_bufbytes -= bufsize;
1473#endif 1517#endif
 1518 wl->wl_ev_metawrite.ev_count++;
1474 1519
1475 /* 1520 /*
1476 * If the current transaction can be reclaimed, start 1521 * If the current transaction can be reclaimed, start
1477 * at the beginning and reclaim any consecutive reclaimable 1522 * at the beginning and reclaim any consecutive reclaimable
1478 * transactions. If we successfully reclaim anything, 1523 * transactions. If we successfully reclaim anything,
1479 * then wakeup anyone waiting for the reclaim. 1524 * then wakeup anyone waiting for the reclaim.
1480 */ 1525 */
1481 if (we->we_bufcount == 0) { 1526 if (we->we_bufcount == 0) {
1482 size_t delta = 0; 1527 size_t delta = 0;
1483 int errcnt = 0; 1528 int errcnt = 0;
1484#ifdef WAPBL_DEBUG_BUFBYTES 1529#ifdef WAPBL_DEBUG_BUFBYTES
1485 KDASSERT(we->we_unsynced_bufbytes == 0); 1530 KDASSERT(we->we_unsynced_bufbytes == 0);
1486#endif 1531#endif
1487 /* 1532 /*
1488 * clear any posted error, since the buffer it came from 1533 * clear any posted error, since the buffer it came from
1489 * has successfully flushed by now 1534 * has successfully flushed by now
1490 */ 1535 */
1491 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) && 1536 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) &&
1492 (we->we_bufcount == 0)) { 1537 (we->we_bufcount == 0)) {
1493 delta += we->we_reclaimable_bytes; 1538 delta += we->we_reclaimable_bytes;
1494 if (we->we_error) 1539 if (we->we_error)
1495 errcnt++; 1540 errcnt++;
1496 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries); 1541 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
1497 pool_put(&wapbl_entry_pool, we); 1542 pool_put(&wapbl_entry_pool, we);
1498 } 1543 }
1499 1544
1500 if (delta) { 1545 if (delta) {
1501 wl->wl_reclaimable_bytes += delta; 1546 wl->wl_reclaimable_bytes += delta;
1502 KASSERT(wl->wl_error_count >= errcnt); 1547 KASSERT(wl->wl_error_count >= errcnt);
1503 wl->wl_error_count -= errcnt; 1548 wl->wl_error_count -= errcnt;
1504 cv_broadcast(&wl->wl_reclaimable_cv); 1549 cv_broadcast(&wl->wl_reclaimable_cv);
1505 } 1550 }
1506 } 1551 }
1507 1552
1508 mutex_exit(&wl->wl_mtx); 1553 mutex_exit(&wl->wl_mtx);
1509} 1554}
1510 1555
1511/* 1556/*
1512 * wapbl_flush(wl, wait) 1557 * wapbl_flush(wl, wait)
1513 * 1558 *
1514 * Flush pending block writes, deallocations, and inodes from 1559 * Flush pending block writes, deallocations, and inodes from
1515 * the current transaction in memory to the log on disk: 1560 * the current transaction in memory to the log on disk:
1516 * 1561 *
1517 * 1. Call the file system's wl_flush callback to flush any 1562 * 1. Call the file system's wl_flush callback to flush any
1518 * per-file-system pending updates. 1563 * per-file-system pending updates.
1519 * 2. Wait for enough space in the log for the current transaction. 1564 * 2. Wait for enough space in the log for the current transaction.
1520 * 3. Synchronously write the new log records, advancing the 1565 * 3. Synchronously write the new log records, advancing the
1521 * circular queue head. 1566 * circular queue head.
1522 * 4. Issue the pending block writes asynchronously, now that they 1567 * 4. Issue the pending block writes asynchronously, now that they
1523 * are recorded in the log and can be replayed after crash. 1568 * are recorded in the log and can be replayed after crash.
1524 * 5. If wait is true, wait for all writes to complete and for the 1569 * 5. If wait is true, wait for all writes to complete and for the
1525 * log to become empty. 1570 * log to become empty.
1526 * 1571 *
1527 * On failure, call the file system's wl_flush_abort callback. 1572 * On failure, call the file system's wl_flush_abort callback.
1528 */ 1573 */
1529int 1574int
1530wapbl_flush(struct wapbl *wl, int waitfor) 1575wapbl_flush(struct wapbl *wl, int waitfor)
1531{ 1576{
1532 struct buf *bp; 1577 struct buf *bp;
1533 struct wapbl_entry *we; 1578 struct wapbl_entry *we;
1534 off_t off; 1579 off_t off;
1535 off_t head; 1580 off_t head;
1536 off_t tail; 1581 off_t tail;
1537 size_t delta = 0; 1582 size_t delta = 0;
1538 size_t flushsize; 1583 size_t flushsize;
1539 size_t reserved; 1584 size_t reserved;
1540 int error = 0; 1585 int error = 0;
1541 1586
1542 /* 1587 /*
1543 * Do a quick check to see if a full flush can be skipped 1588 * Do a quick check to see if a full flush can be skipped
1544 * This assumes that the flush callback does not need to be called 1589 * This assumes that the flush callback does not need to be called
1545 * unless there are other outstanding bufs. 1590 * unless there are other outstanding bufs.
1546 */ 1591 */
1547 if (!waitfor) { 1592 if (!waitfor) {
1548 size_t nbufs; 1593 size_t nbufs;
1549 mutex_enter(&wl->wl_mtx); /* XXX need mutex here to 1594 mutex_enter(&wl->wl_mtx); /* XXX need mutex here to
1550 protect the KASSERTS */ 1595 protect the KASSERTS */
1551 nbufs = wl->wl_bufcount; 1596 nbufs = wl->wl_bufcount;
1552 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0)); 1597 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
1553 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0)); 1598 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
1554 mutex_exit(&wl->wl_mtx); 1599 mutex_exit(&wl->wl_mtx);
1555 if (nbufs == 0) 1600 if (nbufs == 0)
1556 return 0; 1601 return 0;
1557 } 1602 }
1558 1603
1559 /* 1604 /*
1560 * XXX we may consider using LK_UPGRADE here 1605 * XXX we may consider using LK_UPGRADE here
1561 * if we want to call flush from inside a transaction 1606 * if we want to call flush from inside a transaction
1562 */ 1607 */
1563 rw_enter(&wl->wl_rwlock, RW_WRITER); 1608 rw_enter(&wl->wl_rwlock, RW_WRITER);
1564 wl->wl_flush(wl->wl_mount, TAILQ_FIRST(&wl->wl_dealloclist)); 1609 wl->wl_flush(wl->wl_mount, TAILQ_FIRST(&wl->wl_dealloclist));
1565 1610
1566 /* 1611 /*
1567 * Now that we are exclusively locked and the file system has 1612 * Now that we are exclusively locked and the file system has
1568 * issued any deferred block writes for this transaction, check 1613 * issued any deferred block writes for this transaction, check
1569 * whether there are any blocks to write to the log. If not, 1614 * whether there are any blocks to write to the log. If not,
1570 * skip waiting for space or writing any log entries. 1615 * skip waiting for space or writing any log entries.
1571 * 1616 *
1572 * XXX Shouldn't this also check wl_dealloccnt and 1617 * XXX Shouldn't this also check wl_dealloccnt and
1573 * wl_inohashcnt? Perhaps wl_dealloccnt doesn't matter if the 1618 * wl_inohashcnt? Perhaps wl_dealloccnt doesn't matter if the
1574 * file system didn't produce any blocks as a consequence of 1619 * file system didn't produce any blocks as a consequence of
1575 * it, but the same does not seem to be so of wl_inohashcnt. 1620 * it, but the same does not seem to be so of wl_inohashcnt.
1576 */ 1621 */
1577 if (wl->wl_bufcount == 0) { 1622 if (wl->wl_bufcount == 0) {
1578 goto wait_out; 1623 goto wait_out;
1579 } 1624 }
1580 1625
1581#if 0 1626#if 0
1582 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1627 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1583 ("wapbl_flush thread %d.%d flushing entries with " 1628 ("wapbl_flush thread %d.%d flushing entries with "
1584 "bufcount=%zu bufbytes=%zu\n", 1629 "bufcount=%zu bufbytes=%zu\n",
1585 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount, 1630 curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
1586 wl->wl_bufbytes)); 1631 wl->wl_bufbytes));
1587#endif 1632#endif
1588 1633
1589 /* Calculate amount of space needed to flush */ 1634 /* Calculate amount of space needed to flush */
1590 flushsize = wapbl_transaction_len(wl); 1635 flushsize = wapbl_transaction_len(wl);
1591 if (wapbl_verbose_commit) { 1636 if (wapbl_verbose_commit) {
1592 struct timespec ts; 1637 struct timespec ts;
1593 getnanotime(&ts); 1638 getnanotime(&ts);
1594 printf("%s: %lld.%09ld this transaction = %zu bytes\n", 1639 printf("%s: %lld.%09ld this transaction = %zu bytes\n",
1595 __func__, (long long)ts.tv_sec, 1640 __func__, (long long)ts.tv_sec,
1596 (long)ts.tv_nsec, flushsize); 1641 (long)ts.tv_nsec, flushsize);
1597 } 1642 }
1598 1643
1599 if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) { 1644 if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) {
1600 /* 1645 /*
1601 * XXX this could be handled more gracefully, perhaps place 1646 * XXX this could be handled more gracefully, perhaps place
1602 * only a partial transaction in the log and allow the 1647 * only a partial transaction in the log and allow the
1603 * remaining to flush without the protection of the journal. 1648 * remaining to flush without the protection of the journal.
1604 */ 1649 */
1605 panic("wapbl_flush: current transaction too big to flush"); 1650 panic("wapbl_flush: current transaction too big to flush");
1606 } 1651 }
1607 1652
1608 error = wapbl_truncate(wl, flushsize); 1653 error = wapbl_truncate(wl, flushsize);
1609 if (error) 1654 if (error)
1610 goto out; 1655 goto out;
1611 1656
1612 off = wl->wl_head; 1657 off = wl->wl_head;
1613 KASSERT((off == 0) || (off >= wl->wl_circ_off)); 1658 KASSERT((off == 0) || (off >= wl->wl_circ_off));
1614 KASSERT((off == 0) || (off < wl->wl_circ_off + wl->wl_circ_size)); 1659 KASSERT((off == 0) || (off < wl->wl_circ_off + wl->wl_circ_size));
1615 error = wapbl_write_blocks(wl, &off); 1660 error = wapbl_write_blocks(wl, &off);
1616 if (error) 1661 if (error)
1617 goto out; 1662 goto out;
1618 error = wapbl_write_revocations(wl, &off); 1663 error = wapbl_write_revocations(wl, &off);
1619 if (error) 1664 if (error)
1620 goto out; 1665 goto out;
1621 error = wapbl_write_inodes(wl, &off); 1666 error = wapbl_write_inodes(wl, &off);
1622 if (error) 1667 if (error)
1623 goto out; 1668 goto out;
1624 1669
1625 reserved = 0; 1670 reserved = 0;
1626 if (wl->wl_inohashcnt) 1671 if (wl->wl_inohashcnt)
1627 reserved = wapbl_transaction_inodes_len(wl); 1672 reserved = wapbl_transaction_inodes_len(wl);
1628 1673
1629 head = wl->wl_head; 1674 head = wl->wl_head;
1630 tail = wl->wl_tail; 1675 tail = wl->wl_tail;
1631 1676
1632 wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize, 1677 wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize,
1633 &head, &tail); 1678 &head, &tail);
1634 1679
1635 KASSERTMSG(head == off, 1680 KASSERTMSG(head == off,
1636 "lost head! head=%"PRIdMAX" tail=%" PRIdMAX 1681 "lost head! head=%"PRIdMAX" tail=%" PRIdMAX
1637 " off=%"PRIdMAX" flush=%zu", 1682 " off=%"PRIdMAX" flush=%zu",
1638 (intmax_t)head, (intmax_t)tail, (intmax_t)off, 1683 (intmax_t)head, (intmax_t)tail, (intmax_t)off,
1639 flushsize); 1684 flushsize);
1640 1685
1641 /* Opportunistically move the tail forward if we can */ 1686 /* Opportunistically move the tail forward if we can */
1642 mutex_enter(&wl->wl_mtx); 1687 mutex_enter(&wl->wl_mtx);
1643 delta = wl->wl_reclaimable_bytes; 1688 delta = wl->wl_reclaimable_bytes;
1644 mutex_exit(&wl->wl_mtx); 1689 mutex_exit(&wl->wl_mtx);
1645 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, 1690 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta,
1646 &head, &tail); 1691 &head, &tail);
1647 1692
1648 error = wapbl_write_commit(wl, head, tail); 1693 error = wapbl_write_commit(wl, head, tail);
1649 if (error) 1694 if (error)
1650 goto out; 1695 goto out;
1651 1696
1652 we = pool_get(&wapbl_entry_pool, PR_WAITOK); 1697 we = pool_get(&wapbl_entry_pool, PR_WAITOK);
1653 1698
1654#ifdef WAPBL_DEBUG_BUFBYTES 1699#ifdef WAPBL_DEBUG_BUFBYTES
1655 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1700 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1656 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu" 1701 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
1657 " unsynced=%zu" 1702 " unsynced=%zu"
1658 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d " 1703 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
1659 "inodes=%d\n", 1704 "inodes=%d\n",
1660 curproc->p_pid, curlwp->l_lid, flushsize, delta, 1705 curproc->p_pid, curlwp->l_lid, flushsize, delta,
1661 wapbl_space_used(wl->wl_circ_size, head, tail), 1706 wapbl_space_used(wl->wl_circ_size, head, tail),
1662 wl->wl_unsynced_bufbytes, wl->wl_bufcount, 1707 wl->wl_unsynced_bufbytes, wl->wl_bufcount,
1663 wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt, 1708 wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt,
1664 wl->wl_inohashcnt)); 1709 wl->wl_inohashcnt));
1665#else 1710#else
1666 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1711 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1667 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu" 1712 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
1668 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d " 1713 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
1669 "inodes=%d\n", 1714 "inodes=%d\n",
1670 curproc->p_pid, curlwp->l_lid, flushsize, delta, 1715 curproc->p_pid, curlwp->l_lid, flushsize, delta,
1671 wapbl_space_used(wl->wl_circ_size, head, tail), 1716 wapbl_space_used(wl->wl_circ_size, head, tail),
1672 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount, 1717 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
1673 wl->wl_dealloccnt, wl->wl_inohashcnt)); 1718 wl->wl_dealloccnt, wl->wl_inohashcnt));
1674#endif 1719#endif
1675 1720
1676 1721
1677 mutex_enter(&bufcache_lock); 1722 mutex_enter(&bufcache_lock);
1678 mutex_enter(&wl->wl_mtx); 1723 mutex_enter(&wl->wl_mtx);
1679 1724
1680 wl->wl_reserved_bytes = reserved; 1725 wl->wl_reserved_bytes = reserved;
1681 wl->wl_head = head; 1726 wl->wl_head = head;
1682 wl->wl_tail = tail; 1727 wl->wl_tail = tail;
1683 KASSERT(wl->wl_reclaimable_bytes >= delta); 1728 KASSERT(wl->wl_reclaimable_bytes >= delta);
1684 wl->wl_reclaimable_bytes -= delta; 1729 wl->wl_reclaimable_bytes -= delta;
1685 KDASSERT(wl->wl_dealloccnt == 0); 1730 KDASSERT(wl->wl_dealloccnt == 0);
1686#ifdef WAPBL_DEBUG_BUFBYTES 1731#ifdef WAPBL_DEBUG_BUFBYTES
1687 wl->wl_unsynced_bufbytes += wl->wl_bufbytes; 1732 wl->wl_unsynced_bufbytes += wl->wl_bufbytes;
1688#endif 1733#endif
1689 1734
1690 we->we_wapbl = wl; 1735 we->we_wapbl = wl;
1691 we->we_bufcount = wl->wl_bufcount; 1736 we->we_bufcount = wl->wl_bufcount;
1692#ifdef WAPBL_DEBUG_BUFBYTES 1737#ifdef WAPBL_DEBUG_BUFBYTES
1693 we->we_unsynced_bufbytes = wl->wl_bufbytes; 1738 we->we_unsynced_bufbytes = wl->wl_bufbytes;
1694#endif 1739#endif
1695 we->we_reclaimable_bytes = flushsize; 1740 we->we_reclaimable_bytes = flushsize;
1696 we->we_error = 0; 1741 we->we_error = 0;
1697 SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries); 1742 SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries);
1698 1743
1699 /* 1744 /*
1700 * this flushes bufs in reverse order than they were queued 1745 * this flushes bufs in reverse order than they were queued
1701 * it shouldn't matter, but if we care we could use TAILQ instead. 1746 * it shouldn't matter, but if we care we could use TAILQ instead.
1702 * XXX Note they will get put on the lru queue when they flush 1747 * XXX Note they will get put on the lru queue when they flush
1703 * so we might actually want to change this to preserve order. 1748 * so we might actually want to change this to preserve order.
1704 */ 1749 */
1705 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) { 1750 while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
1706 if (bbusy(bp, 0, 0, &wl->wl_mtx)) { 1751 if (bbusy(bp, 0, 0, &wl->wl_mtx)) {
1707 continue; 1752 continue;
1708 } 1753 }
1709 bp->b_iodone = wapbl_biodone; 1754 bp->b_iodone = wapbl_biodone;
1710 bp->b_private = we; 1755 bp->b_private = we;
1711 bremfree(bp); 1756 bremfree(bp);
1712 wapbl_remove_buf_locked(wl, bp); 1757 wapbl_remove_buf_locked(wl, bp);
1713 mutex_exit(&wl->wl_mtx); 1758 mutex_exit(&wl->wl_mtx);
1714 mutex_exit(&bufcache_lock); 1759 mutex_exit(&bufcache_lock);
1715 bawrite(bp); 1760 bawrite(bp);
1716 mutex_enter(&bufcache_lock); 1761 mutex_enter(&bufcache_lock);
1717 mutex_enter(&wl->wl_mtx); 1762 mutex_enter(&wl->wl_mtx);
1718 } 1763 }
1719 mutex_exit(&wl->wl_mtx); 1764 mutex_exit(&wl->wl_mtx);
1720 mutex_exit(&bufcache_lock); 1765 mutex_exit(&bufcache_lock);
1721 1766
1722#if 0 1767#if 0
1723 WAPBL_PRINTF(WAPBL_PRINT_FLUSH, 1768 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
1724 ("wapbl_flush thread %d.%d done flushing entries...\n", 1769 ("wapbl_flush thread %d.%d done flushing entries...\n",
1725 curproc->p_pid, curlwp->l_lid)); 1770 curproc->p_pid, curlwp->l_lid));
1726#endif 1771#endif
1727 1772
1728 wait_out: 1773 wait_out:
1729 1774
1730 /* 1775 /*
1731 * If the waitfor flag is set, don't return until everything is 1776 * If the waitfor flag is set, don't return until everything is
1732 * fully flushed and the on disk log is empty. 1777 * fully flushed and the on disk log is empty.
1733 */ 1778 */
1734 if (waitfor) { 1779 if (waitfor) {
1735 error = wapbl_truncate(wl, wl->wl_circ_size -  1780 error = wapbl_truncate(wl, wl->wl_circ_size -
1736 wl->wl_reserved_bytes); 1781 wl->wl_reserved_bytes);
1737 } 1782 }
1738 1783
1739 out: 1784 out:
1740 if (error) { 1785 if (error) {
1741 wl->wl_flush_abort(wl->wl_mount, 1786 wl->wl_flush_abort(wl->wl_mount,
1742 TAILQ_FIRST(&wl->wl_dealloclist)); 1787 TAILQ_FIRST(&wl->wl_dealloclist));
1743 } 1788 }
1744 1789
1745#ifdef WAPBL_DEBUG_PRINT 1790#ifdef WAPBL_DEBUG_PRINT
1746 if (error) { 1791 if (error) {
1747 pid_t pid = -1; 1792 pid_t pid = -1;
1748 lwpid_t lid = -1; 1793 lwpid_t lid = -1;
1749 if (curproc) 1794 if (curproc)
1750 pid = curproc->p_pid; 1795 pid = curproc->p_pid;
1751 if (curlwp) 1796 if (curlwp)
1752 lid = curlwp->l_lid; 1797 lid = curlwp->l_lid;
1753 mutex_enter(&wl->wl_mtx); 1798 mutex_enter(&wl->wl_mtx);
1754#ifdef WAPBL_DEBUG_BUFBYTES 1799#ifdef WAPBL_DEBUG_BUFBYTES
1755 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1800 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1756 ("wapbl_flush: thread %d.%d aborted flush: " 1801 ("wapbl_flush: thread %d.%d aborted flush: "
1757 "error = %d\n" 1802 "error = %d\n"
1758 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 1803 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
1759 "deallocs=%d inodes=%d\n" 1804 "deallocs=%d inodes=%d\n"
1760 "\terrcnt = %d, reclaimable=%zu reserved=%zu " 1805 "\terrcnt = %d, reclaimable=%zu reserved=%zu "
1761 "unsynced=%zu\n", 1806 "unsynced=%zu\n",
1762 pid, lid, error, wl->wl_bufcount, 1807 pid, lid, error, wl->wl_bufcount,
1763 wl->wl_bufbytes, wl->wl_bcount, 1808 wl->wl_bufbytes, wl->wl_bcount,
1764 wl->wl_dealloccnt, wl->wl_inohashcnt, 1809 wl->wl_dealloccnt, wl->wl_inohashcnt,
1765 wl->wl_error_count, wl->wl_reclaimable_bytes, 1810 wl->wl_error_count, wl->wl_reclaimable_bytes,
1766 wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes)); 1811 wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes));
1767 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1812 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1768 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1813 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1769 ("\tentry: bufcount = %zu, reclaimable = %zu, " 1814 ("\tentry: bufcount = %zu, reclaimable = %zu, "
1770 "error = %d, unsynced = %zu\n", 1815 "error = %d, unsynced = %zu\n",
1771 we->we_bufcount, we->we_reclaimable_bytes, 1816 we->we_bufcount, we->we_reclaimable_bytes,
1772 we->we_error, we->we_unsynced_bufbytes)); 1817 we->we_error, we->we_unsynced_bufbytes));
1773 } 1818 }
1774#else 1819#else
1775 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1820 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1776 ("wapbl_flush: thread %d.%d aborted flush: " 1821 ("wapbl_flush: thread %d.%d aborted flush: "
1777 "error = %d\n" 1822 "error = %d\n"
1778 "\tbufcount=%zu bufbytes=%zu bcount=%zu " 1823 "\tbufcount=%zu bufbytes=%zu bcount=%zu "
1779 "deallocs=%d inodes=%d\n" 1824 "deallocs=%d inodes=%d\n"
1780 "\terrcnt = %d, reclaimable=%zu reserved=%zu\n", 1825 "\terrcnt = %d, reclaimable=%zu reserved=%zu\n",
1781 pid, lid, error, wl->wl_bufcount, 1826 pid, lid, error, wl->wl_bufcount,
1782 wl->wl_bufbytes, wl->wl_bcount, 1827 wl->wl_bufbytes, wl->wl_bcount,
1783 wl->wl_dealloccnt, wl->wl_inohashcnt, 1828 wl->wl_dealloccnt, wl->wl_inohashcnt,
1784 wl->wl_error_count, wl->wl_reclaimable_bytes, 1829 wl->wl_error_count, wl->wl_reclaimable_bytes,
1785 wl->wl_reserved_bytes)); 1830 wl->wl_reserved_bytes));
1786 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1831 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1787 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 1832 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
1788 ("\tentry: bufcount = %zu, reclaimable = %zu, " 1833 ("\tentry: bufcount = %zu, reclaimable = %zu, "
1789 "error = %d\n", we->we_bufcount, 1834 "error = %d\n", we->we_bufcount,
1790 we->we_reclaimable_bytes, we->we_error)); 1835 we->we_reclaimable_bytes, we->we_error));
1791 } 1836 }
1792#endif 1837#endif
1793 mutex_exit(&wl->wl_mtx); 1838 mutex_exit(&wl->wl_mtx);
1794 } 1839 }
1795#endif 1840#endif
1796 1841
1797 rw_exit(&wl->wl_rwlock); 1842 rw_exit(&wl->wl_rwlock);
1798 return error; 1843 return error;
1799} 1844}
1800 1845
1801/****************************************************************/ 1846/****************************************************************/
1802 1847
1803void 1848void
1804wapbl_jlock_assert(struct wapbl *wl) 1849wapbl_jlock_assert(struct wapbl *wl)
1805{ 1850{
1806 1851
1807 KASSERT(rw_lock_held(&wl->wl_rwlock)); 1852 KASSERT(rw_lock_held(&wl->wl_rwlock));
1808} 1853}
1809 1854
1810void 1855void
1811wapbl_junlock_assert(struct wapbl *wl) 1856wapbl_junlock_assert(struct wapbl *wl)
1812{ 1857{
1813 1858
1814 KASSERT(!rw_write_held(&wl->wl_rwlock)); 1859 KASSERT(!rw_write_held(&wl->wl_rwlock));
1815} 1860}
1816 1861
1817/****************************************************************/ 1862/****************************************************************/
1818 1863
1819/* locks missing */ 1864/* locks missing */
1820void 1865void
1821wapbl_print(struct wapbl *wl, 1866wapbl_print(struct wapbl *wl,
1822 int full, 1867 int full,
1823 void (*pr)(const char *, ...)) 1868 void (*pr)(const char *, ...))
1824{ 1869{
1825 struct buf *bp; 1870 struct buf *bp;
1826 struct wapbl_entry *we; 1871 struct wapbl_entry *we;
1827 (*pr)("wapbl %p", wl); 1872 (*pr)("wapbl %p", wl);
1828 (*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n", 1873 (*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n",
1829 wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn); 1874 wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn);
1830 (*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n", 1875 (*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n",
1831 wl->wl_circ_size, wl->wl_circ_off, 1876 wl->wl_circ_size, wl->wl_circ_off,
1832 (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail); 1877 (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail);
1833 (*pr)("fs_dev_bshift = %d, log_dev_bshift = %d\n", 1878 (*pr)("fs_dev_bshift = %d, log_dev_bshift = %d\n",
1834 wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift); 1879 wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift);
1835#ifdef WAPBL_DEBUG_BUFBYTES 1880#ifdef WAPBL_DEBUG_BUFBYTES
1836 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu " 1881 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
1837 "reserved = %zu errcnt = %d unsynced = %zu\n", 1882 "reserved = %zu errcnt = %d unsynced = %zu\n",
1838 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount, 1883 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
1839 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 1884 wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
1840 wl->wl_error_count, wl->wl_unsynced_bufbytes); 1885 wl->wl_error_count, wl->wl_unsynced_bufbytes);
1841#else 1886#else
1842 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu " 1887 (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
1843 "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes, 1888 "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes,
1844 wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes, 1889 wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
1845 wl->wl_error_count); 1890 wl->wl_error_count);
1846#endif 1891#endif
1847 (*pr)("\tdealloccnt = %d, dealloclim = %d\n", 1892 (*pr)("\tdealloccnt = %d, dealloclim = %d\n",
1848 wl->wl_dealloccnt, wl->wl_dealloclim); 1893 wl->wl_dealloccnt, wl->wl_dealloclim);
1849 (*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n", 1894 (*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n",
1850 wl->wl_inohashcnt, wl->wl_inohashmask); 1895 wl->wl_inohashcnt, wl->wl_inohashmask);
1851 (*pr)("entries:\n"); 1896 (*pr)("entries:\n");
1852 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) { 1897 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
1853#ifdef WAPBL_DEBUG_BUFBYTES 1898#ifdef WAPBL_DEBUG_BUFBYTES
1854 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, " 1899 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, "
1855 "unsynced = %zu\n", 1900 "unsynced = %zu\n",
1856 we->we_bufcount, we->we_reclaimable_bytes, 1901 we->we_bufcount, we->we_reclaimable_bytes,
1857 we->we_error, we->we_unsynced_bufbytes); 1902 we->we_error, we->we_unsynced_bufbytes);
1858#else 1903#else
1859 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n", 1904 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n",
1860 we->we_bufcount, we->we_reclaimable_bytes, we->we_error); 1905 we->we_bufcount, we->we_reclaimable_bytes, we->we_error);
1861#endif 1906#endif
1862 } 1907 }
1863 if (full) { 1908 if (full) {
1864 int cnt = 0; 1909 int cnt = 0;
1865 (*pr)("bufs ="); 1910 (*pr)("bufs =");
1866 LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) { 1911 LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) {
1867 if (!LIST_NEXT(bp, b_wapbllist)) { 1912 if (!LIST_NEXT(bp, b_wapbllist)) {
1868 (*pr)(" %p", bp); 1913 (*pr)(" %p", bp);
1869 } else if ((++cnt % 6) == 0) { 1914 } else if ((++cnt % 6) == 0) {
1870 (*pr)(" %p,\n\t", bp); 1915 (*pr)(" %p,\n\t", bp);
1871 } else { 1916 } else {
1872 (*pr)(" %p,", bp); 1917 (*pr)(" %p,", bp);
1873 } 1918 }
1874 } 1919 }
1875 (*pr)("\n"); 1920 (*pr)("\n");
1876 1921
1877 (*pr)("dealloced blks = "); 1922 (*pr)("dealloced blks = ");
1878 { 1923 {
1879 struct wapbl_dealloc *wd; 1924 struct wapbl_dealloc *wd;
1880 cnt = 0; 1925 cnt = 0;
1881 TAILQ_FOREACH(wd, &wl->wl_dealloclist, wd_entries) { 1926 TAILQ_FOREACH(wd, &wl->wl_dealloclist, wd_entries) {
1882 (*pr)(" %"PRId64":%d,", 1927 (*pr)(" %"PRId64":%d,",
1883 wd->wd_blkno, 1928 wd->wd_blkno,
1884 wd->wd_len); 1929 wd->wd_len);
1885 if ((++cnt % 4) == 0) { 1930 if ((++cnt % 4) == 0) {
1886 (*pr)("\n\t"); 1931 (*pr)("\n\t");
1887 } 1932 }
1888 } 1933 }
1889 } 1934 }
1890 (*pr)("\n"); 1935 (*pr)("\n");
1891 1936
1892 (*pr)("registered inodes = "); 1937 (*pr)("registered inodes = ");
1893 { 1938 {
1894 int i; 1939 int i;
1895 cnt = 0; 1940 cnt = 0;
1896 for (i = 0; i <= wl->wl_inohashmask; i++) { 1941 for (i = 0; i <= wl->wl_inohashmask; i++) {
1897 struct wapbl_ino_head *wih; 1942 struct wapbl_ino_head *wih;
1898 struct wapbl_ino *wi; 1943 struct wapbl_ino *wi;
1899 1944
1900 wih = &wl->wl_inohash[i]; 1945 wih = &wl->wl_inohash[i];
1901 LIST_FOREACH(wi, wih, wi_hash) { 1946 LIST_FOREACH(wi, wih, wi_hash) {
1902 if (wi->wi_ino == 0) 1947 if (wi->wi_ino == 0)
1903 continue; 1948 continue;
1904 (*pr)(" %"PRIu64"/0%06"PRIo32",", 1949 (*pr)(" %"PRIu64"/0%06"PRIo32",",
1905 wi->wi_ino, wi->wi_mode); 1950 wi->wi_ino, wi->wi_mode);
1906 if ((++cnt % 4) == 0) { 1951 if ((++cnt % 4) == 0) {
1907 (*pr)("\n\t"); 1952 (*pr)("\n\t");
1908 } 1953 }
1909 } 1954 }
1910 } 1955 }
1911 (*pr)("\n"); 1956 (*pr)("\n");
1912 } 1957 }
1913 } 1958 }
1914} 1959}
1915 1960
#if defined(WAPBL_DEBUG) || defined(DDB)
/*
 * wapbl_dump(wl)
 *
 *	Print the full state of wl via wapbl_print() and printf.  When
 *	built with WAPBL_DEBUG, a NULL wl is replaced by wapbl_debug_wl.
 *	If there is still no journal to print, do nothing.
 */
void
wapbl_dump(struct wapbl *wl)
{
#if defined(WAPBL_DEBUG)
	if (wl == NULL)
		wl = wapbl_debug_wl;
#endif
	if (wl != NULL)
		wapbl_print(wl, 1, printf);
}
#endif
1929 1974
1930/****************************************************************/ 1975/****************************************************************/
1931 1976
1932int 1977int
1933wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len, bool force, 1978wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len, bool force,
1934 void **cookiep) 1979 void **cookiep)
1935{ 1980{
1936 struct wapbl_dealloc *wd; 1981 struct wapbl_dealloc *wd;
1937 int error = 0; 1982 int error = 0;
1938 1983
1939 wapbl_jlock_assert(wl); 1984 wapbl_jlock_assert(wl);
1940 1985
1941 mutex_enter(&wl->wl_mtx); 1986 mutex_enter(&wl->wl_mtx);
1942 1987
1943 if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim)) { 1988 if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim)) {
1944 if (!force) { 1989 if (!force) {
1945 error = EAGAIN; 1990 error = EAGAIN;
1946 goto out; 1991 goto out;
1947 } 1992 }
1948 1993
1949 /* 1994 /*
1950 * Forced registration can only be used when: 1995 * Forced registration can only be used when:
1951 * 1) the caller can't cope with failure 1996 * 1) the caller can't cope with failure
1952 * 2) the path can be triggered only bounded, small 1997 * 2) the path can be triggered only bounded, small
1953 * times per transaction 1998 * times per transaction
1954 * If this is not fullfilled, and the path would be triggered 1999 * If this is not fullfilled, and the path would be triggered
1955 * many times, this could overflow maximum transaction size 2000 * many times, this could overflow maximum transaction size
1956 * and panic later. 2001 * and panic later.
1957 */ 2002 */
1958 printf("%s: forced dealloc registration over limit: %d >= %d\n", 2003 printf("%s: forced dealloc registration over limit: %d >= %d\n",
1959 wl->wl_mount->mnt_stat.f_mntonname, 2004 wl->wl_mount->mnt_stat.f_mntonname,
1960 wl->wl_dealloccnt, wl->wl_dealloclim); 2005 wl->wl_dealloccnt, wl->wl_dealloclim);
1961 } 2006 }
1962 2007
1963 wl->wl_dealloccnt++; 2008 wl->wl_dealloccnt++;
1964 mutex_exit(&wl->wl_mtx); 2009 mutex_exit(&wl->wl_mtx);
1965 2010
1966 wd = pool_get(&wapbl_dealloc_pool, PR_WAITOK); 2011 wd = pool_get(&wapbl_dealloc_pool, PR_WAITOK);
1967 wd->wd_blkno = blk; 2012 wd->wd_blkno = blk;
1968 wd->wd_len = len; 2013 wd->wd_len = len;
1969 2014
1970 mutex_enter(&wl->wl_mtx); 2015 mutex_enter(&wl->wl_mtx);
1971 TAILQ_INSERT_TAIL(&wl->wl_dealloclist, wd, wd_entries); 2016 TAILQ_INSERT_TAIL(&wl->wl_dealloclist, wd, wd_entries);
1972 2017
1973 if (cookiep) 2018 if (cookiep)
1974 *cookiep = wd; 2019 *cookiep = wd;
1975 2020
1976 out: 2021 out:
1977 mutex_exit(&wl->wl_mtx); 2022 mutex_exit(&wl->wl_mtx);
1978 2023
1979 WAPBL_PRINTF(WAPBL_PRINT_ALLOC, 2024 WAPBL_PRINTF(WAPBL_PRINT_ALLOC,
1980 ("wapbl_register_deallocation: blk=%"PRId64" len=%d error=%d\n", 2025 ("wapbl_register_deallocation: blk=%"PRId64" len=%d error=%d\n",
1981 blk, len, error)); 2026 blk, len, error));
1982 2027
1983 return error; 2028 return error;
1984} 2029}
1985 2030
1986static void 2031static void
1987wapbl_deallocation_free(struct wapbl *wl, struct wapbl_dealloc *wd, 2032wapbl_deallocation_free(struct wapbl *wl, struct wapbl_dealloc *wd,
1988 bool locked) 2033 bool locked)
1989{ 2034{
1990 KASSERT(!locked 2035 KASSERT(!locked
1991 || rw_lock_held(&wl->wl_rwlock) || mutex_owned(&wl->wl_mtx)); 2036 || rw_lock_held(&wl->wl_rwlock) || mutex_owned(&wl->wl_mtx));
1992 2037
1993 if (!locked) 2038 if (!locked)
1994 mutex_enter(&wl->wl_mtx); 2039 mutex_enter(&wl->wl_mtx);
1995 2040
1996 TAILQ_REMOVE(&wl->wl_dealloclist, wd, wd_entries); 2041 TAILQ_REMOVE(&wl->wl_dealloclist, wd, wd_entries);
1997 wl->wl_dealloccnt--; 2042 wl->wl_dealloccnt--;
1998 2043
1999 if (!locked) 2044 if (!locked)
2000 mutex_exit(&wl->wl_mtx); 2045 mutex_exit(&wl->wl_mtx);
2001 2046
2002 pool_put(&wapbl_dealloc_pool, wd); 2047 pool_put(&wapbl_dealloc_pool, wd);
2003} 2048}
2004 2049
/*
 * wapbl_unregister_deallocation(wl, cookie)
 *
 *	Drop a deallocation record from wl.  cookie must be non-NULL;
 *	it is passed on as the struct wapbl_dealloc to free, with
 *	wapbl_deallocation_free() taking wl->wl_mtx itself.
 */
void
wapbl_unregister_deallocation(struct wapbl *wl, void *cookie)
{
	struct wapbl_dealloc *wd;

	KASSERT(cookie != NULL);

	wd = cookie;
	wapbl_deallocation_free(wl, wd, false);
}
2011 2056
2012/****************************************************************/ 2057/****************************************************************/
2013 2058
2014static void 2059static void
2015wapbl_inodetrk_init(struct wapbl *wl, u_int size) 2060wapbl_inodetrk_init(struct wapbl *wl, u_int size)
2016{ 2061{
2017 2062
2018 wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask); 2063 wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask);
2019 if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) { 2064 if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) {
2020 pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0, 2065 pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0,
2021 "wapblinopl", &pool_allocator_nointr, IPL_NONE); 2066 "wapblinopl", &pool_allocator_nointr, IPL_NONE);
2022 } 2067 }
2023} 2068}
2024 2069
2025static void 2070static void
2026wapbl_inodetrk_free(struct wapbl *wl) 2071wapbl_inodetrk_free(struct wapbl *wl)
2027{ 2072{
2028 2073
2029 /* XXX this KASSERT needs locking/mutex analysis */ 2074 /* XXX this KASSERT needs locking/mutex analysis */
2030 KASSERT(wl->wl_inohashcnt == 0); 2075 KASSERT(wl->wl_inohashcnt == 0);
2031 hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask); 2076 hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask);
2032 if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) { 2077 if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) {
2033 pool_destroy(&wapbl_ino_pool); 2078 pool_destroy(&wapbl_ino_pool);
2034 } 2079 }
2035} 2080}
2036 2081
2037static struct wapbl_ino * 2082static struct wapbl_ino *
2038wapbl_inodetrk_get(struct wapbl *wl, ino_t ino) 2083wapbl_inodetrk_get(struct wapbl *wl, ino_t ino)
2039{ 2084{
2040 struct wapbl_ino_head *wih; 2085 struct wapbl_ino_head *wih;
2041 struct wapbl_ino *wi; 2086 struct wapbl_ino *wi;
2042 2087
2043 KASSERT(mutex_owned(&wl->wl_mtx)); 2088 KASSERT(mutex_owned(&wl->wl_mtx));
2044 2089
2045 wih = &wl->wl_inohash[ino & wl->wl_inohashmask]; 2090 wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
2046 LIST_FOREACH(wi, wih, wi_hash) { 2091 LIST_FOREACH(wi, wih, wi_hash) {
2047 if (ino == wi->wi_ino) 2092 if (ino == wi->wi_ino)
2048 return wi; 2093 return wi;
2049 } 2094 }
2050 return 0; 2095 return 0;
2051} 2096}
2052 2097
2053void 2098void
2054wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode) 2099wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode)
2055{ 2100{
2056 struct wapbl_ino_head *wih; 2101 struct wapbl_ino_head *wih;
2057 struct wapbl_ino *wi; 2102 struct wapbl_ino *wi;
2058 2103
2059 wi = pool_get(&wapbl_ino_pool, PR_WAITOK); 2104 wi = pool_get(&wapbl_ino_pool, PR_WAITOK);
2060 2105
2061 mutex_enter(&wl->wl_mtx); 2106 mutex_enter(&wl->wl_mtx);
2062 if (wapbl_inodetrk_get(wl, ino) == NULL) { 2107 if (wapbl_inodetrk_get(wl, ino) == NULL) {
2063 wi->wi_ino = ino; 2108 wi->wi_ino = ino;
2064 wi->wi_mode = mode; 2109 wi->wi_mode = mode;
2065 wih = &wl->wl_inohash[ino & wl->wl_inohashmask]; 2110 wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
2066 LIST_INSERT_HEAD(wih, wi, wi_hash); 2111 LIST_INSERT_HEAD(wih, wi, wi_hash);
2067 wl->wl_inohashcnt++; 2112 wl->wl_inohashcnt++;
2068 WAPBL_PRINTF(WAPBL_PRINT_INODE, 2113 WAPBL_PRINTF(WAPBL_PRINT_INODE,
2069 ("wapbl_register_inode: ino=%"PRId64"\n", ino)); 2114 ("wapbl_register_inode: ino=%"PRId64"\n", ino));
2070 mutex_exit(&wl->wl_mtx); 2115 mutex_exit(&wl->wl_mtx);
2071 } else { 2116 } else {
2072 mutex_exit(&wl->wl_mtx); 2117 mutex_exit(&wl->wl_mtx);
2073 pool_put(&wapbl_ino_pool, wi); 2118 pool_put(&wapbl_ino_pool, wi);
2074 } 2119 }
2075} 2120}
2076 2121
2077void 2122void
2078wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode) 2123wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode)
2079{ 2124{
2080 struct wapbl_ino *wi; 2125 struct wapbl_ino *wi;
2081 2126
2082 mutex_enter(&wl->wl_mtx); 2127 mutex_enter(&wl->wl_mtx);
2083 wi = wapbl_inodetrk_get(wl, ino); 2128 wi = wapbl_inodetrk_get(wl, ino);
2084 if (wi) { 2129 if (wi) {
2085 WAPBL_PRINTF(WAPBL_PRINT_INODE, 2130 WAPBL_PRINTF(WAPBL_PRINT_INODE,
2086 ("wapbl_unregister_inode: ino=%"PRId64"\n", ino)); 2131 ("wapbl_unregister_inode: ino=%"PRId64"\n", ino));
2087 KASSERT(wl->wl_inohashcnt > 0); 2132 KASSERT(wl->wl_inohashcnt > 0);
2088 wl->wl_inohashcnt--; 2133 wl->wl_inohashcnt--;
2089 LIST_REMOVE(wi, wi_hash); 2134 LIST_REMOVE(wi, wi_hash);
2090 mutex_exit(&wl->wl_mtx); 2135 mutex_exit(&wl->wl_mtx);
2091 2136
2092 pool_put(&wapbl_ino_pool, wi); 2137 pool_put(&wapbl_ino_pool, wi);
2093 } else { 2138 } else {
2094 mutex_exit(&wl->wl_mtx); 2139 mutex_exit(&wl->wl_mtx);
2095 } 2140 }
2096} 2141}
2097 2142
2098/****************************************************************/ 2143/****************************************************************/
2099 2144
2100/* 2145/*
2101 * wapbl_transaction_inodes_len(wl) 2146 * wapbl_transaction_inodes_len(wl)
2102 * 2147 *
2103 * Calculate the number of bytes required for inode registration 2148 * Calculate the number of bytes required for inode registration
2104 * log records in wl. 2149 * log records in wl.
2105 */ 2150 */
2106static inline size_t 2151static inline size_t
2107wapbl_transaction_inodes_len(struct wapbl *wl) 2152wapbl_transaction_inodes_len(struct wapbl *wl)
2108{ 2153{
2109 int blocklen = 1<<wl->wl_log_dev_bshift; 2154 int blocklen = 1<<wl->wl_log_dev_bshift;
2110 int iph; 2155 int iph;
2111 2156
2112 /* Calculate number of inodes described in a inodelist header */ 2157 /* Calculate number of inodes described in a inodelist header */
2113 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) / 2158 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
2114 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]); 2159 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
2115 2160
2116 KASSERT(iph > 0); 2161 KASSERT(iph > 0);
2117 2162
2118 return MAX(1, howmany(wl->wl_inohashcnt, iph)) * blocklen; 2163 return MAX(1, howmany(wl->wl_inohashcnt, iph)) * blocklen;
2119} 2164}
2120 2165
2121 2166
2122/* 2167/*
2123 * wapbl_transaction_len(wl) 2168 * wapbl_transaction_len(wl)
2124 * 2169 *
2125 * Calculate number of bytes required for all log records in wl. 2170 * Calculate number of bytes required for all log records in wl.
2126 */ 2171 */
2127static size_t 2172static size_t
2128wapbl_transaction_len(struct wapbl *wl) 2173wapbl_transaction_len(struct wapbl *wl)
2129{ 2174{
2130 int blocklen = 1<<wl->wl_log_dev_bshift; 2175 int blocklen = 1<<wl->wl_log_dev_bshift;
2131 size_t len; 2176 size_t len;
2132 2177
2133 /* Calculate number of blocks described in a blocklist header */ 2178 /* Calculate number of blocks described in a blocklist header */
2134 len = wl->wl_bcount; 2179 len = wl->wl_bcount;
2135 len += howmany(wl->wl_bufcount, wl->wl_brperjblock) * blocklen; 2180 len += howmany(wl->wl_bufcount, wl->wl_brperjblock) * blocklen;
2136 len += howmany(wl->wl_dealloccnt, wl->wl_brperjblock) * blocklen; 2181 len += howmany(wl->wl_dealloccnt, wl->wl_brperjblock) * blocklen;
2137 len += wapbl_transaction_inodes_len(wl); 2182 len += wapbl_transaction_inodes_len(wl);
2138 2183
2139 return len; 2184 return len;
2140} 2185}
2141 2186
2142/* 2187/*
2143 * wapbl_cache_sync(wl, msg) 2188 * wapbl_cache_sync(wl, msg)
2144 * 2189 *
2145 * Issue DIOCCACHESYNC to wl->wl_devvp. 2190 * Issue DIOCCACHESYNC to wl->wl_devvp.
2146 * 2191 *
2147 * If sysctl(vfs.wapbl.verbose_commit) >= 2, print a message 2192 * If sysctl(vfs.wapbl.verbose_commit) >= 2, print a message
2148 * including msg about the duration of the cache sync. 2193 * including msg about the duration of the cache sync.
2149 */ 2194 */
2150static int 2195static int
2151wapbl_cache_sync(struct wapbl *wl, const char *msg) 2196wapbl_cache_sync(struct wapbl *wl, const char *msg)
2152{ 2197{
2153 const bool verbose = wapbl_verbose_commit >= 2; 2198 const bool verbose = wapbl_verbose_commit >= 2;
2154 struct bintime start_time; 2199 struct bintime start_time;
2155 int force = 1; 2200 int force = 1;
2156 int error; 2201 int error;
2157 2202
2158 if (!wapbl_flush_disk_cache) { 2203 if (!wapbl_flush_disk_cache) {
2159 return 0; 2204 return 0;
2160 } 2205 }
2161 if (verbose) { 2206 if (verbose) {
2162 bintime(&start_time); 2207 bintime(&start_time);
2163 } 2208 }
2164 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force, 2209 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force,
2165 FWRITE, FSCRED); 2210 FWRITE, FSCRED);
2166 if (error) { 2211 if (error) {
2167 WAPBL_PRINTF(WAPBL_PRINT_ERROR, 2212 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
2168 ("wapbl_cache_sync: DIOCCACHESYNC on dev 0x%jx " 2213 ("wapbl_cache_sync: DIOCCACHESYNC on dev 0x%jx "
2169 "returned %d\n", (uintmax_t)wl->wl_devvp->v_rdev, error)); 2214 "returned %d\n", (uintmax_t)wl->wl_devvp->v_rdev, error));
2170 } 2215 }
2171 if (verbose) { 2216 if (verbose) {
2172 struct bintime d; 2217 struct bintime d;
2173 struct timespec ts; 2218 struct timespec ts;
2174 2219
2175 bintime(&d); 2220 bintime(&d);
2176 bintime_sub(&d, &start_time); 2221 bintime_sub(&d, &start_time);
2177 bintime2timespec(&d, &ts); 2222 bintime2timespec(&d, &ts);
2178 printf("wapbl_cache_sync: %s: dev 0x%jx %ju.%09lu\n", 2223 printf("wapbl_cache_sync: %s: dev 0x%jx %ju.%09lu\n",
2179 msg, (uintmax_t)wl->wl_devvp->v_rdev, 2224 msg, (uintmax_t)wl->wl_devvp->v_rdev,
2180 (uintmax_t)ts.tv_sec, ts.tv_nsec); 2225 (uintmax_t)ts.tv_sec, ts.tv_nsec);
2181 } 2226 }
 2227
 2228 wl->wl_ev_cacheflush.ev_count++;
 2229
2182 return error; 2230 return error;
2183} 2231}
2184 2232
2185/* 2233/*
2186 * wapbl_write_commit(wl, head, tail) 2234 * wapbl_write_commit(wl, head, tail)
2187 * 2235 *
2188 * Issue a disk cache sync to wait for all pending writes to the 2236 * Issue a disk cache sync to wait for all pending writes to the
2189 * log to complete, and then synchronously commit the current 2237 * log to complete, and then synchronously commit the current
2190 * circular queue head and tail to the log, in the next of two 2238 * circular queue head and tail to the log, in the next of two
2191 * locations for commit headers on disk. 2239 * locations for commit headers on disk.
2192 * 2240 *
2193 * Increment the generation number. If the generation number 2241 * Increment the generation number. If the generation number
2194 * rolls over to zero, then a subsequent commit would appear to 2242 * rolls over to zero, then a subsequent commit would appear to
2195 * have an older generation than this one -- in that case, issue a 2243 * have an older generation than this one -- in that case, issue a
2196 * duplicate commit to avoid this. 2244 * duplicate commit to avoid this.
2197 * 2245 *
2198 * => Caller must have exclusive access to wl, either by holding 2246 * => Caller must have exclusive access to wl, either by holding
2199 * wl->wl_rwlock for writer or by being wapbl_start before anyone 2247 * wl->wl_rwlock for writer or by being wapbl_start before anyone
2200 * else has seen wl. 2248 * else has seen wl.
2201 */ 2249 */
2202static int 2250static int
2203wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail) 2251wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail)
2204{ 2252{
2205 struct wapbl_wc_header *wc = wl->wl_wc_header; 2253 struct wapbl_wc_header *wc = wl->wl_wc_header;
2206 struct timespec ts; 2254 struct timespec ts;
2207 int error; 2255 int error;
2208 daddr_t pbn; 2256 daddr_t pbn;
2209 2257
2210 error = wapbl_buffered_flush(wl); 2258 error = wapbl_buffered_flush(wl);
2211 if (error) 2259 if (error)
2212 return error; 2260 return error;
2213 /* 2261 /*
2214 * flush disk cache to ensure that blocks we've written are actually 2262 * flush disk cache to ensure that blocks we've written are actually
2215 * written to the stable storage before the commit header. 2263 * written to the stable storage before the commit header.
2216 * 2264 *
2217 * XXX Calc checksum here, instead we do this for now 2265 * XXX Calc checksum here, instead we do this for now
2218 */ 2266 */
2219 wapbl_cache_sync(wl, "1"); 2267 wapbl_cache_sync(wl, "1");
2220 2268
2221 wc->wc_head = head; 2269 wc->wc_head = head;
2222 wc->wc_tail = tail; 2270 wc->wc_tail = tail;
2223 wc->wc_checksum = 0; 2271 wc->wc_checksum = 0;
2224 wc->wc_version = 1; 2272 wc->wc_version = 1;
2225 getnanotime(&ts); 2273 getnanotime(&ts);
2226 wc->wc_time = ts.tv_sec; 2274 wc->wc_time = ts.tv_sec;
2227 wc->wc_timensec = ts.tv_nsec; 2275 wc->wc_timensec = ts.tv_nsec;
2228 2276
2229 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2277 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2230 ("wapbl_write_commit: head = %"PRIdMAX "tail = %"PRIdMAX"\n", 2278 ("wapbl_write_commit: head = %"PRIdMAX "tail = %"PRIdMAX"\n",
2231 (intmax_t)head, (intmax_t)tail)); 2279 (intmax_t)head, (intmax_t)tail));
2232 2280
2233 /* 2281 /*
2234 * write the commit header. 2282 * write the commit header.
2235 * 2283 *
2236 * XXX if generation will rollover, then first zero 2284 * XXX if generation will rollover, then first zero
2237 * over second commit header before trying to write both headers. 2285 * over second commit header before trying to write both headers.
2238 */ 2286 */
2239 2287
2240 pbn = wl->wl_logpbn + (wc->wc_generation % 2); 2288 pbn = wl->wl_logpbn + (wc->wc_generation % 2);
2241#ifdef _KERNEL 2289#ifdef _KERNEL
2242 pbn = btodb(pbn << wc->wc_log_dev_bshift); 2290 pbn = btodb(pbn << wc->wc_log_dev_bshift);
2243#endif 2291#endif
2244 error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn); 2292 error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
2245 if (error) 2293 if (error)
2246 return error; 2294 return error;
2247 error = wapbl_buffered_flush(wl); 2295 error = wapbl_buffered_flush(wl);
2248 if (error) 2296 if (error)
2249 return error; 2297 return error;
2250 2298
2251 /* 2299 /*
2252 * flush disk cache to ensure that the commit header is actually 2300 * flush disk cache to ensure that the commit header is actually
2253 * written before meta data blocks. 2301 * written before meta data blocks.
2254 */ 2302 */
2255 wapbl_cache_sync(wl, "2"); 2303 wapbl_cache_sync(wl, "2");
2256 2304
2257 /* 2305 /*
2258 * If the generation number was zero, write it out a second time. 2306 * If the generation number was zero, write it out a second time.
2259 * This handles initialization and generation number rollover 2307 * This handles initialization and generation number rollover
2260 */ 2308 */
2261 if (wc->wc_generation++ == 0) { 2309 if (wc->wc_generation++ == 0) {
2262 error = wapbl_write_commit(wl, head, tail); 2310 error = wapbl_write_commit(wl, head, tail);
2263 /* 2311 /*
2264 * This panic should be able to be removed if we do the 2312 * This panic should be able to be removed if we do the
2265 * zero'ing mentioned above, and we are certain to roll 2313 * zero'ing mentioned above, and we are certain to roll
2266 * back generation number on failure. 2314 * back generation number on failure.
2267 */ 2315 */
2268 if (error) 2316 if (error)
2269 panic("wapbl_write_commit: error writing duplicate " 2317 panic("wapbl_write_commit: error writing duplicate "
2270 "log header: %d", error); 2318 "log header: %d", error);
2271 } 2319 }
 2320
 2321 wl->wl_ev_commit.ev_count++;
 2322
2272 return 0; 2323 return 0;
2273} 2324}
2274 2325
2275/* 2326/*
2276 * wapbl_write_blocks(wl, offp) 2327 * wapbl_write_blocks(wl, offp)
2277 * 2328 *
2278 * Write all pending physical blocks in the current transaction 2329 * Write all pending physical blocks in the current transaction
2279 * from wapbl_add_buf to the log on disk, adding to the circular 2330 * from wapbl_add_buf to the log on disk, adding to the circular
2280 * queue head at byte offset *offp, and returning the new head's 2331 * queue head at byte offset *offp, and returning the new head's
2281 * byte offset in *offp. 2332 * byte offset in *offp.
2282 */ 2333 */
2283static int 2334static int
2284wapbl_write_blocks(struct wapbl *wl, off_t *offp) 2335wapbl_write_blocks(struct wapbl *wl, off_t *offp)
2285{ 2336{
2286 struct wapbl_wc_blocklist *wc = 2337 struct wapbl_wc_blocklist *wc =
2287 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch; 2338 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
2288 int blocklen = 1<<wl->wl_log_dev_bshift; 2339 int blocklen = 1<<wl->wl_log_dev_bshift;
2289 struct buf *bp; 2340 struct buf *bp;
2290 off_t off = *offp; 2341 off_t off = *offp;
2291 int error; 2342 int error;
2292 size_t padding; 2343 size_t padding;
2293 2344
2294 KASSERT(rw_write_held(&wl->wl_rwlock)); 2345 KASSERT(rw_write_held(&wl->wl_rwlock));
2295 2346
2296 bp = LIST_FIRST(&wl->wl_bufs); 2347 bp = LIST_FIRST(&wl->wl_bufs);
2297 2348
2298 while (bp) { 2349 while (bp) {
2299 int cnt; 2350 int cnt;
2300 struct buf *obp = bp; 2351 struct buf *obp = bp;
2301 2352
2302 KASSERT(bp->b_flags & B_LOCKED); 2353 KASSERT(bp->b_flags & B_LOCKED);
2303 2354
2304 wc->wc_type = WAPBL_WC_BLOCKS; 2355 wc->wc_type = WAPBL_WC_BLOCKS;
2305 wc->wc_len = blocklen; 2356 wc->wc_len = blocklen;
2306 wc->wc_blkcount = 0; 2357 wc->wc_blkcount = 0;
2307 while (bp && (wc->wc_blkcount < wl->wl_brperjblock)) { 2358 while (bp && (wc->wc_blkcount < wl->wl_brperjblock)) {
2308 /* 2359 /*
2309 * Make sure all the physical block numbers are up to 2360 * Make sure all the physical block numbers are up to
2310 * date. If this is not always true on a given 2361 * date. If this is not always true on a given
2311 * filesystem, then VOP_BMAP must be called. We 2362 * filesystem, then VOP_BMAP must be called. We
2312 * could call VOP_BMAP here, or else in the filesystem 2363 * could call VOP_BMAP here, or else in the filesystem
2313 * specific flush callback, although neither of those 2364 * specific flush callback, although neither of those
2314 * solutions allow us to take the vnode lock. If a 2365 * solutions allow us to take the vnode lock. If a
2315 * filesystem requires that we must take the vnode lock 2366 * filesystem requires that we must take the vnode lock
2316 * to call VOP_BMAP, then we can probably do it in 2367 * to call VOP_BMAP, then we can probably do it in
2317 * bwrite when the vnode lock should already be held 2368 * bwrite when the vnode lock should already be held
2318 * by the invoking code. 2369 * by the invoking code.
2319 */ 2370 */
2320 KASSERT((bp->b_vp->v_type == VBLK) || 2371 KASSERT((bp->b_vp->v_type == VBLK) ||
2321 (bp->b_blkno != bp->b_lblkno)); 2372 (bp->b_blkno != bp->b_lblkno));
2322 KASSERT(bp->b_blkno > 0); 2373 KASSERT(bp->b_blkno > 0);
2323 2374
2324 wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno; 2375 wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno;
2325 wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount; 2376 wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount;
2326 wc->wc_len += bp->b_bcount; 2377 wc->wc_len += bp->b_bcount;
2327 wc->wc_blkcount++; 2378 wc->wc_blkcount++;
2328 bp = LIST_NEXT(bp, b_wapbllist); 2379 bp = LIST_NEXT(bp, b_wapbllist);
2329 } 2380 }
2330 if (wc->wc_len % blocklen != 0) { 2381 if (wc->wc_len % blocklen != 0) {
2331 padding = blocklen - wc->wc_len % blocklen; 2382 padding = blocklen - wc->wc_len % blocklen;
2332 wc->wc_len += padding; 2383 wc->wc_len += padding;
2333 } else { 2384 } else {
2334 padding = 0; 2385 padding = 0;
2335 } 2386 }
2336 2387
2337 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2388 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2338 ("wapbl_write_blocks: len = %u (padding %zu) off = %"PRIdMAX"\n", 2389 ("wapbl_write_blocks: len = %u (padding %zu) off = %"PRIdMAX"\n",
2339 wc->wc_len, padding, (intmax_t)off)); 2390 wc->wc_len, padding, (intmax_t)off));
2340 2391
2341 error = wapbl_circ_write(wl, wc, blocklen, &off); 2392 error = wapbl_circ_write(wl, wc, blocklen, &off);
2342 if (error) 2393 if (error)
2343 return error; 2394 return error;
2344 bp = obp; 2395 bp = obp;
2345 cnt = 0; 2396 cnt = 0;
2346 while (bp && (cnt++ < wl->wl_brperjblock)) { 2397 while (bp && (cnt++ < wl->wl_brperjblock)) {
2347 error = wapbl_circ_write(wl, bp->b_data, 2398 error = wapbl_circ_write(wl, bp->b_data,
2348 bp->b_bcount, &off); 2399 bp->b_bcount, &off);
2349 if (error) 2400 if (error)
2350 return error; 2401 return error;
2351 bp = LIST_NEXT(bp, b_wapbllist); 2402 bp = LIST_NEXT(bp, b_wapbllist);
2352 } 2403 }
2353 if (padding) { 2404 if (padding) {
2354 void *zero; 2405 void *zero;
2355  2406
2356 zero = wapbl_alloc(padding); 2407 zero = wapbl_alloc(padding);
2357 memset(zero, 0, padding); 2408 memset(zero, 0, padding);
2358 error = wapbl_circ_write(wl, zero, padding, &off); 2409 error = wapbl_circ_write(wl, zero, padding, &off);
2359 wapbl_free(zero, padding); 2410 wapbl_free(zero, padding);
2360 if (error) 2411 if (error)
2361 return error; 2412 return error;
2362 } 2413 }
2363 } 2414 }
2364 *offp = off; 2415 *offp = off;
2365 return 0; 2416 return 0;
2366} 2417}
2367 2418
2368/* 2419/*
2369 * wapbl_write_revocations(wl, offp) 2420 * wapbl_write_revocations(wl, offp)
2370 * 2421 *
2371 * Write all pending deallocations in the current transaction from 2422 * Write all pending deallocations in the current transaction from
2372 * wapbl_register_deallocation to the log on disk, adding to the 2423 * wapbl_register_deallocation to the log on disk, adding to the
2373 * circular queue's head at byte offset *offp, and returning the 2424 * circular queue's head at byte offset *offp, and returning the
2374 * new head's byte offset in *offp. 2425 * new head's byte offset in *offp.
2375 */ 2426 */
2376static int 2427static int
2377wapbl_write_revocations(struct wapbl *wl, off_t *offp) 2428wapbl_write_revocations(struct wapbl *wl, off_t *offp)
2378{ 2429{
2379 struct wapbl_wc_blocklist *wc = 2430 struct wapbl_wc_blocklist *wc =
2380 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch; 2431 (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
2381 struct wapbl_dealloc *wd, *lwd; 2432 struct wapbl_dealloc *wd, *lwd;
2382 int blocklen = 1<<wl->wl_log_dev_bshift; 2433 int blocklen = 1<<wl->wl_log_dev_bshift;
2383 off_t off = *offp; 2434 off_t off = *offp;
2384 int error; 2435 int error;
2385 2436
2386 if (wl->wl_dealloccnt == 0) 2437 if (wl->wl_dealloccnt == 0)
2387 return 0; 2438 return 0;
2388 2439
2389 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL) { 2440 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL) {
2390 wc->wc_type = WAPBL_WC_REVOCATIONS; 2441 wc->wc_type = WAPBL_WC_REVOCATIONS;
2391 wc->wc_len = blocklen; 2442 wc->wc_len = blocklen;
2392 wc->wc_blkcount = 0; 2443 wc->wc_blkcount = 0;
2393 while (wd && (wc->wc_blkcount < wl->wl_brperjblock)) { 2444 while (wd && (wc->wc_blkcount < wl->wl_brperjblock)) {
2394 wc->wc_blocks[wc->wc_blkcount].wc_daddr = 2445 wc->wc_blocks[wc->wc_blkcount].wc_daddr =
2395 wd->wd_blkno; 2446 wd->wd_blkno;
2396 wc->wc_blocks[wc->wc_blkcount].wc_dlen = 2447 wc->wc_blocks[wc->wc_blkcount].wc_dlen =
2397 wd->wd_len; 2448 wd->wd_len;
2398 wc->wc_blkcount++; 2449 wc->wc_blkcount++;
2399 2450
2400 wd = TAILQ_NEXT(wd, wd_entries); 2451 wd = TAILQ_NEXT(wd, wd_entries);
2401 } 2452 }
2402 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2453 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2403 ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n", 2454 ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n",
2404 wc->wc_len, (intmax_t)off)); 2455 wc->wc_len, (intmax_t)off));
2405 error = wapbl_circ_write(wl, wc, blocklen, &off); 2456 error = wapbl_circ_write(wl, wc, blocklen, &off);
2406 if (error) 2457 if (error)
2407 return error; 2458 return error;
2408 2459
2409 /* free all successfully written deallocs */ 2460 /* free all successfully written deallocs */
2410 lwd = wd; 2461 lwd = wd;
2411 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL) { 2462 while ((wd = TAILQ_FIRST(&wl->wl_dealloclist)) != NULL) {
2412 if (wd == lwd) 2463 if (wd == lwd)
2413 break; 2464 break;
2414 wapbl_deallocation_free(wl, wd, true); 2465 wapbl_deallocation_free(wl, wd, true);
2415 } 2466 }
2416 } 2467 }
2417 *offp = off; 2468 *offp = off;
2418 return 0; 2469 return 0;
2419} 2470}
2420 2471
2421/* 2472/*
2422 * wapbl_write_inodes(wl, offp) 2473 * wapbl_write_inodes(wl, offp)
2423 * 2474 *
2424 * Write all pending inode allocations in the current transaction 2475 * Write all pending inode allocations in the current transaction
2425 * from wapbl_register_inode to the log on disk, adding to the 2476 * from wapbl_register_inode to the log on disk, adding to the
2426 * circular queue's head at byte offset *offp and returning the 2477 * circular queue's head at byte offset *offp and returning the
2427 * new head's byte offset in *offp. 2478 * new head's byte offset in *offp.
2428 */ 2479 */
2429static int 2480static int
2430wapbl_write_inodes(struct wapbl *wl, off_t *offp) 2481wapbl_write_inodes(struct wapbl *wl, off_t *offp)
2431{ 2482{
2432 struct wapbl_wc_inodelist *wc = 2483 struct wapbl_wc_inodelist *wc =
2433 (struct wapbl_wc_inodelist *)wl->wl_wc_scratch; 2484 (struct wapbl_wc_inodelist *)wl->wl_wc_scratch;
2434 int i; 2485 int i;
2435 int blocklen = 1 << wl->wl_log_dev_bshift; 2486 int blocklen = 1 << wl->wl_log_dev_bshift;
2436 off_t off = *offp; 2487 off_t off = *offp;
2437 int error; 2488 int error;
2438 2489
2439 struct wapbl_ino_head *wih; 2490 struct wapbl_ino_head *wih;
2440 struct wapbl_ino *wi; 2491 struct wapbl_ino *wi;
2441 int iph; 2492 int iph;
2442 2493
2443 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) / 2494 iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
2444 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]); 2495 sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
2445 2496
2446 i = 0; 2497 i = 0;
2447 wih = &wl->wl_inohash[0]; 2498 wih = &wl->wl_inohash[0];
2448 wi = 0; 2499 wi = 0;
2449 do { 2500 do {
2450 wc->wc_type = WAPBL_WC_INODES; 2501 wc->wc_type = WAPBL_WC_INODES;
2451 wc->wc_len = blocklen; 2502 wc->wc_len = blocklen;
2452 wc->wc_inocnt = 0; 2503 wc->wc_inocnt = 0;
2453 wc->wc_clear = (i == 0); 2504 wc->wc_clear = (i == 0);
2454 while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) { 2505 while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) {
2455 while (!wi) { 2506 while (!wi) {
2456 KASSERT((wih - &wl->wl_inohash[0]) 2507 KASSERT((wih - &wl->wl_inohash[0])
2457 <= wl->wl_inohashmask); 2508 <= wl->wl_inohashmask);
2458 wi = LIST_FIRST(wih++); 2509 wi = LIST_FIRST(wih++);
2459 } 2510 }
2460 wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino; 2511 wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino;
2461 wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode; 2512 wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode;
2462 wc->wc_inocnt++; 2513 wc->wc_inocnt++;
2463 i++; 2514 i++;
2464 wi = LIST_NEXT(wi, wi_hash); 2515 wi = LIST_NEXT(wi, wi_hash);
2465 } 2516 }
2466 WAPBL_PRINTF(WAPBL_PRINT_WRITE, 2517 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2467 ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n", 2518 ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n",
2468 wc->wc_len, (intmax_t)off)); 2519 wc->wc_len, (intmax_t)off));
2469 error = wapbl_circ_write(wl, wc, blocklen, &off); 2520 error = wapbl_circ_write(wl, wc, blocklen, &off);
2470 if (error) 2521 if (error)
2471 return error; 2522 return error;
2472 } while (i < wl->wl_inohashcnt); 2523 } while (i < wl->wl_inohashcnt);
2473  2524
2474 *offp = off; 2525 *offp = off;
2475 return 0; 2526 return 0;
2476} 2527}
2477 2528
2478#endif /* _KERNEL */ 2529#endif /* _KERNEL */
2479 2530
2480/****************************************************************/ 2531/****************************************************************/
2481 2532
2482struct wapbl_blk { 2533struct wapbl_blk {
2483 LIST_ENTRY(wapbl_blk) wb_hash; 2534 LIST_ENTRY(wapbl_blk) wb_hash;
2484 daddr_t wb_blk; 2535 daddr_t wb_blk;
2485 off_t wb_off; /* Offset of this block in the log */ 2536 off_t wb_off; /* Offset of this block in the log */
2486}; 2537};
2487#define WAPBL_BLKPOOL_MIN 83 2538#define WAPBL_BLKPOOL_MIN 83
2488 2539
2489static void 2540static void
2490wapbl_blkhash_init(struct wapbl_replay *wr, u_int size) 2541wapbl_blkhash_init(struct wapbl_replay *wr, u_int size)
2491{ 2542{
2492 if (size < WAPBL_BLKPOOL_MIN) 2543 if (size < WAPBL_BLKPOOL_MIN)
2493 size = WAPBL_BLKPOOL_MIN; 2544 size = WAPBL_BLKPOOL_MIN;
2494 KASSERT(wr->wr_blkhash == 0); 2545 KASSERT(wr->wr_blkhash == 0);
2495#ifdef _KERNEL 2546#ifdef _KERNEL
2496 wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask); 2547 wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask);
2497#else /* ! _KERNEL */ 2548#else /* ! _KERNEL */
2498 /* Manually implement hashinit */ 2549 /* Manually implement hashinit */
2499 { 2550 {
2500 unsigned long i, hashsize; 2551 unsigned long i, hashsize;
2501 for (hashsize = 1; hashsize < size; hashsize <<= 1) 2552 for (hashsize = 1; hashsize < size; hashsize <<= 1)
2502 continue; 2553 continue;
2503 wr->wr_blkhash = wapbl_alloc(hashsize * sizeof(*wr->wr_blkhash)); 2554 wr->wr_blkhash = wapbl_alloc(hashsize * sizeof(*wr->wr_blkhash));
2504 for (i = 0; i < hashsize; i++) 2555 for (i = 0; i < hashsize; i++)
2505 LIST_INIT(&wr->wr_blkhash[i]); 2556 LIST_INIT(&wr->wr_blkhash[i]);
2506 wr->wr_blkhashmask = hashsize - 1; 2557 wr->wr_blkhashmask = hashsize - 1;
2507 } 2558 }
2508#endif /* ! _KERNEL */ 2559#endif /* ! _KERNEL */
2509} 2560}
2510 2561
2511static void 2562static void
2512wapbl_blkhash_free(struct wapbl_replay *wr) 2563wapbl_blkhash_free(struct wapbl_replay *wr)
2513{ 2564{
2514 KASSERT(wr->wr_blkhashcnt == 0); 2565 KASSERT(wr->wr_blkhashcnt == 0);
2515#ifdef _KERNEL 2566#ifdef _KERNEL
2516 hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask); 2567 hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask);
2517#else /* ! _KERNEL */ 2568#else /* ! _KERNEL */
2518 wapbl_free(wr->wr_blkhash, 2569 wapbl_free(wr->wr_blkhash,
2519 (wr->wr_blkhashmask + 1) * sizeof(*wr->wr_blkhash)); 2570 (wr->wr_blkhashmask + 1) * sizeof(*wr->wr_blkhash));
2520#endif /* ! _KERNEL */ 2571#endif /* ! _KERNEL */
2521} 2572}
2522 2573
2523static struct wapbl_blk * 2574static struct wapbl_blk *
2524wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk) 2575wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk)
2525{ 2576{
2526 struct wapbl_blk_head *wbh; 2577 struct wapbl_blk_head *wbh;
2527 struct wapbl_blk *wb; 2578 struct wapbl_blk *wb;
2528 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask]; 2579 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2529 LIST_FOREACH(wb, wbh, wb_hash) { 2580 LIST_FOREACH(wb, wbh, wb_hash) {
2530 if (blk == wb->wb_blk) 2581 if (blk == wb->wb_blk)
2531 return wb; 2582 return wb;
2532 } 2583 }
2533 return 0; 2584 return 0;
2534} 2585}
2535 2586
2536static void 2587static void
2537wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off) 2588wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off)
2538{ 2589{
2539 struct wapbl_blk_head *wbh; 2590 struct wapbl_blk_head *wbh;
2540 struct wapbl_blk *wb; 2591 struct wapbl_blk *wb;
2541 wb = wapbl_blkhash_get(wr, blk); 2592 wb = wapbl_blkhash_get(wr, blk);
2542 if (wb) { 2593 if (wb) {
2543 KASSERT(wb->wb_blk == blk); 2594 KASSERT(wb->wb_blk == blk);
2544 wb->wb_off = off; 2595 wb->wb_off = off;
2545 } else { 2596 } else {
2546 wb = wapbl_alloc(sizeof(*wb)); 2597 wb = wapbl_alloc(sizeof(*wb));
2547 wb->wb_blk = blk; 2598 wb->wb_blk = blk;
2548 wb->wb_off = off; 2599 wb->wb_off = off;
2549 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask]; 2600 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2550 LIST_INSERT_HEAD(wbh, wb, wb_hash); 2601 LIST_INSERT_HEAD(wbh, wb, wb_hash);
2551 wr->wr_blkhashcnt++; 2602 wr->wr_blkhashcnt++;
2552 } 2603 }
2553} 2604}
2554 2605
2555static void 2606static void
2556wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk) 2607wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk)
2557{ 2608{
2558 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk); 2609 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
2559 if (wb) { 2610 if (wb) {
2560 KASSERT(wr->wr_blkhashcnt > 0); 2611 KASSERT(wr->wr_blkhashcnt > 0);
2561 wr->wr_blkhashcnt--; 2612 wr->wr_blkhashcnt--;
2562 LIST_REMOVE(wb, wb_hash); 2613 LIST_REMOVE(wb, wb_hash);
2563 wapbl_free(wb, sizeof(*wb)); 2614 wapbl_free(wb, sizeof(*wb));
2564 } 2615 }
2565} 2616}
2566 2617
2567static void 2618static void
2568wapbl_blkhash_clear(struct wapbl_replay *wr) 2619wapbl_blkhash_clear(struct wapbl_replay *wr)
2569{ 2620{
2570 unsigned long i; 2621 unsigned long i;
2571 for (i = 0; i <= wr->wr_blkhashmask; i++) { 2622 for (i = 0; i <= wr->wr_blkhashmask; i++) {
2572 struct wapbl_blk *wb; 2623 struct wapbl_blk *wb;
2573 2624
2574 while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) { 2625 while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) {
2575 KASSERT(wr->wr_blkhashcnt > 0); 2626 KASSERT(wr->wr_blkhashcnt > 0);
2576 wr->wr_blkhashcnt--; 2627 wr->wr_blkhashcnt--;
2577 LIST_REMOVE(wb, wb_hash); 2628 LIST_REMOVE(wb, wb_hash);
2578 wapbl_free(wb, sizeof(*wb)); 2629 wapbl_free(wb, sizeof(*wb));
2579 } 2630 }
2580 } 2631 }
2581 KASSERT(wr->wr_blkhashcnt == 0); 2632 KASSERT(wr->wr_blkhashcnt == 0);
2582} 2633}
2583 2634
2584/****************************************************************/ 2635/****************************************************************/
2585 2636
2586/* 2637/*
2587 * wapbl_circ_read(wr, data, len, offp) 2638 * wapbl_circ_read(wr, data, len, offp)
2588 * 2639 *
2589 * Read len bytes into data from the circular queue of wr, 2640 * Read len bytes into data from the circular queue of wr,
2590 * starting at the linear byte offset *offp, and returning the new 2641 * starting at the linear byte offset *offp, and returning the new
2591 * linear byte offset in *offp. 2642 * linear byte offset in *offp.
2592 * 2643 *
2593 * If the starting linear byte offset precedes wr->wr_circ_off, 2644 * If the starting linear byte offset precedes wr->wr_circ_off,
2594 * the read instead begins at wr->wr_circ_off. XXX WTF? This 2645 * the read instead begins at wr->wr_circ_off. XXX WTF? This
2595 * should be a KASSERT, not a conditional. 2646 * should be a KASSERT, not a conditional.
2596 */ 2647 */
2597static int 2648static int
2598wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp) 2649wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp)
2599{ 2650{
2600 size_t slen; 2651 size_t slen;
2601 off_t off = *offp; 2652 off_t off = *offp;
2602 int error; 2653 int error;
2603 daddr_t pbn; 2654 daddr_t pbn;
2604 2655
2605 KASSERT(((len >> wr->wr_log_dev_bshift) << 2656 KASSERT(((len >> wr->wr_log_dev_bshift) <<
2606 wr->wr_log_dev_bshift) == len); 2657 wr->wr_log_dev_bshift) == len);
2607 2658
2608 if (off < wr->wr_circ_off) 2659 if (off < wr->wr_circ_off)
2609 off = wr->wr_circ_off; 2660 off = wr->wr_circ_off;
2610 slen = wr->wr_circ_off + wr->wr_circ_size - off; 2661 slen = wr->wr_circ_off + wr->wr_circ_size - off;
2611 if (slen < len) { 2662 if (slen < len) {
2612 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift); 2663 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
2613#ifdef _KERNEL 2664#ifdef _KERNEL
2614 pbn = btodb(pbn << wr->wr_log_dev_bshift); 2665 pbn = btodb(pbn << wr->wr_log_dev_bshift);
2615#endif 2666#endif
2616 error = wapbl_read(data, slen, wr->wr_devvp, pbn); 2667 error = wapbl_read(data, slen, wr->wr_devvp, pbn);
2617 if (error) 2668 if (error)
2618 return error; 2669 return error;
2619 data = (uint8_t *)data + slen; 2670 data = (uint8_t *)data + slen;
2620 len -= slen; 2671 len -= slen;
2621 off = wr->wr_circ_off; 2672 off = wr->wr_circ_off;
2622 } 2673 }
2623 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift); 2674 pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
2624#ifdef _KERNEL 2675#ifdef _KERNEL
2625 pbn = btodb(pbn << wr->wr_log_dev_bshift); 2676 pbn = btodb(pbn << wr->wr_log_dev_bshift);
2626#endif 2677#endif
2627 error = wapbl_read(data, len, wr->wr_devvp, pbn); 2678 error = wapbl_read(data, len, wr->wr_devvp, pbn);
2628 if (error) 2679 if (error)
2629 return error; 2680 return error;
2630 off += len; 2681 off += len;
2631 if (off >= wr->wr_circ_off + wr->wr_circ_size) 2682 if (off >= wr->wr_circ_off + wr->wr_circ_size)
2632 off = wr->wr_circ_off; 2683 off = wr->wr_circ_off;
2633 *offp = off; 2684 *offp = off;
2634 return 0; 2685 return 0;
2635} 2686}
2636 2687
2637/* 2688/*
2638 * wapbl_circ_advance(wr, len, offp) 2689 * wapbl_circ_advance(wr, len, offp)
2639 * 2690 *
2640 * Compute the linear byte offset of the circular queue of wr that 2691 * Compute the linear byte offset of the circular queue of wr that
2641 * is len bytes past *offp, and store it in *offp. 2692 * is len bytes past *offp, and store it in *offp.
2642 * 2693 *
2643 * This is as if wapbl_circ_read, but without actually reading 2694 * This is as if wapbl_circ_read, but without actually reading
2644 * anything. 2695 * anything.
2645 * 2696 *
2646 * If the starting linear byte offset precedes wr->wr_circ_off, it 2697 * If the starting linear byte offset precedes wr->wr_circ_off, it
2647 * is taken to be wr->wr_circ_off instead. XXX WTF? This should 2698 * is taken to be wr->wr_circ_off instead. XXX WTF? This should
2648 * be a KASSERT, not a conditional. 2699 * be a KASSERT, not a conditional.
2649 */ 2700 */
2650static void 2701static void
2651wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp) 2702wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp)
2652{ 2703{
2653 size_t slen; 2704 size_t slen;
2654 off_t off = *offp; 2705 off_t off = *offp;
2655 2706
2656 KASSERT(((len >> wr->wr_log_dev_bshift) << 2707 KASSERT(((len >> wr->wr_log_dev_bshift) <<
2657 wr->wr_log_dev_bshift) == len); 2708 wr->wr_log_dev_bshift) == len);
2658 2709
2659 if (off < wr->wr_circ_off) 2710 if (off < wr->wr_circ_off)
2660 off = wr->wr_circ_off; 2711 off = wr->wr_circ_off;
2661 slen = wr->wr_circ_off + wr->wr_circ_size - off; 2712 slen = wr->wr_circ_off + wr->wr_circ_size - off;
2662 if (slen < len) { 2713 if (slen < len) {
2663 len -= slen; 2714 len -= slen;
2664 off = wr->wr_circ_off; 2715 off = wr->wr_circ_off;
2665 } 2716 }
2666 off += len; 2717 off += len;
2667 if (off >= wr->wr_circ_off + wr->wr_circ_size) 2718 if (off >= wr->wr_circ_off + wr->wr_circ_size)
2668 off = wr->wr_circ_off; 2719 off = wr->wr_circ_off;
2669 *offp = off; 2720 *offp = off;
2670} 2721}
2671 2722
2672/****************************************************************/ 2723/****************************************************************/
2673 2724
2674int 2725int
2675wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp, 2726wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp,
2676 daddr_t off, size_t count, size_t blksize) 2727 daddr_t off, size_t count, size_t blksize)
2677{ 2728{
2678 struct wapbl_replay *wr; 2729 struct wapbl_replay *wr;
2679 int error; 2730 int error;
2680 struct vnode *devvp; 2731 struct vnode *devvp;
2681 daddr_t logpbn; 2732 daddr_t logpbn;
2682 uint8_t *scratch; 2733 uint8_t *scratch;
2683 struct wapbl_wc_header *wch; 2734 struct wapbl_wc_header *wch;
2684 struct wapbl_wc_header *wch2; 2735 struct wapbl_wc_header *wch2;
2685 /* Use this until we read the actual log header */ 2736 /* Use this until we read the actual log header */
2686 int log_dev_bshift = ilog2(blksize); 2737 int log_dev_bshift = ilog2(blksize);
2687 size_t used; 2738 size_t used;
2688 daddr_t pbn; 2739 daddr_t pbn;
2689 2740
2690 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 2741 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
2691 ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n", 2742 ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n",
2692 vp, off, count, blksize)); 2743 vp, off, count, blksize));
2693 2744
2694 if (off < 0) 2745 if (off < 0)
2695 return EINVAL; 2746 return EINVAL;
2696 2747
2697 if (blksize < DEV_BSIZE) 2748 if (blksize < DEV_BSIZE)
2698 return EINVAL; 2749 return EINVAL;
2699 if (blksize % DEV_BSIZE) 2750 if (blksize % DEV_BSIZE)
2700 return EINVAL; 2751 return EINVAL;
2701 2752
2702#ifdef _KERNEL 2753#ifdef _KERNEL
2703#if 0 2754#if 0
2704 /* XXX vp->v_size isn't reliably set for VBLK devices, 2755 /* XXX vp->v_size isn't reliably set for VBLK devices,
2705 * especially root. However, we might still want to verify 2756 * especially root. However, we might still want to verify
2706 * that the full load is readable */ 2757 * that the full load is readable */
2707 if ((off + count) * blksize > vp->v_size) 2758 if ((off + count) * blksize > vp->v_size)
2708 return EINVAL; 2759 return EINVAL;
2709#endif 2760#endif
2710 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) { 2761 if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) {
2711 return error; 2762 return error;
2712 } 2763 }
2713#else /* ! _KERNEL */ 2764#else /* ! _KERNEL */
2714 devvp = vp; 2765 devvp = vp;
2715 logpbn = off; 2766 logpbn = off;
2716#endif /* ! _KERNEL */ 2767#endif /* ! _KERNEL */
2717 2768
2718 scratch = wapbl_alloc(MAXBSIZE); 2769 scratch = wapbl_alloc(MAXBSIZE);
2719 2770
2720 pbn = logpbn; 2771 pbn = logpbn;
2721#ifdef _KERNEL 2772#ifdef _KERNEL
2722 pbn = btodb(pbn << log_dev_bshift); 2773 pbn = btodb(pbn << log_dev_bshift);
2723#endif 2774#endif
2724 error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, pbn); 2775 error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, pbn);
2725 if (error) 2776 if (error)
2726 goto errout; 2777 goto errout;
2727 2778
2728 wch = (struct wapbl_wc_header *)scratch; 2779 wch = (struct wapbl_wc_header *)scratch;
2729 wch2 = 2780 wch2 =
2730 (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift)); 2781 (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift));
2731 /* XXX verify checksums and magic numbers */ 2782 /* XXX verify checksums and magic numbers */
2732 if (wch->wc_type != WAPBL_WC_HEADER) { 2783 if (wch->wc_type != WAPBL_WC_HEADER) {
2733 printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type); 2784 printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type);
2734 error = EFTYPE; 2785 error = EFTYPE;
2735 goto errout; 2786 goto errout;
2736 } 2787 }
2737 2788
2738 if (wch2->wc_generation > wch->wc_generation) 2789 if (wch2->wc_generation > wch->wc_generation)
2739 wch = wch2; 2790 wch = wch2;
2740 2791
2741 wr = wapbl_calloc(1, sizeof(*wr)); 2792 wr = wapbl_calloc(1, sizeof(*wr));
2742 2793
2743 wr->wr_logvp = vp; 2794 wr->wr_logvp = vp;
2744 wr->wr_devvp = devvp; 2795 wr->wr_devvp = devvp;
2745 wr->wr_logpbn = logpbn; 2796 wr->wr_logpbn = logpbn;
2746 2797
2747 wr->wr_scratch = scratch; 2798 wr->wr_scratch = scratch;
2748 2799
2749 wr->wr_log_dev_bshift = wch->wc_log_dev_bshift; 2800 wr->wr_log_dev_bshift = wch->wc_log_dev_bshift;
2750 wr->wr_fs_dev_bshift = wch->wc_fs_dev_bshift; 2801 wr->wr_fs_dev_bshift = wch->wc_fs_dev_bshift;
2751 wr->wr_circ_off = wch->wc_circ_off; 2802 wr->wr_circ_off = wch->wc_circ_off;
2752 wr->wr_circ_size = wch->wc_circ_size; 2803 wr->wr_circ_size = wch->wc_circ_size;
2753 wr->wr_generation = wch->wc_generation; 2804 wr->wr_generation = wch->wc_generation;
2754 2805
2755 used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail); 2806 used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail);
2756 2807
2757 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, 2808 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
2758 ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64 2809 ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64
2759 " len=%"PRId64" used=%zu\n", 2810 " len=%"PRId64" used=%zu\n",
2760 wch->wc_head, wch->wc_tail, wch->wc_circ_off, 2811 wch->wc_head, wch->wc_tail, wch->wc_circ_off,
2761 wch->wc_circ_size, used)); 2812 wch->wc_circ_size, used));
2762 2813
2763 wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift)); 2814 wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift));
2764 2815
2765 error = wapbl_replay_process(wr, wch->wc_head, wch->wc_tail); 2816 error = wapbl_replay_process(wr, wch->wc_head, wch->wc_tail);
2766 if (error) { 2817 if (error) {
2767 wapbl_replay_stop(wr); 2818 wapbl_replay_stop(wr);
2768 wapbl_replay_free(wr); 2819 wapbl_replay_free(wr);
2769 return error; 2820 return error;
2770 } 2821 }
2771 2822
2772 *wrp = wr; 2823 *wrp = wr;
2773 return 0; 2824 return 0;
2774 2825
2775 errout: 2826 errout:
2776 wapbl_free(scratch, MAXBSIZE); 2827 wapbl_free(scratch, MAXBSIZE);
2777 return error; 2828 return error;
2778} 2829}
2779 2830
2780void 2831void
2781wapbl_replay_stop(struct wapbl_replay *wr) 2832wapbl_replay_stop(struct wapbl_replay *wr)
2782{ 2833{
2783 2834
2784 if (!wapbl_replay_isopen(wr)) 2835 if (!wapbl_replay_isopen(wr))
2785 return; 2836 return;
2786 2837
2787 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n")); 2838 WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n"));
2788 2839
2789 wapbl_free(wr->wr_scratch, MAXBSIZE); 2840 wapbl_free(wr->wr_scratch, MAXBSIZE);
2790 wr->wr_scratch = NULL; 2841 wr->wr_scratch = NULL;
2791 2842
2792 wr->wr_logvp = NULL; 2843 wr->wr_logvp = NULL;
2793 2844
2794 wapbl_blkhash_clear(wr); 2845 wapbl_blkhash_clear(wr);
2795 wapbl_blkhash_free(wr); 2846 wapbl_blkhash_free(wr);
2796} 2847}
2797 2848
2798void 2849void
2799wapbl_replay_free(struct wapbl_replay *wr) 2850wapbl_replay_free(struct wapbl_replay *wr)
2800{ 2851{
2801 2852
2802 KDASSERT(!wapbl_replay_isopen(wr)); 2853 KDASSERT(!wapbl_replay_isopen(wr));
2803 2854
2804 if (wr->wr_inodes) 2855 if (wr->wr_inodes)
2805 wapbl_free(wr->wr_inodes, 2856 wapbl_free(wr->wr_inodes,
2806 wr->wr_inodescnt * sizeof(wr->wr_inodes[0])); 2857 wr->wr_inodescnt * sizeof(wr->wr_inodes[0]));
2807 wapbl_free(wr, sizeof(*wr)); 2858 wapbl_free(wr, sizeof(*wr));
2808} 2859}
2809 2860
#ifdef _KERNEL
/*
 * wapbl_replay_isopen1(wr)
 *
 *	Function form of wapbl_replay_isopen: return its result for wr.
 */
int
wapbl_replay_isopen1(struct wapbl_replay *wr)
{
	const int isopen = wapbl_replay_isopen(wr);

	return isopen;
}
#endif
2818 2869
2819/* 2870/*
2820 * calculate the disk address for the i'th block in the wc_blockblist 2871 * calculate the disk address for the i'th block in the wc_blockblist
2821 * offset by j blocks of size blen. 2872 * offset by j blocks of size blen.
2822 * 2873 *
2823 * wc_daddr is always a kernel disk address in DEV_BSIZE units that 2874 * wc_daddr is always a kernel disk address in DEV_BSIZE units that
2824 * was written to the journal. 2875 * was written to the journal.
2825 * 2876 *
2826 * The kernel needs that address plus the offset in DEV_BSIZE units. 2877 * The kernel needs that address plus the offset in DEV_BSIZE units.
2827 * 2878 *
2828 * Userland needs that address plus the offset in blen units. 2879 * Userland needs that address plus the offset in blen units.
2829 * 2880 *
2830 */ 2881 */
2831static daddr_t 2882static daddr_t
2832wapbl_block_daddr(struct wapbl_wc_blocklist *wc, int i, int j, int blen) 2883wapbl_block_daddr(struct wapbl_wc_blocklist *wc, int i, int j, int blen)
2833{ 2884{
2834 daddr_t pbn; 2885 daddr_t pbn;
2835 2886
2836#ifdef _KERNEL 2887#ifdef _KERNEL
2837 pbn = wc->wc_blocks[i].wc_daddr + btodb(j * blen); 2888 pbn = wc->wc_blocks[i].wc_daddr + btodb(j * blen);
2838#else 2889#else
2839 pbn = dbtob(wc->wc_blocks[i].wc_daddr) / blen + j; 2890 pbn = dbtob(wc->wc_blocks[i].wc_daddr) / blen + j;
2840#endif 2891#endif
2841 2892
2842 return pbn; 2893 return pbn;
2843} 2894}
2844 2895
2845static void 2896static void
2846wapbl_replay_process_blocks(struct wapbl_replay *wr, off_t *offp) 2897wapbl_replay_process_blocks(struct wapbl_replay *wr, off_t *offp)
2847{ 2898{
2848 struct wapbl_wc_blocklist *wc = 2899 struct wapbl_wc_blocklist *wc =
2849 (struct wapbl_wc_blocklist *)wr->wr_scratch; 2900 (struct wapbl_wc_blocklist *)wr->wr_scratch;
2850 int fsblklen = 1 << wr->wr_fs_dev_bshift; 2901 int fsblklen = 1 << wr->wr_fs_dev_bshift;
2851 int i, j, n; 2902 int i, j, n;
2852 2903
2853 for (i = 0; i < wc->wc_blkcount; i++) { 2904 for (i = 0; i < wc->wc_blkcount; i++) {
2854 /* 2905 /*
2855 * Enter each physical block into the hashtable independently. 2906 * Enter each physical block into the hashtable independently.
2856 */ 2907 */
2857 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift; 2908 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
2858 for (j = 0; j < n; j++) { 2909 for (j = 0; j < n; j++) {
2859 wapbl_blkhash_ins(wr, wapbl_block_daddr(wc, i, j, fsblklen), 2910 wapbl_blkhash_ins(wr, wapbl_block_daddr(wc, i, j, fsblklen),
2860 *offp); 2911 *offp);
2861 wapbl_circ_advance(wr, fsblklen, offp); 2912 wapbl_circ_advance(wr, fsblklen, offp);
2862 } 2913 }
2863 } 2914 }
2864} 2915}
2865 2916
2866static void 2917static void
2867wapbl_replay_process_revocations(struct wapbl_replay *wr) 2918wapbl_replay_process_revocations(struct wapbl_replay *wr)
2868{ 2919{
2869 struct wapbl_wc_blocklist *wc = 2920 struct wapbl_wc_blocklist *wc =
2870 (struct wapbl_wc_blocklist *)wr->wr_scratch; 2921 (struct wapbl_wc_blocklist *)wr->wr_scratch;
2871 int fsblklen = 1 << wr->wr_fs_dev_bshift; 2922 int fsblklen = 1 << wr->wr_fs_dev_bshift;
2872 int i, j, n; 2923 int i, j, n;
2873 2924
2874 for (i = 0; i < wc->wc_blkcount; i++) { 2925 for (i = 0; i < wc->wc_blkcount; i++) {
2875 /* 2926 /*
2876 * Remove any blocks found from the hashtable. 2927 * Remove any blocks found from the hashtable.
2877 */ 2928 */
2878 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift; 2929 n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
2879 for (j = 0; j < n; j++) 2930 for (j = 0; j < n; j++)
2880 wapbl_blkhash_rem(wr, wapbl_block_daddr(wc, i, j, fsblklen)); 2931 wapbl_blkhash_rem(wr, wapbl_block_daddr(wc, i, j, fsblklen));
2881 } 2932 }
2882} 2933}
2883 2934
2884static void 2935static void
2885wapbl_replay_process_inodes(struct wapbl_replay *wr, off_t oldoff, off_t newoff) 2936wapbl_replay_process_inodes(struct wapbl_replay *wr, off_t oldoff, off_t newoff)
2886{ 2937{
2887 struct wapbl_wc_inodelist *wc = 2938 struct wapbl_wc_inodelist *wc =
2888 (struct wapbl_wc_inodelist *)wr->wr_scratch; 2939 (struct wapbl_wc_inodelist *)wr->wr_scratch;
2889 void *new_inodes; 2940 void *new_inodes;
2890 const size_t oldsize = wr->wr_inodescnt * sizeof(wr->wr_inodes[0]); 2941 const size_t oldsize = wr->wr_inodescnt * sizeof(wr->wr_inodes[0]);
2891 2942
2892 KASSERT(sizeof(wr->wr_inodes[0]) == sizeof(wc->wc_inodes[0])); 2943 KASSERT(sizeof(wr->wr_inodes[0]) == sizeof(wc->wc_inodes[0]));
2893 2944
2894 /* 2945 /*
2895 * Keep track of where we found this so location won't be 2946 * Keep track of where we found this so location won't be
2896 * overwritten. 2947 * overwritten.
2897 */ 2948 */
2898 if (wc->wc_clear) { 2949 if (wc->wc_clear) {
2899 wr->wr_inodestail = oldoff; 2950 wr->wr_inodestail = oldoff;
2900 wr->wr_inodescnt = 0; 2951 wr->wr_inodescnt = 0;
2901 if (wr->wr_inodes != NULL) { 2952 if (wr->wr_inodes != NULL) {
2902 wapbl_free(wr->wr_inodes, oldsize); 2953 wapbl_free(wr->wr_inodes, oldsize);
2903 wr->wr_inodes = NULL; 2954 wr->wr_inodes = NULL;
2904 } 2955 }
2905 } 2956 }
2906 wr->wr_inodeshead = newoff; 2957 wr->wr_inodeshead = newoff;
2907 if (wc->wc_inocnt == 0) 2958 if (wc->wc_inocnt == 0)
2908 return; 2959 return;
2909 2960
2910 new_inodes = wapbl_alloc((wr->wr_inodescnt + wc->wc_inocnt) * 2961 new_inodes = wapbl_alloc((wr->wr_inodescnt + wc->wc_inocnt) *
2911 sizeof(wr->wr_inodes[0])); 2962 sizeof(wr->wr_inodes[0]));
2912 if (wr->wr_inodes != NULL) { 2963 if (wr->wr_inodes != NULL) {
2913 memcpy(new_inodes, wr->wr_inodes, oldsize); 2964 memcpy(new_inodes, wr->wr_inodes, oldsize);
2914 wapbl_free(wr->wr_inodes, oldsize); 2965 wapbl_free(wr->wr_inodes, oldsize);
2915 } 2966 }
2916 wr->wr_inodes = new_inodes; 2967 wr->wr_inodes = new_inodes;
2917 memcpy(&wr->wr_inodes[wr->wr_inodescnt], wc->wc_inodes, 2968 memcpy(&wr->wr_inodes[wr->wr_inodescnt], wc->wc_inodes,
2918 wc->wc_inocnt * sizeof(wr->wr_inodes[0])); 2969 wc->wc_inocnt * sizeof(wr->wr_inodes[0]));
2919 wr->wr_inodescnt += wc->wc_inocnt; 2970 wr->wr_inodescnt += wc->wc_inocnt;
2920} 2971}
2921 2972
2922static int 2973static int
2923wapbl_replay_process(struct wapbl_replay *wr, off_t head, off_t tail) 2974wapbl_replay_process(struct wapbl_replay *wr, off_t head, off_t tail)
2924{ 2975{
2925 off_t off; 2976 off_t off;
2926 int error; 2977 int error;
2927 2978
2928 int logblklen = 1 << wr->wr_log_dev_bshift; 2979 int logblklen = 1 << wr->wr_log_dev_bshift;
2929 2980
2930 wapbl_blkhash_clear(wr); 2981 wapbl_blkhash_clear(wr);
2931 2982
2932 off = tail; 2983 off = tail;
2933 while (off != head) { 2984 while (off != head) {
2934 struct wapbl_wc_null *wcn; 2985 struct wapbl_wc_null *wcn;
2935 off_t saveoff = off; 2986 off_t saveoff = off;
2936 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off); 2987 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
2937 if (error) 2988 if (error)
2938 goto errout; 2989 goto errout;
2939 wcn = (struct wapbl_wc_null *)wr->wr_scratch; 2990 wcn = (struct wapbl_wc_null *)wr->wr_scratch;
2940 switch (wcn->wc_type) { 2991 switch (wcn->wc_type) {
2941 case WAPBL_WC_BLOCKS: 2992 case WAPBL_WC_BLOCKS:
2942 wapbl_replay_process_blocks(wr, &off); 2993 wapbl_replay_process_blocks(wr, &off);
2943 break; 2994 break;
2944 2995
2945 case WAPBL_WC_REVOCATIONS: 2996 case WAPBL_WC_REVOCATIONS:
2946 wapbl_replay_process_revocations(wr); 2997 wapbl_replay_process_revocations(wr);
2947 break; 2998 break;
2948 2999
2949 case WAPBL_WC_INODES: 3000 case WAPBL_WC_INODES:
2950 wapbl_replay_process_inodes(wr, saveoff, off); 3001 wapbl_replay_process_inodes(wr, saveoff, off);
2951 break; 3002 break;
2952 3003
2953 default: 3004 default:
2954 printf("Unrecognized wapbl type: 0x%08x\n", 3005 printf("Unrecognized wapbl type: 0x%08x\n",
2955 wcn->wc_type); 3006 wcn->wc_type);
2956 error = EFTYPE; 3007 error = EFTYPE;
2957 goto errout; 3008 goto errout;
2958 } 3009 }
2959 wapbl_circ_advance(wr, wcn->wc_len, &saveoff); 3010 wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
2960 if (off != saveoff) { 3011 if (off != saveoff) {
2961 printf("wapbl_replay: corrupted records\n"); 3012 printf("wapbl_replay: corrupted records\n");
2962 error = EFTYPE; 3013 error = EFTYPE;
2963 goto errout; 3014 goto errout;
2964 } 3015 }
2965 } 3016 }
2966 return 0; 3017 return 0;
2967 3018
2968 errout: 3019 errout:
2969 wapbl_blkhash_clear(wr); 3020 wapbl_blkhash_clear(wr);
2970 return error; 3021 return error;
2971} 3022}
2972 3023
#if 0
/*
 * wapbl_replay_verify(wr, fsdevvp)
 *
 *	Disabled code (compiled out by the enclosing #if 0).
 *
 *	Walk the log and, for every block of every blocklist record
 *	whose hash entry points at the current log offset, compare the
 *	copy stored in the log with the copy read from fsdevvp.
 *
 *	Return the first read error, EFTYPE if any block differed and
 *	no read error occurred, and 0 otherwise.
 *
 *	NOTE(review): the loop bounds and one shift count use `wch',
 *	which is not declared in this function; that would have to be
 *	resolved before re-enabling it.
 */
int
wapbl_replay_verify(struct wapbl_replay *wr, struct vnode *fsdevvp)
{
	off_t off;
	int mismatchcnt = 0;
	int logblklen = 1 << wr->wr_log_dev_bshift;
	int fsblklen = 1 << wr->wr_fs_dev_bshift;
	/* scratch1 holds the log copy, scratch2 the on-disk copy. */
	void *scratch1 = wapbl_alloc(MAXBSIZE);
	void *scratch2 = wapbl_alloc(MAXBSIZE);
	int error = 0;

	KDASSERT(wapbl_replay_isopen(wr));

	off = wch->wc_tail;
	while (off != wch->wc_head) {
		struct wapbl_wc_null *wcn;
#ifdef DEBUG
		off_t saveoff = off;
#endif
		/* Read the record's first log block into wr's scratch. */
		error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
		if (error)
			goto out;
		wcn = (struct wapbl_wc_null *)wr->wr_scratch;
		switch (wcn->wc_type) {
		case WAPBL_WC_BLOCKS:
			{
				struct wapbl_wc_blocklist *wc =
				    (struct wapbl_wc_blocklist *)wr->wr_scratch;
				int i;
				for (i = 0; i < wc->wc_blkcount; i++) {
					int foundcnt = 0;
					int dirtycnt = 0;
					int j, n;
					/*
					 * Check each physical block into the
					 * hashtable independently
					 */
					n = wc->wc_blocks[i].wc_dlen >>
					    wch->wc_fs_dev_bshift;
					for (j = 0; j < n; j++) {
						struct wapbl_blk *wb =
						   wapbl_blkhash_get(wr,
						   wapbl_block_daddr(wc, i, j, fsblklen));
						/*
						 * Only compare when the hash
						 * entry refers to this very
						 * log offset; otherwise just
						 * skip over the block.
						 */
						if (wb && (wb->wb_off == off)) {
							foundcnt++;
							error =
							    wapbl_circ_read(wr,
							    scratch1, fsblklen,
							    &off);
							if (error)
								goto out;
							error =
							    wapbl_read(scratch2,
							    fsblklen, fsdevvp,
							    wb->wb_blk);
							if (error)
								goto out;
							if (memcmp(scratch1,
								   scratch2,
								   fsblklen)) {
								printf(
		"wapbl_verify: mismatch block %"PRId64" at off %"PRIdMAX"\n",
		wb->wb_blk, (intmax_t)off);
								dirtycnt++;
								mismatchcnt++;
							}
						} else {
							wapbl_circ_advance(wr,
							    fsblklen, &off);
						}
					}
#if 0
					/*
					 * If all of the blocks in an entry
					 * are clean, then remove all of its
					 * blocks from the hashtable since they
					 * never will need replay.
					 */
					if ((foundcnt != 0) &&
					    (dirtycnt == 0)) {
						off = saveoff;
						wapbl_circ_advance(wr,
						    logblklen, &off);
						for (j = 0; j < n; j++) {
							struct wapbl_blk *wb =
							   wapbl_blkhash_get(wr,
							   wapbl_block_daddr(wc, i, j, fsblklen));
							if (wb &&
							  (wb->wb_off == off)) {
								wapbl_blkhash_rem(wr, wb->wb_blk);
							}
							wapbl_circ_advance(wr,
							    fsblklen, &off);
						}
					}
#endif
				}
			}
			break;
		case WAPBL_WC_REVOCATIONS:
		case WAPBL_WC_INODES:
			/* Nothing to compare for these record types. */
			break;
		default:
			KASSERT(0);
		}
#ifdef DEBUG
		/* The record's wc_len must match what was consumed. */
		wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
		KASSERT(off == saveoff);
#endif
	}
 out:
	wapbl_free(scratch1, MAXBSIZE);
	wapbl_free(scratch2, MAXBSIZE);
	/* A read error takes precedence over a content mismatch. */
	if (!error && mismatchcnt)
		error = EFTYPE;
	return error;
}
#endif
3092 3143
3093int 3144int
3094wapbl_replay_write(struct wapbl_replay *wr, struct vnode *fsdevvp) 3145wapbl_replay_write(struct wapbl_replay *wr, struct vnode *fsdevvp)
3095{ 3146{
3096 struct wapbl_blk *wb; 3147 struct wapbl_blk *wb;
3097 size_t i; 3148 size_t i;
3098 off_t off; 3149 off_t off;
3099 void *scratch; 3150 void *scratch;
3100 int error = 0; 3151 int error = 0;
3101 int fsblklen = 1 << wr->wr_fs_dev_bshift; 3152 int fsblklen = 1 << wr->wr_fs_dev_bshift;
3102 3153
3103 KDASSERT(wapbl_replay_isopen(wr)); 3154 KDASSERT(wapbl_replay_isopen(wr));
3104 3155
3105 scratch = wapbl_alloc(MAXBSIZE); 3156 scratch = wapbl_alloc(MAXBSIZE);
3106 3157
3107 for (i = 0; i <= wr->wr_blkhashmask; ++i) { 3158 for (i = 0; i <= wr->wr_blkhashmask; ++i) {
3108 LIST_FOREACH(wb, &wr->wr_blkhash[i], wb_hash) { 3159 LIST_FOREACH(wb, &wr->wr_blkhash[i], wb_hash) {
3109 off = wb->wb_off; 3160 off = wb->wb_off;
3110 error = wapbl_circ_read(wr, scratch, fsblklen, &off); 3161 error = wapbl_circ_read(wr, scratch, fsblklen, &off);
3111 if (error) 3162 if (error)
3112 break; 3163 break;
3113 error = wapbl_write(scratch, fsblklen, fsdevvp, 3164 error = wapbl_write(scratch, fsblklen, fsdevvp,
3114 wb->wb_blk); 3165 wb->wb_blk);
3115 if (error) 3166 if (error)
3116 break; 3167 break;
3117 } 3168 }
3118 } 3169 }
3119 3170
3120 wapbl_free(scratch, MAXBSIZE); 3171 wapbl_free(scratch, MAXBSIZE);
3121 return error; 3172 return error;
3122} 3173}
3123 3174
3124int 3175int
3125wapbl_replay_can_read(struct wapbl_replay *wr, daddr_t blk, long len) 3176wapbl_replay_can_read(struct wapbl_replay *wr, daddr_t blk, long len)
3126{ 3177{
3127 int fsblklen = 1 << wr->wr_fs_dev_bshift; 3178 int fsblklen = 1 << wr->wr_fs_dev_bshift;
3128 3179
3129 KDASSERT(wapbl_replay_isopen(wr)); 3180 KDASSERT(wapbl_replay_isopen(wr));
3130 KASSERT((len % fsblklen) == 0); 3181 KASSERT((len % fsblklen) == 0);
3131 3182
3132 while (len != 0) { 3183 while (len != 0) {
3133 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk); 3184 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
3134 if (wb) 3185 if (wb)
3135 return 1; 3186 return 1;
3136 len -= fsblklen; 3187 len -= fsblklen;
3137 } 3188 }
3138 return 0; 3189 return 0;
3139} 3190}
3140 3191
3141int 3192int
3142wapbl_replay_read(struct wapbl_replay *wr, void *data, daddr_t blk, long len) 3193wapbl_replay_read(struct wapbl_replay *wr, void *data, daddr_t blk, long len)
3143{ 3194{
3144 int fsblklen = 1 << wr->wr_fs_dev_bshift; 3195 int fsblklen = 1 << wr->wr_fs_dev_bshift;
3145 3196
3146 KDASSERT(wapbl_replay_isopen(wr)); 3197 KDASSERT(wapbl_replay_isopen(wr));
3147 3198
3148 KASSERT((len % fsblklen) == 0); 3199 KASSERT((len % fsblklen) == 0);
3149 3200
3150 while (len != 0) { 3201 while (len != 0) {
3151 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk); 3202 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
3152 if (wb) { 3203 if (wb) {
3153 off_t off = wb->wb_off; 3204 off_t off = wb->wb_off;
3154 int error; 3205 int error;
3155 error = wapbl_circ_read(wr, data, fsblklen, &off); 3206 error = wapbl_circ_read(wr, data, fsblklen, &off);
3156 if (error) 3207 if (error)
3157 return error; 3208 return error;
3158 } 3209 }
3159 data = (uint8_t *)data + fsblklen; 3210 data = (uint8_t *)data + fsblklen;
3160 len -= fsblklen; 3211 len -= fsblklen;
3161 blk++; 3212 blk++;
3162 } 3213 }
3163 return 0; 3214 return 0;
3164} 3215}
3165 3216
#ifdef _KERNEL

MODULE(MODULE_CLASS_VFS, wapbl, NULL);

/*
 * wapbl_modcmd(cmd, arg)
 *
 *	Module command handler for wapbl.  MODULE_CMD_INIT runs
 *	wapbl_init and returns 0; MODULE_CMD_FINI returns the result of
 *	wapbl_fini; any other command returns ENOTTY.  arg is unused.
 */
static int
wapbl_modcmd(modcmd_t cmd, void *arg)
{

	if (cmd == MODULE_CMD_INIT) {
		wapbl_init();
		return 0;
	}
	if (cmd == MODULE_CMD_FINI)
		return wapbl_fini();

	return ENOTTY;
}
#endif /* _KERNEL */