Sat Jul 25 10:40:35 2015 UTC
Use accessors in DEBUG and DIAGNOSTIC code as well


(martin)
diff -r1.130 -r1.131 src/sys/ufs/lfs/lfs_bio.c
diff -r1.43 -r1.44 src/sys/ufs/lfs/lfs_debug.c
diff -r1.3 -r1.4 src/sys/ufs/lfs/lfs_pages.c
diff -r1.243 -r1.244 src/sys/ufs/lfs/lfs_segment.c
diff -r1.275 -r1.276 src/sys/ufs/lfs/lfs_vnops.c
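The change itself is mechanical: direct superblock field reads such as fs->lfs_bfree and fs->lfs_uinodes become calls to getters such as lfs_sb_getbfree(fs) and lfs_sb_getuinodes(fs), this time in code that is only compiled under the DEBUG and DIAGNOSTIC kernel options and so was not exercised by ordinary builds. A minimal sketch of the accessor pattern, assuming a macro-generated getter/setter pair per field (the macro name, struct members, and field types below are illustrative, not the actual NetBSD definitions):

	/*
	 * Hypothetical sketch: one macro emits a trivial getter and
	 * setter per on-disk superblock field, so callers never touch
	 * the raw dlfs members directly.
	 */
	#define LFS_DEF_SB_ACCESSOR(type, field)			\
		static inline type					\
		lfs_sb_get##field(struct lfs *fs)			\
		{							\
			return fs->lfs_dlfs.dlfs_##field;		\
		}							\
		static inline void					\
		lfs_sb_set##field(struct lfs *fs, type val)		\
		{							\
			fs->lfs_dlfs.dlfs_##field = val;		\
		}

	LFS_DEF_SB_ACCESSOR(u_int32_t, bfree)	/* free disk blocks */
	LFS_DEF_SB_ACCESSOR(u_int32_t, uinodes)	/* dirty in-core inodes */

Funneling every access through one definition means the on-disk layout can change later without touching any of the callers.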

cvs diff -r1.130 -r1.131 src/sys/ufs/lfs/lfs_bio.c

--- src/sys/ufs/lfs/lfs_bio.c 2015/07/24 06:59:32 1.130
+++ src/sys/ufs/lfs/lfs_bio.c 2015/07/25 10:40:35 1.131
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_bio.c,v 1.130 2015/07/24 06:59:32 dholland Exp $	*/
+/*	$NetBSD: lfs_bio.c,v 1.131 2015/07/25 10:40:35 martin Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.130 2015/07/24 06:59:32 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.131 2015/07/25 10:40:35 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -231,7 +231,7 @@
 			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %d,"
 			      " est_bfree = %d)\n",
 			      fsb + fs->lfs_ravail + fs->lfs_favail,
-			      fs->lfs_bfree, LFS_EST_BFREE(fs)));
+			      lfs_sb_getbfree(fs), LFS_EST_BFREE(fs)));
 		}
 		++slept;
 
@@ -367,7 +367,7 @@
 #ifdef DEBUG
 		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
 		      "needed = %jd, avail = %jd\n",
-		      (long)fsb, (long)fs->lfs_uinodes, (intmax_t)needed,
+		      (long)fsb, (long)lfs_sb_getuinodes(fs), (intmax_t)needed,
 		      (intmax_t)lfs_sb_getavail(fs)));
 #endif
 		return 0;

cvs diff -r1.43 -r1.44 src/sys/ufs/lfs/lfs_debug.c

--- src/sys/ufs/lfs/lfs_debug.c 2013/06/18 18:18:58 1.43
+++ src/sys/ufs/lfs/lfs_debug.c 2015/07/25 10:40:35 1.44
@@ -1,325 +1,325 @@ @@ -1,325 +1,325 @@
1/* $NetBSD: lfs_debug.c,v 1.43 2013/06/18 18:18:58 christos Exp $ */ 1/* $NetBSD: lfs_debug.c,v 1.44 2015/07/25 10:40:35 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31/* 31/*
32 * Copyright (c) 1991, 1993 32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved. 33 * The Regents of the University of California. All rights reserved.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors 43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software 44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission. 45 * without specific prior written permission.
46 * 46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE. 57 * SUCH DAMAGE.
58 * 58 *
59 * @(#)lfs_debug.c 8.1 (Berkeley) 6/11/93 59 * @(#)lfs_debug.c 8.1 (Berkeley) 6/11/93
60 */ 60 */
61 61
62#include <sys/cdefs.h> 62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.43 2013/06/18 18:18:58 christos Exp $"); 63__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.44 2015/07/25 10:40:35 martin Exp $");
64 64
65#ifdef DEBUG 65#ifdef DEBUG
66 66
67#include <sys/param.h> 67#include <sys/param.h>
68#include <sys/systm.h> 68#include <sys/systm.h>
69#include <sys/namei.h> 69#include <sys/namei.h>
70#include <sys/vnode.h> 70#include <sys/vnode.h>
71#include <sys/mount.h> 71#include <sys/mount.h>
72#include <sys/buf.h> 72#include <sys/buf.h>
73#include <sys/syslog.h> 73#include <sys/syslog.h>
74#include <sys/proc.h> 74#include <sys/proc.h>
75 75
76#include <ufs/lfs/ulfs_inode.h> 76#include <ufs/lfs/ulfs_inode.h>
77#include <ufs/lfs/lfs.h> 77#include <ufs/lfs/lfs.h>
78#include <ufs/lfs/lfs_extern.h> 78#include <ufs/lfs/lfs_extern.h>
79 79
80int lfs_lognum; 80int lfs_lognum;
81struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; 81struct lfs_log_entry lfs_log[LFS_LOGLENGTH];
82 82
83int 83int
84lfs_bwrite_log(struct buf *bp, const char *file, int line) 84lfs_bwrite_log(struct buf *bp, const char *file, int line)
85{ 85{
86 struct vop_bwrite_args a; 86 struct vop_bwrite_args a;
87 87
88 a.a_desc = VDESC(vop_bwrite); 88 a.a_desc = VDESC(vop_bwrite);
89 a.a_bp = bp; 89 a.a_bp = bp;
90 90
91 if (!(bp->b_flags & B_GATHERED) && !(bp->b_oflags & BO_DELWRI)) { 91 if (!(bp->b_flags & B_GATHERED) && !(bp->b_oflags & BO_DELWRI)) {
92 LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags, 92 LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags,
93 curproc->p_pid); 93 curproc->p_pid);
94 } 94 }
95 return (VCALL(bp->b_vp, VOFFSET(vop_bwrite), &a)); 95 return (VCALL(bp->b_vp, VOFFSET(vop_bwrite), &a));
96} 96}
97 97
98void 98void
99lfs_dumplog(void) 99lfs_dumplog(void)
100{ 100{
101 int i; 101 int i;
102 const char *cp; 102 const char *cp;
103 103
104 for (i = lfs_lognum; i != (lfs_lognum - 1) % LFS_LOGLENGTH; 104 for (i = lfs_lognum; i != (lfs_lognum - 1) % LFS_LOGLENGTH;
105 i = (i + 1) % LFS_LOGLENGTH) 105 i = (i + 1) % LFS_LOGLENGTH)
106 if (lfs_log[i].file) { 106 if (lfs_log[i].file) {
107 /* Only print out basename, for readability */ 107 /* Only print out basename, for readability */
108 cp = lfs_log[i].file; 108 cp = lfs_log[i].file;
109 while(*cp) 109 while(*cp)
110 ++cp; 110 ++cp;
111 while(*cp != '/' && cp > lfs_log[i].file) 111 while(*cp != '/' && cp > lfs_log[i].file)
112 --cp; 112 --cp;
113 113
114 printf("lbn %" PRId64 " %s %lx %d, %d %s\n", 114 printf("lbn %" PRId64 " %s %lx %d, %d %s\n",
115 lfs_log[i].block, 115 lfs_log[i].block,
116 lfs_log[i].op, 116 lfs_log[i].op,
117 lfs_log[i].flags, 117 lfs_log[i].flags,
118 lfs_log[i].pid, 118 lfs_log[i].pid,
119 lfs_log[i].line, 119 lfs_log[i].line,
120 cp); 120 cp);
121 } 121 }
122} 122}
123 123
124void 124void
125lfs_dump_super(struct lfs *lfsp) 125lfs_dump_super(struct lfs *lfsp)
126{ 126{
127 int i; 127 int i;
128 128
129 printf("%s%x\t%s%x\t%s%d\t%s%d\n", 129 printf("%s%x\t%s%x\t%s%d\t%s%d\n",
130 "magic ", lfsp->lfs_magic, 130 "magic ", lfsp->lfs_magic,
131 "version ", lfsp->lfs_version, 131 "version ", lfsp->lfs_version,
132 "size ", lfsp->lfs_size, 132 "size ", lfs_sb_getsize(lfsp),
133 "ssize ", lfsp->lfs_ssize); 133 "ssize ", lfs_sb_getssize(lfsp));
134 printf("%s%d\t%s%d\t%s%d\t%s%d\n", 134 printf("%s%d\t%s%d\t%s%d\t%s%d\n",
135 "dsize ", lfsp->lfs_dsize, 135 "dsize ", lfs_sb_getdsize(lfsp),
136 "bsize ", lfsp->lfs_bsize, 136 "bsize ", lfs_sb_getbsize(lfsp),
137 "fsize ", lfsp->lfs_fsize, 137 "fsize ", lfs_sb_getfsize(lfsp),
138 "frag ", lfsp->lfs_frag); 138 "frag ", lfs_sb_getfrag(lfsp));
139 139
140 printf("%s%d\t%s%d\t%s%d\t%s%d\n", 140 printf("%s%d\t%s%d\t%s%d\t%s%d\n",
141 "minfree ", lfsp->lfs_minfree, 141 "minfree ", lfs_sb_getminfree(lfsp),
142 "inopb ", lfsp->lfs_inopb, 142 "inopb ", lfs_sb_getinopb(lfsp),
143 "ifpb ", lfsp->lfs_ifpb, 143 "ifpb ", lfs_sb_getifpb(lfsp),
144 "nindir ", lfsp->lfs_nindir); 144 "nindir ", lfs_sb_getnindir(lfsp));
145 145
146 printf("%s%d\t%s%d\t%s%d\t%s%d\n", 146 printf("%s%d\t%s%d\t%s%d\t%s%d\n",
147 "nseg ", lfsp->lfs_nseg, 147 "nseg ", lfs_sb_getnseg(lfsp),
148 "nspf ", lfsp->lfs_nspf, 148 "nspf ", lfs_sb_getnspf(lfsp),
149 "cleansz ", lfsp->lfs_cleansz, 149 "cleansz ", lfs_sb_getcleansz(lfsp),
150 "segtabsz ", lfsp->lfs_segtabsz); 150 "segtabsz ", lfs_sb_getsegtabsz(lfsp));
151 151
152 printf("%s%x\t%s%d\t%s%lx\t%s%d\n", 152 printf("%s%x\t%s%d\t%s%lx\t%s%d\n",
153 "segmask ", lfsp->lfs_segmask, 153 "segmask ", lfs_sb_getsegmask(lfsp),
154 "segshift ", lfsp->lfs_segshift, 154 "segshift ", lfs_sb_getsegshift(lfsp),
155 "bmask ", (unsigned long)lfsp->lfs_bmask, 155 "bmask ", (unsigned long)lfs_sb_getbmask(lfsp),
156 "bshift ", lfsp->lfs_bshift); 156 "bshift ", lfs_sb_getbshift(lfsp));
157 157
158 printf("%s%lu\t%s%d\t%s%lx\t%s%u\n", 158 printf("%s%lu\t%s%d\t%s%lx\t%s%u\n",
159 "ffmask ", (unsigned long)lfsp->lfs_ffmask, 159 "ffmask ", (unsigned long)lfs_sb_getffmask(lfsp),
160 "ffshift ", lfsp->lfs_ffshift, 160 "ffshift ", lfs_sb_getffshift(lfsp),
161 "fbmask ", (unsigned long)lfsp->lfs_fbmask, 161 "fbmask ", (unsigned long)lfs_sb_getfbmask(lfsp),
162 "fbshift ", lfsp->lfs_fbshift); 162 "fbshift ", lfs_sb_getfbshift(lfsp));
163 163
164 printf("%s%d\t%s%d\t%s%x\t%s%qx\n", 164 printf("%s%d\t%s%d\t%s%x\t%s%qx\n",
165 "sushift ", lfsp->lfs_sushift, 165 "sushift ", lfs_sb_getsushift(lfsp),
166 "fsbtodb ", lfsp->lfs_fsbtodb, 166 "fsbtodb ", lfs_sb_getfsbtodb(lfsp),
167 "cksum ", lfsp->lfs_cksum, 167 "cksum ", lfs_sb_getcksum(lfsp),
168 "maxfilesize ", (long long)lfsp->lfs_maxfilesize); 168 "maxfilesize ", (long long)lfs_sb_getmaxfilesize(lfsp));
169 169
170 printf("Superblock disk addresses:"); 170 printf("Superblock disk addresses:");
171 for (i = 0; i < LFS_MAXNUMSB; i++) 171 for (i = 0; i < LFS_MAXNUMSB; i++)
172 printf(" %x", lfsp->lfs_sboffs[i]); 172 printf(" %x", lfs_sb_getsboff(lfsp, i));
173 printf("\n"); 173 printf("\n");
174 174
175 printf("Checkpoint Info\n"); 175 printf("Checkpoint Info\n");
176 printf("%s%d\t%s%x\t%s%d\n", 176 printf("%s%d\t%s%x\t%s%d\n",
177 "freehd ", lfsp->lfs_freehd, 177 "freehd ", lfs_sb_getfreehd(lfsp),
178 "idaddr ", lfsp->lfs_idaddr, 178 "idaddr ", lfs_sb_getidaddr(lfsp),
179 "ifile ", lfsp->lfs_ifile); 179 "ifile ", lfs_sb_getifile(lfsp));
180 printf("%s%x\t%s%d\t%s%x\t%s%x\t%s%x\t%s%x\n", 180 printf("%s%x\t%s%d\t%s%x\t%s%x\t%s%x\t%s%x\n",
181 "bfree ", lfsp->lfs_bfree, 181 "bfree ", lfs_sb_getbfree(lfsp),
182 "nfiles ", lfsp->lfs_nfiles, 182 "nfiles ", lfs_sb_getnfiles(lfsp),
183 "lastseg ", lfsp->lfs_lastseg, 183 "lastseg ", lfs_sb_getlastseg(lfsp),
184 "nextseg ", lfsp->lfs_nextseg, 184 "nextseg ", lfs_sb_getnextseg(lfsp),
185 "curseg ", lfsp->lfs_curseg, 185 "curseg ", lfs_sb_getcurseg(lfsp),
186 "offset ", lfsp->lfs_offset); 186 "offset ", lfs_sb_getoffset(lfsp));
187 printf("tstamp %llx\n", (long long)lfsp->lfs_tstamp); 187 printf("tstamp %llx\n", (long long)lfs_sb_gettstamp(lfsp));
188} 188}
189 189
190void 190void
191lfs_dump_dinode(struct ulfs1_dinode *dip) 191lfs_dump_dinode(struct ulfs1_dinode *dip)
192{ 192{
193 int i; 193 int i;
194 194
195 printf("%s%u\t%s%d\t%s%u\t%s%u\t%s%qu\t%s%d\n", 195 printf("%s%u\t%s%d\t%s%u\t%s%u\t%s%qu\t%s%d\n",
196 "mode ", dip->di_mode, 196 "mode ", dip->di_mode,
197 "nlink ", dip->di_nlink, 197 "nlink ", dip->di_nlink,
198 "uid ", dip->di_uid, 198 "uid ", dip->di_uid,
199 "gid ", dip->di_gid, 199 "gid ", dip->di_gid,
200 "size ", (long long)dip->di_size, 200 "size ", (long long)dip->di_size,
201 "blocks ", dip->di_blocks); 201 "blocks ", dip->di_blocks);
202 printf("inum %d\n", dip->di_inumber); 202 printf("inum %d\n", dip->di_inumber);
203 printf("Direct Addresses\n"); 203 printf("Direct Addresses\n");
204 for (i = 0; i < ULFS_NDADDR; i++) { 204 for (i = 0; i < ULFS_NDADDR; i++) {
205 printf("\t%x", dip->di_db[i]); 205 printf("\t%x", dip->di_db[i]);
206 if ((i % 6) == 5) 206 if ((i % 6) == 5)
207 printf("\n"); 207 printf("\n");
208 } 208 }
209 for (i = 0; i < ULFS_NIADDR; i++) 209 for (i = 0; i < ULFS_NIADDR; i++)
210 printf("\t%x", dip->di_ib[i]); 210 printf("\t%x", dip->di_ib[i]);
211 printf("\n"); 211 printf("\n");
212} 212}
213 213
214void 214void
215lfs_check_segsum(struct lfs *fs, struct segment *sp, char *file, int line) 215lfs_check_segsum(struct lfs *fs, struct segment *sp, char *file, int line)
216{ 216{
217 int actual; 217 int actual;
218#if 0 218#if 0
219 static int offset; 219 static int offset;
220#endif 220#endif
221 221
222 if ((actual = 1) == 1) 222 if ((actual = 1) == 1)
223 return; /* XXXX not checking this anymore, really */ 223 return; /* XXXX not checking this anymore, really */
224 224
225 if (sp->sum_bytes_left >= FINFOSIZE 225 if (sp->sum_bytes_left >= FINFOSIZE
226 && sp->fip->fi_nblocks > 512) { 226 && sp->fip->fi_nblocks > 512) {
227 printf("%s:%d: fi_nblocks = %d\n",file,line,sp->fip->fi_nblocks); 227 printf("%s:%d: fi_nblocks = %d\n",file,line,sp->fip->fi_nblocks);
228#ifdef DDB 228#ifdef DDB
229 Debugger(); 229 Debugger();
230#endif 230#endif
231 } 231 }
232 232
233 if (sp->sum_bytes_left > 484) { 233 if (sp->sum_bytes_left > 484) {
234 printf("%s:%d: bad value (%d = -%d) for sum_bytes_left\n", 234 printf("%s:%d: bad value (%d = -%d) for sum_bytes_left\n",
235 file, line, sp->sum_bytes_left, fs->lfs_sumsize-sp->sum_bytes_left); 235 file, line, sp->sum_bytes_left, lfs_sb_getsumsize(fs)-sp->sum_bytes_left);
236 panic("too many bytes"); 236 panic("too many bytes");
237 } 237 }
238 238
239 actual = fs->lfs_sumsize 239 actual = lfs_sb_getsumsize(fs)
240 /* amount taken up by FINFOs */ 240 /* amount taken up by FINFOs */
241 - ((char *)&(sp->fip->fi_blocks[sp->fip->fi_nblocks]) - (char *)(sp->segsum)) 241 - ((char *)&(sp->fip->fi_blocks[sp->fip->fi_nblocks]) - (char *)(sp->segsum))
242 /* amount taken up by inode blocks */ 242 /* amount taken up by inode blocks */
243 - sizeof(int32_t)*((sp->ninodes+LFS_INOPB(fs)-1) / LFS_INOPB(fs)); 243 - sizeof(int32_t)*((sp->ninodes+LFS_INOPB(fs)-1) / LFS_INOPB(fs));
244#if 0 244#if 0
245 if (actual - sp->sum_bytes_left < offset) 245 if (actual - sp->sum_bytes_left < offset)
246 { 246 {
247 printf("%s:%d: offset changed %d -> %d\n", file, line, 247 printf("%s:%d: offset changed %d -> %d\n", file, line,
248 offset, actual-sp->sum_bytes_left); 248 offset, actual-sp->sum_bytes_left);
249 offset = actual - sp->sum_bytes_left; 249 offset = actual - sp->sum_bytes_left;
250 /* panic("byte mismatch"); */ 250 /* panic("byte mismatch"); */
251 } 251 }
252#endif 252#endif
253#if 0 253#if 0
254 if (actual != sp->sum_bytes_left) 254 if (actual != sp->sum_bytes_left)
255 printf("%s:%d: warning: segsum miscalc at %d (-%d => %d)\n", 255 printf("%s:%d: warning: segsum miscalc at %d (-%d => %d)\n",
256 file, line, sp->sum_bytes_left, 256 file, line, sp->sum_bytes_left,
257 fs->lfs_sumsize-sp->sum_bytes_left, 257 fs->lfs_sumsize-sp->sum_bytes_left,
258 actual); 258 actual);
259#endif 259#endif
260 if (sp->sum_bytes_left > 0 260 if (sp->sum_bytes_left > 0
261 && ((char *)(sp->segsum))[fs->lfs_sumsize 261 && ((char *)(sp->segsum))[lfs_sb_getsumsize(fs)
262 - sizeof(int32_t) * ((sp->ninodes+LFS_INOPB(fs)-1) / LFS_INOPB(fs)) 262 - sizeof(int32_t) * ((sp->ninodes+LFS_INOPB(fs)-1) / LFS_INOPB(fs))
263 - sp->sum_bytes_left] != '\0') { 263 - sp->sum_bytes_left] != '\0') {
264 printf("%s:%d: warning: segsum overwrite at %d (-%d => %d)\n", 264 printf("%s:%d: warning: segsum overwrite at %d (-%d => %d)\n",
265 file, line, sp->sum_bytes_left, 265 file, line, sp->sum_bytes_left,
266 fs->lfs_sumsize-sp->sum_bytes_left, 266 lfs_sb_getsumsize(fs)-sp->sum_bytes_left,
267 actual); 267 actual);
268#ifdef DDB 268#ifdef DDB
269 Debugger(); 269 Debugger();
270#endif 270#endif
271 } 271 }
272} 272}
273 273
274void 274void
275lfs_check_bpp(struct lfs *fs, struct segment *sp, char *file, int line) 275lfs_check_bpp(struct lfs *fs, struct segment *sp, char *file, int line)
276{ 276{
277 daddr_t blkno; 277 daddr_t blkno;
278 struct buf **bpp; 278 struct buf **bpp;
279 struct vnode *devvp; 279 struct vnode *devvp;
280 280
281 devvp = VTOI(fs->lfs_ivnode)->i_devvp; 281 devvp = VTOI(fs->lfs_ivnode)->i_devvp;
282 blkno = (*(sp->bpp))->b_blkno; 282 blkno = (*(sp->bpp))->b_blkno;
283 for (bpp = sp->bpp; bpp < sp->cbpp; bpp++) { 283 for (bpp = sp->bpp; bpp < sp->cbpp; bpp++) {
284 if ((*bpp)->b_blkno != blkno) { 284 if ((*bpp)->b_blkno != blkno) {
285 if ((*bpp)->b_vp == devvp) { 285 if ((*bpp)->b_vp == devvp) {
286 printf("Oops, would misplace raw block " 286 printf("Oops, would misplace raw block "
287 "0x%" PRIx64 " at 0x%" PRIx64 "\n", 287 "0x%" PRIx64 " at 0x%" PRIx64 "\n",
288 (*bpp)->b_blkno, 288 (*bpp)->b_blkno,
289 blkno); 289 blkno);
290 } else { 290 } else {
291 printf("%s:%d: misplace ino %llu lbn %" PRId64 291 printf("%s:%d: misplace ino %llu lbn %" PRId64
292 " at 0x%" PRIx64 " instead of " 292 " at 0x%" PRIx64 " instead of "
293 "0x%" PRIx64 "\n", 293 "0x%" PRIx64 "\n",
294 file, line, 294 file, line,
295 (unsigned long long) 295 (unsigned long long)
296 VTOI((*bpp)->b_vp)->i_number, 296 VTOI((*bpp)->b_vp)->i_number,
297 (*bpp)->b_lblkno, 297 (*bpp)->b_lblkno,
298 blkno, 298 blkno,
299 (*bpp)->b_blkno); 299 (*bpp)->b_blkno);
300 } 300 }
301 } 301 }
302 blkno += LFS_FSBTODB(fs, lfs_btofsb(fs, (*bpp)->b_bcount)); 302 blkno += LFS_FSBTODB(fs, lfs_btofsb(fs, (*bpp)->b_bcount));
303 } 303 }
304} 304}
305 305
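lfs_check_bpp() above verifies that the gathered buffers are laid out back to back: the expected disk address advances by each buffer's size, converted from bytes to filesystem fragments (lfs_btofsb) and then to device blocks (LFS_FSBTODB). A minimal sketch of that conversion in isolation, with the 16 KiB size chosen arbitrarily for illustration:

	/*
	 * Hypothetical example, assuming a valid struct lfs *fs:
	 * compute how far a 16 KiB buffer advances the expected
	 * on-disk address.  lfs_btofsb() turns bytes into filesystem
	 * fragments; LFS_FSBTODB() turns fragments into device blocks.
	 */
	daddr_t delta = LFS_FSBTODB(fs, lfs_btofsb(fs, 16384));
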
306int lfs_debug_log_subsys[DLOG_MAX]; 306int lfs_debug_log_subsys[DLOG_MAX];
307 307
308/* 308/*
309 * Log events from various debugging areas of LFS, depending on what 309 * Log events from various debugging areas of LFS, depending on what
310 * the user has enabled. 310 * the user has enabled.
311 */ 311 */
312void 312void
313lfs_debug_log(int subsys, const char *fmt, ...) 313lfs_debug_log(int subsys, const char *fmt, ...)
314{ 314{
315 va_list ap; 315 va_list ap;
316 316
317 /* If not debugging this subsys, exit */ 317 /* If not debugging this subsys, exit */
318 if (lfs_debug_log_subsys[subsys] == 0) 318 if (lfs_debug_log_subsys[subsys] == 0)
319 return; 319 return;
320 320
321 va_start(ap, fmt); 321 va_start(ap, fmt);
322 vlog(LOG_DEBUG, fmt, ap); 322 vlog(LOG_DEBUG, fmt, ap);
323 va_end(ap); 323 va_end(ap);
324} 324}
325#endif /* DEBUG */ 325#endif /* DEBUG */
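
lfs_debug_log() above is the backend for the DLOG() macro used throughout lfs_pages.c below: a subsystem's messages reach the kernel log only while its slot in lfs_debug_log_subsys[] is nonzero. A minimal usage sketch (hypothetical call site, not part of this change):

	/*
	 * Illustrative only: enable page-subsystem messages, then
	 * log one.  Real call sites go through the DLOG() macro,
	 * which presumably expands to nothing in kernels built
	 * without DEBUG, so the format arguments cost nothing there.
	 */
	lfs_debug_log_subsys[DLOG_PAGE] = 1;
	lfs_debug_log(DLOG_PAGE, "lfs_putpages: flushing vn %p\n", vp);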

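The bulk of the lfs_debug.c change converts lfs_dump_super() from reading superblock fields directly (lfsp->lfs_bsize) to calling the lfs_sb_get*() accessors. As a rough sketch of the pattern, and not the actual NetBSD definitions, such accessors can be generated from the field list so that callers stay independent of the on-disk superblock layout:

	/*
	 * Hypothetical sketch of an accessor generator; the real
	 * definitions live in the LFS headers and may differ.
	 * Assumes a struct lfs with members named lfs_<field>.
	 */
	#define LFS_DEF_SB_ACCESSOR(type, field)			\
		static inline type					\
		lfs_sb_get##field(struct lfs *fs)			\
		{							\
			return fs->lfs_##field;				\
		}

	LFS_DEF_SB_ACCESSOR(uint32_t, bsize)	/* lfs_sb_getbsize() */
	LFS_DEF_SB_ACCESSOR(uint32_t, fsize)	/* lfs_sb_getfsize() */

With definitions along these lines, the accessor bodies can later be changed (say, for byte order or a second superblock format) without touching call sites such as the debug code above.
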
cvs diff -r1.3 -r1.4 src/sys/ufs/lfs/lfs_pages.c (switch to unified diff)

--- src/sys/ufs/lfs/lfs_pages.c 2015/07/24 06:59:32 1.3
+++ src/sys/ufs/lfs/lfs_pages.c 2015/07/25 10:40:35 1.4
@@ -1,893 +1,893 @@ @@ -1,893 +1,893 @@
1/* $NetBSD: lfs_pages.c,v 1.3 2015/07/24 06:59:32 dholland Exp $ */ 1/* $NetBSD: lfs_pages.c,v 1.4 2015/07/25 10:40:35 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31/* 31/*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved. 33 * The Regents of the University of California. All rights reserved.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors 43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software 44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission. 45 * without specific prior written permission.
46 * 46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE. 57 * SUCH DAMAGE.
58 * 58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */ 60 */
61 61
62#include <sys/cdefs.h> 62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.3 2015/07/24 06:59:32 dholland Exp $"); 63__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.4 2015/07/25 10:40:35 martin Exp $");
64 64
65#ifdef _KERNEL_OPT 65#ifdef _KERNEL_OPT
66#include "opt_compat_netbsd.h" 66#include "opt_compat_netbsd.h"
67#include "opt_uvm_page_trkown.h" 67#include "opt_uvm_page_trkown.h"
68#endif 68#endif
69 69
70#include <sys/param.h> 70#include <sys/param.h>
71#include <sys/systm.h> 71#include <sys/systm.h>
72#include <sys/namei.h> 72#include <sys/namei.h>
73#include <sys/resourcevar.h> 73#include <sys/resourcevar.h>
74#include <sys/kernel.h> 74#include <sys/kernel.h>
75#include <sys/file.h> 75#include <sys/file.h>
76#include <sys/stat.h> 76#include <sys/stat.h>
77#include <sys/buf.h> 77#include <sys/buf.h>
78#include <sys/proc.h> 78#include <sys/proc.h>
79#include <sys/mount.h> 79#include <sys/mount.h>
80#include <sys/vnode.h> 80#include <sys/vnode.h>
81#include <sys/pool.h> 81#include <sys/pool.h>
82#include <sys/signalvar.h> 82#include <sys/signalvar.h>
83#include <sys/kauth.h> 83#include <sys/kauth.h>
84#include <sys/syslog.h> 84#include <sys/syslog.h>
85#include <sys/fstrans.h> 85#include <sys/fstrans.h>
86 86
87#include <miscfs/fifofs/fifo.h> 87#include <miscfs/fifofs/fifo.h>
88#include <miscfs/genfs/genfs.h> 88#include <miscfs/genfs/genfs.h>
89#include <miscfs/specfs/specdev.h> 89#include <miscfs/specfs/specdev.h>
90 90
91#include <ufs/lfs/ulfs_inode.h> 91#include <ufs/lfs/ulfs_inode.h>
92#include <ufs/lfs/ulfsmount.h> 92#include <ufs/lfs/ulfsmount.h>
93#include <ufs/lfs/ulfs_bswap.h> 93#include <ufs/lfs/ulfs_bswap.h>
94#include <ufs/lfs/ulfs_extern.h> 94#include <ufs/lfs/ulfs_extern.h>
95 95
96#include <uvm/uvm.h> 96#include <uvm/uvm.h>
97#include <uvm/uvm_pmap.h> 97#include <uvm/uvm_pmap.h>
98#include <uvm/uvm_stat.h> 98#include <uvm/uvm_stat.h>
99#include <uvm/uvm_pager.h> 99#include <uvm/uvm_pager.h>
100 100
101#include <ufs/lfs/lfs.h> 101#include <ufs/lfs/lfs.h>
102#include <ufs/lfs/lfs_kernel.h> 102#include <ufs/lfs/lfs_kernel.h>
103#include <ufs/lfs/lfs_extern.h> 103#include <ufs/lfs/lfs_extern.h>
104 104
105extern pid_t lfs_writer_daemon; 105extern pid_t lfs_writer_daemon;
106 106
107static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); 107static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **);
108 108
109int 109int
110lfs_getpages(void *v) 110lfs_getpages(void *v)
111{ 111{
112 struct vop_getpages_args /* { 112 struct vop_getpages_args /* {
113 struct vnode *a_vp; 113 struct vnode *a_vp;
114 voff_t a_offset; 114 voff_t a_offset;
115 struct vm_page **a_m; 115 struct vm_page **a_m;
116 int *a_count; 116 int *a_count;
117 int a_centeridx; 117 int a_centeridx;
118 vm_prot_t a_access_type; 118 vm_prot_t a_access_type;
119 int a_advice; 119 int a_advice;
120 int a_flags; 120 int a_flags;
121 } */ *ap = v; 121 } */ *ap = v;
122 122
123 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && 123 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
124 (ap->a_access_type & VM_PROT_WRITE) != 0) { 124 (ap->a_access_type & VM_PROT_WRITE) != 0) {
125 return EPERM; 125 return EPERM;
126 } 126 }
127 if ((ap->a_access_type & VM_PROT_WRITE) != 0) { 127 if ((ap->a_access_type & VM_PROT_WRITE) != 0) {
128 mutex_enter(&lfs_lock); 128 mutex_enter(&lfs_lock);
129 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); 129 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
130 mutex_exit(&lfs_lock); 130 mutex_exit(&lfs_lock);
131 } 131 }
132 132
133 /* 133 /*
 134 * we're relying on the fact that genfs_getpages() always reads in 134 * we're relying on the fact that genfs_getpages() always reads in
135 * entire filesystem blocks. 135 * entire filesystem blocks.
136 */ 136 */
137 return genfs_getpages(v); 137 return genfs_getpages(v);
138} 138}
139 139
140/* 140/*
141 * Wait for a page to become unbusy, possibly printing diagnostic messages 141 * Wait for a page to become unbusy, possibly printing diagnostic messages
142 * as well. 142 * as well.
143 * 143 *
144 * Called with vp->v_interlock held; return with it held. 144 * Called with vp->v_interlock held; return with it held.
145 */ 145 */
146static void 146static void
147wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) 147wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label)
148{ 148{
149 KASSERT(mutex_owned(vp->v_interlock)); 149 KASSERT(mutex_owned(vp->v_interlock));
150 if ((pg->flags & PG_BUSY) == 0) 150 if ((pg->flags & PG_BUSY) == 0)
151 return; /* Nothing to wait for! */ 151 return; /* Nothing to wait for! */
152 152
153#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) 153#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
154 static struct vm_page *lastpg; 154 static struct vm_page *lastpg;
155 155
156 if (label != NULL && pg != lastpg) { 156 if (label != NULL && pg != lastpg) {
157 if (pg->owner_tag) { 157 if (pg->owner_tag) {
158 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", 158 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
159 curproc->p_pid, curlwp->l_lid, label, 159 curproc->p_pid, curlwp->l_lid, label,
160 pg, pg->owner, pg->lowner, pg->owner_tag); 160 pg, pg->owner, pg->lowner, pg->owner_tag);
161 } else { 161 } else {
162 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", 162 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
163 curproc->p_pid, curlwp->l_lid, label, pg); 163 curproc->p_pid, curlwp->l_lid, label, pg);
164 } 164 }
165 } 165 }
166 lastpg = pg; 166 lastpg = pg;
167#endif 167#endif
168 168
169 pg->flags |= PG_WANTED; 169 pg->flags |= PG_WANTED;
170 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0); 170 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0);
171 mutex_enter(vp->v_interlock); 171 mutex_enter(vp->v_interlock);
172} 172}
173 173
174/* 174/*
175 * This routine is called by lfs_putpages() when it can't complete the 175 * This routine is called by lfs_putpages() when it can't complete the
176 * write because a page is busy. This means that either (1) someone, 176 * write because a page is busy. This means that either (1) someone,
177 * possibly the pagedaemon, is looking at this page, and will give it up 177 * possibly the pagedaemon, is looking at this page, and will give it up
178 * presently; or (2) we ourselves are holding the page busy in the 178 * presently; or (2) we ourselves are holding the page busy in the
179 * process of being written (either gathered or actually on its way to 179 * process of being written (either gathered or actually on its way to
180 * disk). We don't need to give up the segment lock, but we might need 180 * disk). We don't need to give up the segment lock, but we might need
181 * to call lfs_writeseg() to expedite the page's journey to disk. 181 * to call lfs_writeseg() to expedite the page's journey to disk.
182 * 182 *
183 * Called with vp->v_interlock held; return with it held. 183 * Called with vp->v_interlock held; return with it held.
184 */ 184 */
185/* #define BUSYWAIT */ 185/* #define BUSYWAIT */
186static void 186static void
187write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, 187write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg,
188 int seglocked, const char *label) 188 int seglocked, const char *label)
189{ 189{
190 KASSERT(mutex_owned(vp->v_interlock)); 190 KASSERT(mutex_owned(vp->v_interlock));
191#ifndef BUSYWAIT 191#ifndef BUSYWAIT
192 struct inode *ip = VTOI(vp); 192 struct inode *ip = VTOI(vp);
193 struct segment *sp = fs->lfs_sp; 193 struct segment *sp = fs->lfs_sp;
194 int count = 0; 194 int count = 0;
195 195
196 if (pg == NULL) 196 if (pg == NULL)
197 return; 197 return;
198 198
199 while (pg->flags & PG_BUSY && 199 while (pg->flags & PG_BUSY &&
200 pg->uobject == &vp->v_uobj) { 200 pg->uobject == &vp->v_uobj) {
201 mutex_exit(vp->v_interlock); 201 mutex_exit(vp->v_interlock);
202 if (sp->cbpp - sp->bpp > 1) { 202 if (sp->cbpp - sp->bpp > 1) {
203 /* Write gathered pages */ 203 /* Write gathered pages */
204 lfs_updatemeta(sp); 204 lfs_updatemeta(sp);
205 lfs_release_finfo(fs); 205 lfs_release_finfo(fs);
206 (void) lfs_writeseg(fs, sp); 206 (void) lfs_writeseg(fs, sp);
207 207
208 /* 208 /*
209 * Reinitialize FIP 209 * Reinitialize FIP
210 */ 210 */
211 KASSERT(sp->vp == vp); 211 KASSERT(sp->vp == vp);
212 lfs_acquire_finfo(fs, ip->i_number, 212 lfs_acquire_finfo(fs, ip->i_number,
213 ip->i_gen); 213 ip->i_gen);
214 } 214 }
215 ++count; 215 ++count;
216 mutex_enter(vp->v_interlock); 216 mutex_enter(vp->v_interlock);
217 wait_for_page(vp, pg, label); 217 wait_for_page(vp, pg, label);
218 } 218 }
219 if (label != NULL && count > 1) { 219 if (label != NULL && count > 1) {
220 DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n", 220 DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
221 curproc->p_pid, label, (count > 0 ? "looping, " : ""), 221 curproc->p_pid, label, (count > 0 ? "looping, " : ""),
222 count)); 222 count));
223 } 223 }
224#else 224#else
225 preempt(1); 225 preempt(1);
226#endif 226#endif
227 KASSERT(mutex_owned(vp->v_interlock)); 227 KASSERT(mutex_owned(vp->v_interlock));
228} 228}
229 229
230/* 230/*
231 * Make sure that for all pages in every block in the given range, 231 * Make sure that for all pages in every block in the given range,
232 * either all are dirty or all are clean. If any of the pages 232 * either all are dirty or all are clean. If any of the pages
233 * we've seen so far are dirty, put the vnode on the paging chain, 233 * we've seen so far are dirty, put the vnode on the paging chain,
234 * and mark it IN_PAGING. 234 * and mark it IN_PAGING.
235 * 235 *
236 * If checkfirst != 0, don't check all the pages but return at the 236 * If checkfirst != 0, don't check all the pages but return at the
237 * first dirty page. 237 * first dirty page.
238 */ 238 */
239static int 239static int
240check_dirty(struct lfs *fs, struct vnode *vp, 240check_dirty(struct lfs *fs, struct vnode *vp,
241 off_t startoffset, off_t endoffset, off_t blkeof, 241 off_t startoffset, off_t endoffset, off_t blkeof,
242 int flags, int checkfirst, struct vm_page **pgp) 242 int flags, int checkfirst, struct vm_page **pgp)
243{ 243{
244 int by_list; 244 int by_list;
245 struct vm_page *curpg = NULL; /* XXX: gcc */ 245 struct vm_page *curpg = NULL; /* XXX: gcc */
246 struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg; 246 struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg;
247 off_t soff = 0; /* XXX: gcc */ 247 off_t soff = 0; /* XXX: gcc */
248 voff_t off; 248 voff_t off;
249 int i; 249 int i;
250 int nonexistent; 250 int nonexistent;
251 int any_dirty; /* number of dirty pages */ 251 int any_dirty; /* number of dirty pages */
252 int dirty; /* number of dirty pages in a block */ 252 int dirty; /* number of dirty pages in a block */
253 int tdirty; 253 int tdirty;
254 int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT; 254 int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT;
255 int pagedaemon = (curlwp == uvm.pagedaemon_lwp); 255 int pagedaemon = (curlwp == uvm.pagedaemon_lwp);
256 256
257 KASSERT(mutex_owned(vp->v_interlock)); 257 KASSERT(mutex_owned(vp->v_interlock));
258 ASSERT_MAYBE_SEGLOCK(fs); 258 ASSERT_MAYBE_SEGLOCK(fs);
259 top: 259 top:
260 by_list = (vp->v_uobj.uo_npages <= 260 by_list = (vp->v_uobj.uo_npages <=
261 ((endoffset - startoffset) >> PAGE_SHIFT) * 261 ((endoffset - startoffset) >> PAGE_SHIFT) *
262 UVM_PAGE_TREE_PENALTY); 262 UVM_PAGE_TREE_PENALTY);
263 any_dirty = 0; 263 any_dirty = 0;
264 264
265 if (by_list) { 265 if (by_list) {
266 curpg = TAILQ_FIRST(&vp->v_uobj.memq); 266 curpg = TAILQ_FIRST(&vp->v_uobj.memq);
267 } else { 267 } else {
268 soff = startoffset; 268 soff = startoffset;
269 } 269 }
270 while (by_list || soff < MIN(blkeof, endoffset)) { 270 while (by_list || soff < MIN(blkeof, endoffset)) {
271 if (by_list) { 271 if (by_list) {
272 /* 272 /*
273 * Find the first page in a block. Skip 273 * Find the first page in a block. Skip
274 * blocks outside our area of interest or beyond 274 * blocks outside our area of interest or beyond
275 * the end of file. 275 * the end of file.
276 */ 276 */
277 KASSERT(curpg == NULL 277 KASSERT(curpg == NULL
278 || (curpg->flags & PG_MARKER) == 0); 278 || (curpg->flags & PG_MARKER) == 0);
279 if (pages_per_block > 1) { 279 if (pages_per_block > 1) {
280 while (curpg && 280 while (curpg &&
281 ((curpg->offset & lfs_sb_getbmask(fs)) || 281 ((curpg->offset & lfs_sb_getbmask(fs)) ||
282 curpg->offset >= vp->v_size || 282 curpg->offset >= vp->v_size ||
283 curpg->offset >= endoffset)) { 283 curpg->offset >= endoffset)) {
284 curpg = TAILQ_NEXT(curpg, listq.queue); 284 curpg = TAILQ_NEXT(curpg, listq.queue);
285 KASSERT(curpg == NULL || 285 KASSERT(curpg == NULL ||
286 (curpg->flags & PG_MARKER) == 0); 286 (curpg->flags & PG_MARKER) == 0);
287 } 287 }
288 } 288 }
289 if (curpg == NULL) 289 if (curpg == NULL)
290 break; 290 break;
291 soff = curpg->offset; 291 soff = curpg->offset;
292 } 292 }
293 293
294 /* 294 /*
295 * Mark all pages in extended range busy; find out if any 295 * Mark all pages in extended range busy; find out if any
296 * of them are dirty. 296 * of them are dirty.
297 */ 297 */
298 nonexistent = dirty = 0; 298 nonexistent = dirty = 0;
299 for (i = 0; i == 0 || i < pages_per_block; i++) { 299 for (i = 0; i == 0 || i < pages_per_block; i++) {
300 KASSERT(mutex_owned(vp->v_interlock)); 300 KASSERT(mutex_owned(vp->v_interlock));
301 if (by_list && pages_per_block <= 1) { 301 if (by_list && pages_per_block <= 1) {
302 pgs[i] = pg = curpg; 302 pgs[i] = pg = curpg;
303 } else { 303 } else {
304 off = soff + (i << PAGE_SHIFT); 304 off = soff + (i << PAGE_SHIFT);
305 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); 305 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
306 if (pg == NULL) { 306 if (pg == NULL) {
307 ++nonexistent; 307 ++nonexistent;
308 continue; 308 continue;
309 } 309 }
310 } 310 }
311 KASSERT(pg != NULL); 311 KASSERT(pg != NULL);
312 312
313 /* 313 /*
314 * If we're holding the segment lock, we can deadlock 314 * If we're holding the segment lock, we can deadlock
315 * against a process that has our page and is waiting 315 * against a process that has our page and is waiting
316 * for the cleaner, while the cleaner waits for the 316 * for the cleaner, while the cleaner waits for the
317 * segment lock. Just bail in that case. 317 * segment lock. Just bail in that case.
318 */ 318 */
319 if ((pg->flags & PG_BUSY) && 319 if ((pg->flags & PG_BUSY) &&
320 (pagedaemon || LFS_SEGLOCK_HELD(fs))) { 320 (pagedaemon || LFS_SEGLOCK_HELD(fs))) {
321 if (i > 0) 321 if (i > 0)
322 uvm_page_unbusy(pgs, i); 322 uvm_page_unbusy(pgs, i);
323 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); 323 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
324 if (pgp) 324 if (pgp)
325 *pgp = pg; 325 *pgp = pg;
326 KASSERT(mutex_owned(vp->v_interlock)); 326 KASSERT(mutex_owned(vp->v_interlock));
327 return -1; 327 return -1;
328 } 328 }
329 329
330 while (pg->flags & PG_BUSY) { 330 while (pg->flags & PG_BUSY) {
331 wait_for_page(vp, pg, NULL); 331 wait_for_page(vp, pg, NULL);
332 KASSERT(mutex_owned(vp->v_interlock)); 332 KASSERT(mutex_owned(vp->v_interlock));
333 if (i > 0) 333 if (i > 0)
334 uvm_page_unbusy(pgs, i); 334 uvm_page_unbusy(pgs, i);
335 KASSERT(mutex_owned(vp->v_interlock)); 335 KASSERT(mutex_owned(vp->v_interlock));
336 goto top; 336 goto top;
337 } 337 }
338 pg->flags |= PG_BUSY; 338 pg->flags |= PG_BUSY;
339 UVM_PAGE_OWN(pg, "lfs_putpages"); 339 UVM_PAGE_OWN(pg, "lfs_putpages");
340 340
341 pmap_page_protect(pg, VM_PROT_NONE); 341 pmap_page_protect(pg, VM_PROT_NONE);
342 tdirty = (pmap_clear_modify(pg) || 342 tdirty = (pmap_clear_modify(pg) ||
343 (pg->flags & PG_CLEAN) == 0); 343 (pg->flags & PG_CLEAN) == 0);
344 dirty += tdirty; 344 dirty += tdirty;
345 } 345 }
346 if (pages_per_block > 0 && nonexistent >= pages_per_block) { 346 if (pages_per_block > 0 && nonexistent >= pages_per_block) {
347 if (by_list) { 347 if (by_list) {
348 curpg = TAILQ_NEXT(curpg, listq.queue); 348 curpg = TAILQ_NEXT(curpg, listq.queue);
349 } else { 349 } else {
350 soff += lfs_sb_getbsize(fs); 350 soff += lfs_sb_getbsize(fs);
351 } 351 }
352 continue; 352 continue;
353 } 353 }
354 354
355 any_dirty += dirty; 355 any_dirty += dirty;
356 KASSERT(nonexistent == 0); 356 KASSERT(nonexistent == 0);
357 KASSERT(mutex_owned(vp->v_interlock)); 357 KASSERT(mutex_owned(vp->v_interlock));
358 358
359 /* 359 /*
360 * If any are dirty make all dirty; unbusy them, 360 * If any are dirty make all dirty; unbusy them,
361 * but if we were asked to clean, wire them so that 361 * but if we were asked to clean, wire them so that
362 * the pagedaemon doesn't bother us about them while 362 * the pagedaemon doesn't bother us about them while
363 * they're on their way to disk. 363 * they're on their way to disk.
364 */ 364 */
365 for (i = 0; i == 0 || i < pages_per_block; i++) { 365 for (i = 0; i == 0 || i < pages_per_block; i++) {
366 KASSERT(mutex_owned(vp->v_interlock)); 366 KASSERT(mutex_owned(vp->v_interlock));
367 pg = pgs[i]; 367 pg = pgs[i];
368 KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); 368 KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
369 KASSERT(pg->flags & PG_BUSY); 369 KASSERT(pg->flags & PG_BUSY);
370 if (dirty) { 370 if (dirty) {
371 pg->flags &= ~PG_CLEAN; 371 pg->flags &= ~PG_CLEAN;
372 if (flags & PGO_FREE) { 372 if (flags & PGO_FREE) {
373 /* 373 /*
374 * Wire the page so that 374 * Wire the page so that
375 * pdaemon doesn't see it again. 375 * pdaemon doesn't see it again.
376 */ 376 */
377 mutex_enter(&uvm_pageqlock); 377 mutex_enter(&uvm_pageqlock);
378 uvm_pagewire(pg); 378 uvm_pagewire(pg);
379 mutex_exit(&uvm_pageqlock); 379 mutex_exit(&uvm_pageqlock);
380 380
381 /* Suspended write flag */ 381 /* Suspended write flag */
382 pg->flags |= PG_DELWRI; 382 pg->flags |= PG_DELWRI;
383 } 383 }
384 } 384 }
385 if (pg->flags & PG_WANTED) 385 if (pg->flags & PG_WANTED)
386 wakeup(pg); 386 wakeup(pg);
387 pg->flags &= ~(PG_WANTED|PG_BUSY); 387 pg->flags &= ~(PG_WANTED|PG_BUSY);
388 UVM_PAGE_OWN(pg, NULL); 388 UVM_PAGE_OWN(pg, NULL);
389 } 389 }
390 390
391 if (checkfirst && any_dirty) 391 if (checkfirst && any_dirty)
392 break; 392 break;
393 393
394 if (by_list) { 394 if (by_list) {
395 curpg = TAILQ_NEXT(curpg, listq.queue); 395 curpg = TAILQ_NEXT(curpg, listq.queue);
396 } else { 396 } else {
397 soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs)); 397 soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs));
398 } 398 }
399 } 399 }
400 400
401 KASSERT(mutex_owned(vp->v_interlock)); 401 KASSERT(mutex_owned(vp->v_interlock));
402 return any_dirty; 402 return any_dirty;
403} 403}
404 404
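check_dirty() above treats a page as dirty if either the MMU's modified bit or UVM's software state says so: pmap_clear_modify() reports and resets the hardware bit, and a cleared PG_CLEAN flag means UVM already knew. The same test as a standalone sketch (hypothetical helper, not part of this patch):

	/*
	 * A page counts as dirty if the MMU recorded a write since
	 * the last check, or if UVM has already cleared PG_CLEAN.
	 * pmap_clear_modify() returns the old modified state and
	 * clears it, rearming hardware dirty tracking as a side
	 * effect.
	 */
	static int
	page_is_dirty(struct vm_page *pg)
	{
		return pmap_clear_modify(pg) ||
		    (pg->flags & PG_CLEAN) == 0;
	}
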
405/* 405/*
406 * lfs_putpages functions like genfs_putpages except that 406 * lfs_putpages functions like genfs_putpages except that
407 * 407 *
408 * (1) It needs to bounds-check the incoming requests to ensure that 408 * (1) It needs to bounds-check the incoming requests to ensure that
409 * they are block-aligned; if they are not, expand the range and 409 * they are block-aligned; if they are not, expand the range and
410 * do the right thing in case, e.g., the requested range is clean 410 * do the right thing in case, e.g., the requested range is clean
411 * but the expanded range is dirty. 411 * but the expanded range is dirty.
412 * 412 *
413 * (2) It needs to explicitly send blocks to be written when it is done. 413 * (2) It needs to explicitly send blocks to be written when it is done.
414 * If VOP_PUTPAGES is called without the seglock held, we simply take 414 * If VOP_PUTPAGES is called without the seglock held, we simply take
415 * the seglock and let lfs_segunlock wait for us. 415 * the seglock and let lfs_segunlock wait for us.
416 * XXX There might be a bad situation if we have to flush a vnode while 416 * XXX There might be a bad situation if we have to flush a vnode while
417 * XXX lfs_markv is in operation. As of this writing we panic in this 417 * XXX lfs_markv is in operation. As of this writing we panic in this
418 * XXX case. 418 * XXX case.
419 * 419 *
420 * Assumptions: 420 * Assumptions:
421 * 421 *
422 * (1) The caller does not hold any pages in this vnode busy. If it does, 422 * (1) The caller does not hold any pages in this vnode busy. If it does,
423 * there is a danger that when we expand the page range and busy the 423 * there is a danger that when we expand the page range and busy the
424 * pages we will deadlock. 424 * pages we will deadlock.
425 * 425 *
426 * (2) We are called with vp->v_interlock held; we must return with it 426 * (2) We are called with vp->v_interlock held; we must return with it
427 * released. 427 * released.
428 * 428 *
429 * (3) We don't absolutely have to free pages right away, provided that 429 * (3) We don't absolutely have to free pages right away, provided that
430 * the request does not have PGO_SYNCIO. When the pagedaemon gives 430 * the request does not have PGO_SYNCIO. When the pagedaemon gives
431 * us a request with PGO_FREE, we take the pages out of the paging 431 * us a request with PGO_FREE, we take the pages out of the paging
432 * queue and wake up the writer, which will handle freeing them for us. 432 * queue and wake up the writer, which will handle freeing them for us.
433 * 433 *
434 * We ensure that for any filesystem block, all pages for that 434 * We ensure that for any filesystem block, all pages for that
435 * block are either resident or not, even if those pages are higher 435 * block are either resident or not, even if those pages are higher
436 * than EOF; that means that we will be getting requests to free 436 * than EOF; that means that we will be getting requests to free
437 * "unused" pages above EOF all the time, and should ignore them. 437 * "unused" pages above EOF all the time, and should ignore them.
438 * 438 *
439 * (4) If we are called with PGO_LOCKED, the finfo array we are to write 439 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
440 * into has been set up for us by lfs_writefile. If not, we will 440 * into has been set up for us by lfs_writefile. If not, we will
441 * have to handle allocating and/or freeing an finfo entry. 441 * have to handle allocating and/or freeing an finfo entry.
442 * 442 *
443 * XXX note that we're (ab)using PGO_LOCKED as "seglock held". 443 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
444 */ 444 */
445 445
446/* How many times to loop before we should start to worry */ 446/* How many times to loop before we should start to worry */
447#define TOOMANY 4 447#define TOOMANY 4
448 448
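Given the contract spelled out above, a typical caller (hypothetical, not taken from this patch) enters with v_interlock held and lets the VOP release it:

	/*
	 * Hypothetical caller: write back and free every page of vp.
	 * Per assumption (2) above, lfs_putpages() is entered with
	 * v_interlock held and returns with it released.
	 */
	static int
	flush_all_pages(struct vnode *vp)
	{
		mutex_enter(vp->v_interlock);
		return VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_SYNCIO);
	}

With PGO_SYNCIO the call waits for the I/O to complete; without it, a PGO_FREE request from the pagedaemon may instead queue the vnode for the writer daemon and return EWOULDBLOCK, as the code below shows.
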
449int 449int
450lfs_putpages(void *v) 450lfs_putpages(void *v)
451{ 451{
452 int error; 452 int error;
453 struct vop_putpages_args /* { 453 struct vop_putpages_args /* {
454 struct vnode *a_vp; 454 struct vnode *a_vp;
455 voff_t a_offlo; 455 voff_t a_offlo;
456 voff_t a_offhi; 456 voff_t a_offhi;
457 int a_flags; 457 int a_flags;
458 } */ *ap = v; 458 } */ *ap = v;
459 struct vnode *vp; 459 struct vnode *vp;
460 struct inode *ip; 460 struct inode *ip;
461 struct lfs *fs; 461 struct lfs *fs;
462 struct segment *sp; 462 struct segment *sp;
463 off_t origoffset, startoffset, endoffset, origendoffset, blkeof; 463 off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
464 off_t off, max_endoffset; 464 off_t off, max_endoffset;
465 bool seglocked, sync, pagedaemon, reclaim; 465 bool seglocked, sync, pagedaemon, reclaim;
466 struct vm_page *pg, *busypg; 466 struct vm_page *pg, *busypg;
467 UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); 467 UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
468 int oreclaim = 0; 468 int oreclaim = 0;
469 int donewriting = 0; 469 int donewriting = 0;
470#ifdef DEBUG 470#ifdef DEBUG
471 int debug_n_again, debug_n_dirtyclean; 471 int debug_n_again, debug_n_dirtyclean;
472#endif 472#endif
473 473
474 vp = ap->a_vp; 474 vp = ap->a_vp;
475 ip = VTOI(vp); 475 ip = VTOI(vp);
476 fs = ip->i_lfs; 476 fs = ip->i_lfs;
477 sync = (ap->a_flags & PGO_SYNCIO) != 0; 477 sync = (ap->a_flags & PGO_SYNCIO) != 0;
478 reclaim = (ap->a_flags & PGO_RECLAIM) != 0; 478 reclaim = (ap->a_flags & PGO_RECLAIM) != 0;
479 pagedaemon = (curlwp == uvm.pagedaemon_lwp); 479 pagedaemon = (curlwp == uvm.pagedaemon_lwp);
480 480
481 KASSERT(mutex_owned(vp->v_interlock)); 481 KASSERT(mutex_owned(vp->v_interlock));
482 482
483 /* Putpages does nothing for metadata. */ 483 /* Putpages does nothing for metadata. */
484 if (vp == fs->lfs_ivnode || vp->v_type != VREG) { 484 if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
485 mutex_exit(vp->v_interlock); 485 mutex_exit(vp->v_interlock);
486 return 0; 486 return 0;
487 } 487 }
488 488
489 /* 489 /*
490 * If there are no pages, don't do anything. 490 * If there are no pages, don't do anything.
491 */ 491 */
492 if (vp->v_uobj.uo_npages == 0) { 492 if (vp->v_uobj.uo_npages == 0) {
493 if (TAILQ_EMPTY(&vp->v_uobj.memq) && 493 if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
494 (vp->v_iflag & VI_ONWORKLST) && 494 (vp->v_iflag & VI_ONWORKLST) &&
495 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 495 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
496 vp->v_iflag &= ~VI_WRMAPDIRTY; 496 vp->v_iflag &= ~VI_WRMAPDIRTY;
497 vn_syncer_remove_from_worklist(vp); 497 vn_syncer_remove_from_worklist(vp);
498 } 498 }
499 mutex_exit(vp->v_interlock); 499 mutex_exit(vp->v_interlock);
500  500
501 /* Remove us from paging queue, if we were on it */ 501 /* Remove us from paging queue, if we were on it */
502 mutex_enter(&lfs_lock); 502 mutex_enter(&lfs_lock);
503 if (ip->i_flags & IN_PAGING) { 503 if (ip->i_flags & IN_PAGING) {
504 ip->i_flags &= ~IN_PAGING; 504 ip->i_flags &= ~IN_PAGING;
505 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 505 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
506 } 506 }
507 mutex_exit(&lfs_lock); 507 mutex_exit(&lfs_lock);
508 508
509 KASSERT(!mutex_owned(vp->v_interlock)); 509 KASSERT(!mutex_owned(vp->v_interlock));
510 return 0; 510 return 0;
511 } 511 }
512 512
513 blkeof = lfs_blkroundup(fs, ip->i_size); 513 blkeof = lfs_blkroundup(fs, ip->i_size);
514 514
515 /* 515 /*
516 * Ignore requests to free pages past EOF but in the same block 516 * Ignore requests to free pages past EOF but in the same block
517 * as EOF, unless the vnode is being reclaimed or the request 517 * as EOF, unless the vnode is being reclaimed or the request
518 * is synchronous. (If the request is sync, it comes from 518 * is synchronous. (If the request is sync, it comes from
519 * lfs_truncate.) 519 * lfs_truncate.)
520 * 520 *
521 * To avoid being flooded with this request, make these pages 521 * To avoid being flooded with this request, make these pages
522 * look "active". 522 * look "active".
523 */ 523 */
524 if (!sync && !reclaim && 524 if (!sync && !reclaim &&
525 ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { 525 ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
526 origoffset = ap->a_offlo; 526 origoffset = ap->a_offlo;
527 for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) { 527 for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) {
528 pg = uvm_pagelookup(&vp->v_uobj, off); 528 pg = uvm_pagelookup(&vp->v_uobj, off);
529 KASSERT(pg != NULL); 529 KASSERT(pg != NULL);
530 while (pg->flags & PG_BUSY) { 530 while (pg->flags & PG_BUSY) {
531 pg->flags |= PG_WANTED; 531 pg->flags |= PG_WANTED;
532 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, 532 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0,
533 "lfsput2", 0); 533 "lfsput2", 0);
534 mutex_enter(vp->v_interlock); 534 mutex_enter(vp->v_interlock);
535 } 535 }
536 mutex_enter(&uvm_pageqlock); 536 mutex_enter(&uvm_pageqlock);
537 uvm_pageactivate(pg); 537 uvm_pageactivate(pg);
538 mutex_exit(&uvm_pageqlock); 538 mutex_exit(&uvm_pageqlock);
539 } 539 }
540 ap->a_offlo = blkeof; 540 ap->a_offlo = blkeof;
541 if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { 541 if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
542 mutex_exit(vp->v_interlock); 542 mutex_exit(vp->v_interlock);
543 return 0; 543 return 0;
544 } 544 }
545 } 545 }
546 546
547 /* 547 /*
548 * Extend page range to start and end at block boundaries. 548 * Extend page range to start and end at block boundaries.
549 * (For the purposes of VOP_PUTPAGES, fragments don't exist.) 549 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
550 */ 550 */
551 origoffset = ap->a_offlo; 551 origoffset = ap->a_offlo;
552 origendoffset = ap->a_offhi; 552 origendoffset = ap->a_offhi;
553 startoffset = origoffset & ~(lfs_sb_getbmask(fs)); 553 startoffset = origoffset & ~(lfs_sb_getbmask(fs));
554 max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs)) 554 max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs))
555 << lfs_sb_getbshift(fs); 555 << lfs_sb_getbshift(fs);
556 556
557 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 557 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
558 endoffset = max_endoffset; 558 endoffset = max_endoffset;
559 origendoffset = endoffset; 559 origendoffset = endoffset;
560 } else { 560 } else {
561 origendoffset = round_page(ap->a_offhi); 561 origendoffset = round_page(ap->a_offhi);
562 endoffset = round_page(lfs_blkroundup(fs, origendoffset)); 562 endoffset = round_page(lfs_blkroundup(fs, origendoffset));
563 } 563 }
564 564
565 KASSERT(startoffset > 0 || endoffset >= startoffset); 565 KASSERT(startoffset > 0 || endoffset >= startoffset);
566 if (startoffset == endoffset) { 566 if (startoffset == endoffset) {
567 /* Nothing to do, why were we called? */ 567 /* Nothing to do, why were we called? */
568 mutex_exit(vp->v_interlock); 568 mutex_exit(vp->v_interlock);
569 DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" 569 DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
570 PRId64 "\n", startoffset)); 570 PRId64 "\n", startoffset));
571 return 0; 571 return 0;
572 } 572 }
573 573
574 ap->a_offlo = startoffset; 574 ap->a_offlo = startoffset;
575 ap->a_offhi = endoffset; 575 ap->a_offhi = endoffset;
576 576
577 /* 577 /*
578 * If not cleaning, just send the pages through genfs_putpages 578 * If not cleaning, just send the pages through genfs_putpages
579 * to be returned to the pool. 579 * to be returned to the pool.
580 */ 580 */
581 if (!(ap->a_flags & PGO_CLEANIT)) { 581 if (!(ap->a_flags & PGO_CLEANIT)) {
582 DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n", 582 DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
583 vp, (int)ip->i_number, ap->a_flags)); 583 vp, (int)ip->i_number, ap->a_flags));
584 int r = genfs_putpages(v); 584 int r = genfs_putpages(v);
585 KASSERT(!mutex_owned(vp->v_interlock)); 585 KASSERT(!mutex_owned(vp->v_interlock));
586 return r; 586 return r;
587 } 587 }
588 588
589 /* Set PGO_BUSYFAIL to avoid deadlocks */ 589 /* Set PGO_BUSYFAIL to avoid deadlocks */
590 ap->a_flags |= PGO_BUSYFAIL; 590 ap->a_flags |= PGO_BUSYFAIL;
591 591
592 /* 592 /*
593 * Likewise, if we are asked to clean but the pages are not 593 * Likewise, if we are asked to clean but the pages are not
594 * dirty, we can just free them using genfs_putpages. 594 * dirty, we can just free them using genfs_putpages.
595 */ 595 */
596#ifdef DEBUG 596#ifdef DEBUG
597 debug_n_dirtyclean = 0; 597 debug_n_dirtyclean = 0;
598#endif 598#endif
599 do { 599 do {
600 int r; 600 int r;
601 KASSERT(mutex_owned(vp->v_interlock)); 601 KASSERT(mutex_owned(vp->v_interlock));
602 602
603 /* Count the number of dirty pages */ 603 /* Count the number of dirty pages */
604 r = check_dirty(fs, vp, startoffset, endoffset, blkeof, 604 r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
605 ap->a_flags, 1, NULL); 605 ap->a_flags, 1, NULL);
606 if (r < 0) { 606 if (r < 0) {
607 /* Pages are busy with another process */ 607 /* Pages are busy with another process */
608 mutex_exit(vp->v_interlock); 608 mutex_exit(vp->v_interlock);
609 return EDEADLK; 609 return EDEADLK;
610 } 610 }
611 if (r > 0) /* Some pages are dirty */ 611 if (r > 0) /* Some pages are dirty */
612 break; 612 break;
613 613
614 /* 614 /*
615 * Sometimes pages are dirtied between the time that 615 * Sometimes pages are dirtied between the time that
616 * we check and the time we try to clean them. 616 * we check and the time we try to clean them.
617 * Instruct lfs_gop_write to return EDEADLK in this case 617 * Instruct lfs_gop_write to return EDEADLK in this case
618 * so we can write them properly. 618 * so we can write them properly.
619 */ 619 */
620 ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE; 620 ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
621 r = genfs_do_putpages(vp, startoffset, endoffset, 621 r = genfs_do_putpages(vp, startoffset, endoffset,
622 ap->a_flags & ~PGO_SYNCIO, &busypg); 622 ap->a_flags & ~PGO_SYNCIO, &busypg);
623 ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE; 623 ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
624 if (r != EDEADLK) { 624 if (r != EDEADLK) {
625 KASSERT(!mutex_owned(vp->v_interlock)); 625 KASSERT(!mutex_owned(vp->v_interlock));
626 return r; 626 return r;
627 } 627 }
628 628
629 /* One of the pages was busy. Start over. */ 629 /* One of the pages was busy. Start over. */
630 mutex_enter(vp->v_interlock); 630 mutex_enter(vp->v_interlock);
631 wait_for_page(vp, busypg, "dirtyclean"); 631 wait_for_page(vp, busypg, "dirtyclean");
632#ifdef DEBUG 632#ifdef DEBUG
633 ++debug_n_dirtyclean; 633 ++debug_n_dirtyclean;
634#endif 634#endif
635 } while(1); 635 } while(1);
636 636
637#ifdef DEBUG 637#ifdef DEBUG
638 if (debug_n_dirtyclean > TOOMANY) 638 if (debug_n_dirtyclean > TOOMANY)
639 DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n", 639 DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
640 debug_n_dirtyclean)); 640 debug_n_dirtyclean));
641#endif 641#endif
642 642
643 /* 643 /*
644 * Dirty and asked to clean. 644 * Dirty and asked to clean.
645 * 645 *
646 * Pagedaemon can't actually write LFS pages; wake up 646 * Pagedaemon can't actually write LFS pages; wake up
647 * the writer to take care of that. The writer will 647 * the writer to take care of that. The writer will
648 * notice the pager inode queue and act on that. 648 * notice the pager inode queue and act on that.
649 * 649 *
650 * XXX We must drop the vp->interlock before taking the lfs_lock or we 650 * XXX We must drop the vp->interlock before taking the lfs_lock or we
651 * get a nasty deadlock with lfs_flush_pchain(). 651 * get a nasty deadlock with lfs_flush_pchain().
652 */ 652 */
653 if (pagedaemon) { 653 if (pagedaemon) {
654 mutex_exit(vp->v_interlock); 654 mutex_exit(vp->v_interlock);
655 mutex_enter(&lfs_lock); 655 mutex_enter(&lfs_lock);
656 if (!(ip->i_flags & IN_PAGING)) { 656 if (!(ip->i_flags & IN_PAGING)) {
657 ip->i_flags |= IN_PAGING; 657 ip->i_flags |= IN_PAGING;
658 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); 658 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
659 }  659 }
660 wakeup(&lfs_writer_daemon); 660 wakeup(&lfs_writer_daemon);
661 mutex_exit(&lfs_lock); 661 mutex_exit(&lfs_lock);
662 preempt(); 662 preempt();
663 KASSERT(!mutex_owned(vp->v_interlock)); 663 KASSERT(!mutex_owned(vp->v_interlock));
664 return EWOULDBLOCK; 664 return EWOULDBLOCK;
665 } 665 }
666 666
667 /* 667 /*
668 * If this is a file created in a recent dirop, we can't flush its 668 * If this is a file created in a recent dirop, we can't flush its
669 * inode until the dirop is complete. Drain dirops, then flush the 669 * inode until the dirop is complete. Drain dirops, then flush the
670 * filesystem (taking care of any other pending dirops while we're 670 * filesystem (taking care of any other pending dirops while we're
671 * at it). 671 * at it).
672 */ 672 */
673 if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && 673 if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
674 (vp->v_uflag & VU_DIROP)) { 674 (vp->v_uflag & VU_DIROP)) {
675 DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); 675 DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));
676 676
677 lfs_writer_enter(fs, "ppdirop"); 677 lfs_writer_enter(fs, "ppdirop");
678 678
679 /* Note if we hold the vnode locked */ 679 /* Note if we hold the vnode locked */
680 if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) 680 if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
681 { 681 {
682 DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n")); 682 DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n"));
683 } else { 683 } else {
684 DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n")); 684 DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n"));
685 } 685 }
686 mutex_exit(vp->v_interlock); 686 mutex_exit(vp->v_interlock);
687 687
688 mutex_enter(&lfs_lock); 688 mutex_enter(&lfs_lock);
689 lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); 689 lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
690 mutex_exit(&lfs_lock); 690 mutex_exit(&lfs_lock);
691 691
692 mutex_enter(vp->v_interlock); 692 mutex_enter(vp->v_interlock);
693 lfs_writer_leave(fs); 693 lfs_writer_leave(fs);
694 694
695 /* The flush will have cleaned out this vnode as well, 695 /* The flush will have cleaned out this vnode as well,
696 no need to do more to it. */ 696 no need to do more to it. */
697 } 697 }
698 698
699 /* 699 /*
700 * This is it. We are going to write some pages. From here on 700 * This is it. We are going to write some pages. From here on
701 * down it's all just mechanics. 701 * down it's all just mechanics.
702 * 702 *
703 * Don't let genfs_putpages wait; lfs_segunlock will wait for us. 703 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
704 */ 704 */
705 ap->a_flags &= ~PGO_SYNCIO; 705 ap->a_flags &= ~PGO_SYNCIO;
706 706
707 /* 707 /*
708 * If we've already got the seglock, flush the node and return. 708 * If we've already got the seglock, flush the node and return.
709 * The FIP has already been set up for us by lfs_writefile, 709 * The FIP has already been set up for us by lfs_writefile,
710 * and FIP cleanup and lfs_updatemeta will also be done there, 710 * and FIP cleanup and lfs_updatemeta will also be done there,
711 * unless genfs_putpages returns EDEADLK; then we must flush 711 * unless genfs_putpages returns EDEADLK; then we must flush
712 * what we have, and correct FIP and segment header accounting. 712 * what we have, and correct FIP and segment header accounting.
713 */ 713 */
714 get_seglock: 714 get_seglock:
715 /* 715 /*
716 * If we are not called with the segment locked, lock it. 716 * If we are not called with the segment locked, lock it.
717 * Account for a new FIP in the segment header, and set sp->vp. 717 * Account for a new FIP in the segment header, and set sp->vp.
718 * (This should duplicate the setup at the top of lfs_writefile().) 718 * (This should duplicate the setup at the top of lfs_writefile().)
719 */ 719 */
720 seglocked = (ap->a_flags & PGO_LOCKED) != 0; 720 seglocked = (ap->a_flags & PGO_LOCKED) != 0;
721 if (!seglocked) { 721 if (!seglocked) {
722 mutex_exit(vp->v_interlock); 722 mutex_exit(vp->v_interlock);
723 error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); 723 error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
724 if (error != 0) { 724 if (error != 0) {
725 KASSERT(!mutex_owned(vp->v_interlock)); 725 KASSERT(!mutex_owned(vp->v_interlock));
726 return error; 726 return error;
727 } 727 }
728 mutex_enter(vp->v_interlock); 728 mutex_enter(vp->v_interlock);
729 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); 729 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
730 } 730 }
731 sp = fs->lfs_sp; 731 sp = fs->lfs_sp;
732 KASSERT(sp->vp == NULL); 732 KASSERT(sp->vp == NULL);
733 sp->vp = vp; 733 sp->vp = vp;
734 734
735 /* Note segments written by reclaim; only for debugging */ 735 /* Note segments written by reclaim; only for debugging */
736 if (vdead_check(vp, VDEAD_NOWAIT) != 0) { 736 if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
737 sp->seg_flags |= SEGM_RECLAIM; 737 sp->seg_flags |= SEGM_RECLAIM;
738 fs->lfs_reclino = ip->i_number; 738 fs->lfs_reclino = ip->i_number;
739 } 739 }
740 740
741 /* 741 /*
742 * Ensure that the partial segment is marked SS_DIROP if this 742 * Ensure that the partial segment is marked SS_DIROP if this
743 * vnode is a DIROP. 743 * vnode is a DIROP.
744 */ 744 */
745 if (!seglocked && vp->v_uflag & VU_DIROP) 745 if (!seglocked && vp->v_uflag & VU_DIROP)
746 ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT); 746 ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);
747 747
748 /* 748 /*
749 * Loop over genfs_putpages until all pages are gathered. 749 * Loop over genfs_putpages until all pages are gathered.
750 * genfs_putpages() drops the interlock, so reacquire it if necessary. 750 * genfs_putpages() drops the interlock, so reacquire it if necessary.
751 * Whenever we lose the interlock we have to rerun check_dirty, as 751 * Whenever we lose the interlock we have to rerun check_dirty, as
752 * well, since more pages might have been dirtied in our absence. 752 * well, since more pages might have been dirtied in our absence.
753 */ 753 */
754#ifdef DEBUG 754#ifdef DEBUG
755 debug_n_again = 0; 755 debug_n_again = 0;
756#endif 756#endif
757 do { 757 do {
758 busypg = NULL; 758 busypg = NULL;
759 KASSERT(mutex_owned(vp->v_interlock)); 759 KASSERT(mutex_owned(vp->v_interlock));
760 if (check_dirty(fs, vp, startoffset, endoffset, blkeof, 760 if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
761 ap->a_flags, 0, &busypg) < 0) { 761 ap->a_flags, 0, &busypg) < 0) {
762 mutex_exit(vp->v_interlock); 762 mutex_exit(vp->v_interlock);
763 /* XXX why? --ks */ 763 /* XXX why? --ks */
764 mutex_enter(vp->v_interlock); 764 mutex_enter(vp->v_interlock);
765 write_and_wait(fs, vp, busypg, seglocked, NULL); 765 write_and_wait(fs, vp, busypg, seglocked, NULL);
766 if (!seglocked) { 766 if (!seglocked) {
767 mutex_exit(vp->v_interlock); 767 mutex_exit(vp->v_interlock);
768 lfs_release_finfo(fs); 768 lfs_release_finfo(fs);
769 lfs_segunlock(fs); 769 lfs_segunlock(fs);
770 mutex_enter(vp->v_interlock); 770 mutex_enter(vp->v_interlock);
771 } 771 }
772 sp->vp = NULL; 772 sp->vp = NULL;
773 goto get_seglock; 773 goto get_seglock;
774 } 774 }
775  775
776 busypg = NULL; 776 busypg = NULL;
777 KASSERT(!mutex_owned(&uvm_pageqlock)); 777 KASSERT(!mutex_owned(&uvm_pageqlock));
778 oreclaim = (ap->a_flags & PGO_RECLAIM); 778 oreclaim = (ap->a_flags & PGO_RECLAIM);
779 ap->a_flags &= ~PGO_RECLAIM; 779 ap->a_flags &= ~PGO_RECLAIM;
780 error = genfs_do_putpages(vp, startoffset, endoffset, 780 error = genfs_do_putpages(vp, startoffset, endoffset,
781 ap->a_flags, &busypg); 781 ap->a_flags, &busypg);
782 ap->a_flags |= oreclaim; 782 ap->a_flags |= oreclaim;
783  783
784 if (error == EDEADLK || error == EAGAIN) { 784 if (error == EDEADLK || error == EAGAIN) {
785 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" 785 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
786 " %d ino %d off %x (seg %d)\n", error, 786 " %d ino %d off %x (seg %d)\n", error,
787 ip->i_number, fs->lfs_offset, 787 ip->i_number, lfs_sb_getoffset(fs),
788 lfs_dtosn(fs, fs->lfs_offset))); 788 lfs_dtosn(fs, lfs_sb_getoffset(fs))));
789 789
790 if (oreclaim) { 790 if (oreclaim) {
791 mutex_enter(vp->v_interlock); 791 mutex_enter(vp->v_interlock);
792 write_and_wait(fs, vp, busypg, seglocked, "again"); 792 write_and_wait(fs, vp, busypg, seglocked, "again");
793 mutex_exit(vp->v_interlock); 793 mutex_exit(vp->v_interlock);
794 } else { 794 } else {
795 if ((sp->seg_flags & SEGM_SINGLE) && 795 if ((sp->seg_flags & SEGM_SINGLE) &&
796 lfs_sb_getcurseg(fs) != fs->lfs_startseg) 796 lfs_sb_getcurseg(fs) != fs->lfs_startseg)
797 donewriting = 1; 797 donewriting = 1;
798 } 798 }
799 } else if (error) { 799 } else if (error) {
800 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" 800 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
801 " %d ino %d off %x (seg %d)\n", error, 801 " %d ino %d off %x (seg %d)\n", error,
802 (int)ip->i_number, fs->lfs_offset, 802 (int)ip->i_number, lfs_sb_getoffset(fs),
803 lfs_dtosn(fs, fs->lfs_offset))); 803 lfs_dtosn(fs, lfs_sb_getoffset(fs))));
804 } 804 }
805 /* genfs_do_putpages loses the interlock */ 805 /* genfs_do_putpages loses the interlock */
806#ifdef DEBUG 806#ifdef DEBUG
807 ++debug_n_again; 807 ++debug_n_again;
808#endif 808#endif
809 if (oreclaim && error == EAGAIN) { 809 if (oreclaim && error == EAGAIN) {
810 DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n", 810 DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
811 vp, (int)ip->i_number, vp->v_iflag, ap->a_flags)); 811 vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
812 mutex_enter(vp->v_interlock); 812 mutex_enter(vp->v_interlock);
813 } 813 }
814 if (error == EDEADLK) 814 if (error == EDEADLK)
815 mutex_enter(vp->v_interlock); 815 mutex_enter(vp->v_interlock);
816 } while (error == EDEADLK || (oreclaim && error == EAGAIN)); 816 } while (error == EDEADLK || (oreclaim && error == EAGAIN));
817#ifdef DEBUG 817#ifdef DEBUG
818 if (debug_n_again > TOOMANY) 818 if (debug_n_again > TOOMANY)
819 DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again)); 819 DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
820#endif 820#endif
821 821
822 KASSERT(sp != NULL && sp->vp == vp); 822 KASSERT(sp != NULL && sp->vp == vp);
823 if (!seglocked && !donewriting) { 823 if (!seglocked && !donewriting) {
824 sp->vp = NULL; 824 sp->vp = NULL;
825 825
826 /* Write indirect blocks as well */ 826 /* Write indirect blocks as well */
827 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir); 827 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
828 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir); 828 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
829 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir); 829 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);
830 830
831 KASSERT(sp->vp == NULL); 831 KASSERT(sp->vp == NULL);
832 sp->vp = vp; 832 sp->vp = vp;
833 } 833 }
834 834
835 /* 835 /*
836 * Blocks are now gathered into a segment waiting to be written. 836 * Blocks are now gathered into a segment waiting to be written.
837 * All that's left to do is update metadata, and write them. 837 * All that's left to do is update metadata, and write them.
838 */ 838 */
839 lfs_updatemeta(sp); 839 lfs_updatemeta(sp);
840 KASSERT(sp->vp == vp); 840 KASSERT(sp->vp == vp);
841 sp->vp = NULL; 841 sp->vp = NULL;
842 842
843 /* 843 /*
844 * If we were called from lfs_writefile, we don't need to clean up 844 * If we were called from lfs_writefile, we don't need to clean up
845 * the FIP or unlock the segment lock. We're done. 845 * the FIP or unlock the segment lock. We're done.
846 */ 846 */
847 if (seglocked) { 847 if (seglocked) {
848 KASSERT(!mutex_owned(vp->v_interlock)); 848 KASSERT(!mutex_owned(vp->v_interlock));
849 return error; 849 return error;
850 } 850 }
851 851
852 /* Clean up FIP and send it to disk. */ 852 /* Clean up FIP and send it to disk. */
853 lfs_release_finfo(fs); 853 lfs_release_finfo(fs);
854 lfs_writeseg(fs, fs->lfs_sp); 854 lfs_writeseg(fs, fs->lfs_sp);
855 855
856 /* 856 /*
857 * Remove us from paging queue if we wrote all our pages. 857 * Remove us from paging queue if we wrote all our pages.
858 */ 858 */
859 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 859 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
860 mutex_enter(&lfs_lock); 860 mutex_enter(&lfs_lock);
861 if (ip->i_flags & IN_PAGING) { 861 if (ip->i_flags & IN_PAGING) {
862 ip->i_flags &= ~IN_PAGING; 862 ip->i_flags &= ~IN_PAGING;
863 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 863 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
864 } 864 }
865 mutex_exit(&lfs_lock); 865 mutex_exit(&lfs_lock);
866 } 866 }
867 867
868 /* 868 /*
869 * XXX - with the malloc/copy writeseg, the pages are freed by now 869 * XXX - with the malloc/copy writeseg, the pages are freed by now
870 * even if we don't wait (e.g. if we hold a nested lock). This 870 * even if we don't wait (e.g. if we hold a nested lock). This
871 * will not be true if we stop using malloc/copy. 871 * will not be true if we stop using malloc/copy.
872 */ 872 */
873 KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); 873 KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
874 lfs_segunlock(fs); 874 lfs_segunlock(fs);
875 875
876 /* 876 /*
877 * Wait for v_numoutput to drop to zero. The seglock should 877 * Wait for v_numoutput to drop to zero. The seglock should
878 * take care of this, but there is a slight possibility that 878 * take care of this, but there is a slight possibility that
879 * aiodoned might not have got around to our buffers yet. 879 * aiodoned might not have got around to our buffers yet.
880 */ 880 */
881 if (sync) { 881 if (sync) {
882 mutex_enter(vp->v_interlock); 882 mutex_enter(vp->v_interlock);
883 while (vp->v_numoutput > 0) { 883 while (vp->v_numoutput > 0) {
884 DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" 884 DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
885 " num %d\n", ip->i_number, vp->v_numoutput)); 885 " num %d\n", ip->i_number, vp->v_numoutput));
886 cv_wait(&vp->v_cv, vp->v_interlock); 886 cv_wait(&vp->v_cv, vp->v_interlock);
887 } 887 }
888 mutex_exit(vp->v_interlock); 888 mutex_exit(vp->v_interlock);
889 } 889 }
890 KASSERT(!mutex_owned(vp->v_interlock)); 890 KASSERT(!mutex_owned(vp->v_interlock));
891 return error; 891 return error;
892} 892}
893 893
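
The loop at the end of lfs_putpages above illustrates a recurring LFS pattern: genfs_do_putpages() returns with the vnode interlock dropped, so on EDEADLK the caller must retake the interlock before retrying. A minimal userland sketch of that shape, with invented stand-ins (try_putpages, lock, unlock) for the kernel primitives:

    /*
     * Minimal userland sketch of the retry protocol in lfs_putpages;
     * try_putpages(), lock() and unlock() are invented stand-ins for
     * genfs_do_putpages() and the vnode interlock, not kernel calls.
     */
    #include <errno.h>
    #include <stdio.h>

    static int attempts;

    static void lock(void)   { /* mutex_enter(vp->v_interlock) */ }
    static void unlock(void) { /* mutex_exit(vp->v_interlock) */ }

    /* Fails with EDEADLK while "pages" stay busy, then succeeds. */
    static int try_putpages(void)
    {
        unlock();               /* genfs_do_putpages drops the interlock */
        return (attempts++ < 3) ? EDEADLK : 0;
    }

    int main(void)
    {
        int error;

        lock();
        do {
            error = try_putpages();  /* returns with interlock dropped */
            if (error == EDEADLK)
                lock();              /* retake it before the next pass */
        } while (error == EDEADLK);
        printf("done after %d attempt(s), error=%d\n", attempts, error);
        return error;
    }

The real code additionally reruns check_dirty() on every pass, since more pages may have been dirtied while the interlock was dropped.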

cvs diff -r1.243 -r1.244 src/sys/ufs/lfs/lfs_segment.c

--- src/sys/ufs/lfs/lfs_segment.c 2015/07/24 06:59:32 1.243
+++ src/sys/ufs/lfs/lfs_segment.c 2015/07/25 10:40:35 1.244
@@ -1,1062 +1,1062 @@ @@ -1,1062 +1,1062 @@
1/* $NetBSD: lfs_segment.c,v 1.243 2015/07/24 06:59:32 dholland Exp $ */ 1/* $NetBSD: lfs_segment.c,v 1.244 2015/07/25 10:40:35 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31/* 31/*
32 * Copyright (c) 1991, 1993 32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved. 33 * The Regents of the University of California. All rights reserved.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors 43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software 44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission. 45 * without specific prior written permission.
46 * 46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE. 57 * SUCH DAMAGE.
58 * 58 *
59 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95 59 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95
60 */ 60 */
61 61
62#include <sys/cdefs.h> 62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.243 2015/07/24 06:59:32 dholland Exp $"); 63__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.244 2015/07/25 10:40:35 martin Exp $");
64 64
65#define _VFS_VNODE_PRIVATE /* XXX: check for VI_MARKER, this has to go */ 65#define _VFS_VNODE_PRIVATE /* XXX: check for VI_MARKER, this has to go */
66 66
67#ifdef DEBUG 67#ifdef DEBUG
68# define vndebug(vp, str) do { \ 68# define vndebug(vp, str) do { \
69 if (VTOI(vp)->i_flag & IN_CLEANING) \ 69 if (VTOI(vp)->i_flag & IN_CLEANING) \
70 DLOG((DLOG_WVNODE, "not writing ino %d because %s (op %d)\n", \ 70 DLOG((DLOG_WVNODE, "not writing ino %d because %s (op %d)\n", \
71 VTOI(vp)->i_number, (str), op)); \ 71 VTOI(vp)->i_number, (str), op)); \
72} while(0) 72} while(0)
73#else 73#else
74# define vndebug(vp, str) 74# define vndebug(vp, str)
75#endif 75#endif
76#define ivndebug(vp, str) \ 76#define ivndebug(vp, str) \
77 DLOG((DLOG_WVNODE, "ino %d: %s\n", VTOI(vp)->i_number, (str))) 77 DLOG((DLOG_WVNODE, "ino %d: %s\n", VTOI(vp)->i_number, (str)))
78 78
79#if defined(_KERNEL_OPT) 79#if defined(_KERNEL_OPT)
80#include "opt_ddb.h" 80#include "opt_ddb.h"
81#endif 81#endif
82 82
83#include <sys/param.h> 83#include <sys/param.h>
84#include <sys/systm.h> 84#include <sys/systm.h>
85#include <sys/namei.h> 85#include <sys/namei.h>
86#include <sys/kernel.h> 86#include <sys/kernel.h>
87#include <sys/resourcevar.h> 87#include <sys/resourcevar.h>
88#include <sys/file.h> 88#include <sys/file.h>
89#include <sys/stat.h> 89#include <sys/stat.h>
90#include <sys/buf.h> 90#include <sys/buf.h>
91#include <sys/proc.h> 91#include <sys/proc.h>
92#include <sys/vnode.h> 92#include <sys/vnode.h>
93#include <sys/mount.h> 93#include <sys/mount.h>
94#include <sys/kauth.h> 94#include <sys/kauth.h>
95#include <sys/syslog.h> 95#include <sys/syslog.h>
96 96
97#include <miscfs/specfs/specdev.h> 97#include <miscfs/specfs/specdev.h>
98#include <miscfs/fifofs/fifo.h> 98#include <miscfs/fifofs/fifo.h>
99 99
100#include <ufs/lfs/ulfs_inode.h> 100#include <ufs/lfs/ulfs_inode.h>
101#include <ufs/lfs/ulfsmount.h> 101#include <ufs/lfs/ulfsmount.h>
102#include <ufs/lfs/ulfs_extern.h> 102#include <ufs/lfs/ulfs_extern.h>
103 103
104#include <ufs/lfs/lfs.h> 104#include <ufs/lfs/lfs.h>
105#include <ufs/lfs/lfs_kernel.h> 105#include <ufs/lfs/lfs_kernel.h>
106#include <ufs/lfs/lfs_extern.h> 106#include <ufs/lfs/lfs_extern.h>
107 107
108#include <uvm/uvm.h> 108#include <uvm/uvm.h>
109#include <uvm/uvm_extern.h> 109#include <uvm/uvm_extern.h>
110 110
111MALLOC_JUSTDEFINE(M_SEGMENT, "LFS segment", "Segment for LFS"); 111MALLOC_JUSTDEFINE(M_SEGMENT, "LFS segment", "Segment for LFS");
112 112
113static void lfs_generic_callback(struct buf *, void (*)(struct buf *)); 113static void lfs_generic_callback(struct buf *, void (*)(struct buf *));
114static void lfs_free_aiodone(struct buf *); 114static void lfs_free_aiodone(struct buf *);
115static void lfs_super_aiodone(struct buf *); 115static void lfs_super_aiodone(struct buf *);
116static void lfs_cluster_aiodone(struct buf *); 116static void lfs_cluster_aiodone(struct buf *);
117static void lfs_cluster_callback(struct buf *); 117static void lfs_cluster_callback(struct buf *);
118 118
119/* 119/*
120 * Determine if it's OK to start a partial in this segment, or if we need 120 * Determine if it's OK to start a partial in this segment, or if we need
121 * to go on to a new segment. 121 * to go on to a new segment.
122 */ 122 */
123#define LFS_PARTIAL_FITS(fs) \ 123#define LFS_PARTIAL_FITS(fs) \
124 (lfs_sb_getfsbpseg(fs) - \ 124 (lfs_sb_getfsbpseg(fs) - \
125 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs)) > \ 125 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs)) > \
126 lfs_sb_getfrag(fs)) 126 lfs_sb_getfrag(fs))
127 127
128/* 128/*
129 * Figure out whether we should do a checkpoint write or go ahead with 129 * Figure out whether we should do a checkpoint write or go ahead with
130 * an ordinary write. 130 * an ordinary write.
131 */ 131 */
132#define LFS_SHOULD_CHECKPOINT(fs, flags) \ 132#define LFS_SHOULD_CHECKPOINT(fs, flags) \
133 ((flags & SEGM_CLEAN) == 0 && \ 133 ((flags & SEGM_CLEAN) == 0 && \
134 ((fs->lfs_nactive > LFS_MAX_ACTIVE || \ 134 ((fs->lfs_nactive > LFS_MAX_ACTIVE || \
135 (flags & SEGM_CKP) || \ 135 (flags & SEGM_CKP) || \
136 lfs_sb_getnclean(fs) < LFS_MAX_ACTIVE))) 136 lfs_sb_getnclean(fs) < LFS_MAX_ACTIVE)))
137 137
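
LFS_PARTIAL_FITS asks whether the space remaining in the current segment, lfs_sb_getfsbpseg(fs) minus the distance already written past lfs_sb_getcurseg(fs), exceeds one fragment. A standalone illustration with invented superblock numbers (the lfs_sb_get*() accessors are modeled as plain struct fields):

    /*
     * Standalone illustration of LFS_PARTIAL_FITS with invented
     * superblock numbers; the lfs_sb_get*() accessors are modeled
     * as plain struct fields here.
     */
    #include <stdio.h>

    struct fakesb {
        long fsbpseg;   /* fsb per segment */
        long offset;    /* current write offset (fsb) */
        long curseg;    /* start of the current segment (fsb) */
        long frag;      /* fsb per fragment */
    };

    static int partial_fits(const struct fakesb *sb)
    {
        /* Room left in this segment must exceed one fragment. */
        return sb->fsbpseg - (sb->offset - sb->curseg) > sb->frag;
    }

    int main(void)
    {
        struct fakesb sb = { 2048, 10240 + 2040, 10240, 8 };

        printf("partial fits: %s\n", partial_fits(&sb) ? "yes" : "no");
        return 0;
    }

With these numbers exactly one fragment remains, so the test fails and a new segment would be started.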
138int lfs_match_fake(struct lfs *, struct buf *); 138int lfs_match_fake(struct lfs *, struct buf *);
139void lfs_newseg(struct lfs *); 139void lfs_newseg(struct lfs *);
140/* XXX ondisk32 */ 140/* XXX ondisk32 */
141void lfs_shellsort(struct buf **, int32_t *, int, int); 141void lfs_shellsort(struct buf **, int32_t *, int, int);
142void lfs_supercallback(struct buf *); 142void lfs_supercallback(struct buf *);
143void lfs_updatemeta(struct segment *); 143void lfs_updatemeta(struct segment *);
144void lfs_writesuper(struct lfs *, daddr_t); 144void lfs_writesuper(struct lfs *, daddr_t);
145int lfs_writevnodes(struct lfs *fs, struct mount *mp, 145int lfs_writevnodes(struct lfs *fs, struct mount *mp,
146 struct segment *sp, int dirops); 146 struct segment *sp, int dirops);
147 147
148int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 148int lfs_allclean_wakeup; /* Cleaner wakeup address. */
149int lfs_writeindir = 1; /* whether to flush indir on non-ckp */ 149int lfs_writeindir = 1; /* whether to flush indir on non-ckp */
150int lfs_clean_vnhead = 0; /* Allow freeing to head of vn list */ 150int lfs_clean_vnhead = 0; /* Allow freeing to head of vn list */
151int lfs_dirvcount = 0; /* # active dirops */ 151int lfs_dirvcount = 0; /* # active dirops */
152 152
153/* Statistics Counters */ 153/* Statistics Counters */
154int lfs_dostats = 1; 154int lfs_dostats = 1;
155struct lfs_stats lfs_stats; 155struct lfs_stats lfs_stats;
156 156
157/* op values to lfs_writevnodes */ 157/* op values to lfs_writevnodes */
158#define VN_REG 0 158#define VN_REG 0
159#define VN_DIROP 1 159#define VN_DIROP 1
160#define VN_EMPTY 2 160#define VN_EMPTY 2
161#define VN_CLEAN 3 161#define VN_CLEAN 3
162 162
163/* 163/*
164 * XXX KS - Set modification time on the Ifile, so the cleaner can 164 * XXX KS - Set modification time on the Ifile, so the cleaner can
165 * read the fs mod time off of it. We don't set IN_UPDATE here, 165 * read the fs mod time off of it. We don't set IN_UPDATE here,
166 * since we don't really need this to be flushed to disk (and in any 166 * since we don't really need this to be flushed to disk (and in any
167 * case that wouldn't happen to the Ifile until we checkpoint). 167 * case that wouldn't happen to the Ifile until we checkpoint).
168 */ 168 */
169void 169void
170lfs_imtime(struct lfs *fs) 170lfs_imtime(struct lfs *fs)
171{ 171{
172 struct timespec ts; 172 struct timespec ts;
173 struct inode *ip; 173 struct inode *ip;
174 174
175 ASSERT_MAYBE_SEGLOCK(fs); 175 ASSERT_MAYBE_SEGLOCK(fs);
176 vfs_timestamp(&ts); 176 vfs_timestamp(&ts);
177 ip = VTOI(fs->lfs_ivnode); 177 ip = VTOI(fs->lfs_ivnode);
178 ip->i_ffs1_mtime = ts.tv_sec; 178 ip->i_ffs1_mtime = ts.tv_sec;
179 ip->i_ffs1_mtimensec = ts.tv_nsec; 179 ip->i_ffs1_mtimensec = ts.tv_nsec;
180} 180}
181 181
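
lfs_imtime() above simply stamps the Ifile inode with the current time via vfs_timestamp(). For readers unfamiliar with the kernel interface, the userland analogue is clock_gettime(); a trivial sketch:

    /*
     * Userland analogue of the vfs_timestamp() call in lfs_imtime
     * above; clock_gettime() is standard POSIX, and everything else
     * about the Ifile is out of scope here.
     */
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        printf("i_ffs1_mtime=%lld i_ffs1_mtimensec=%ld\n",
            (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
    }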
182/* 182/*
183 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 183 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
184 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 184 * single threaded. Currently, there are two paths into lfs_segwrite, sync()
185 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 185 * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
186 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 186 * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
187 */ 187 */
188 188
189#define IS_FLUSHING(fs,vp) ((fs)->lfs_flushvp == (vp)) 189#define IS_FLUSHING(fs,vp) ((fs)->lfs_flushvp == (vp))
190 190
191int 191int
192lfs_vflush(struct vnode *vp) 192lfs_vflush(struct vnode *vp)
193{ 193{
194 struct inode *ip; 194 struct inode *ip;
195 struct lfs *fs; 195 struct lfs *fs;
196 struct segment *sp; 196 struct segment *sp;
197 struct buf *bp, *nbp, *tbp, *tnbp; 197 struct buf *bp, *nbp, *tbp, *tnbp;
198 int error; 198 int error;
199 int flushed; 199 int flushed;
200 int relock; 200 int relock;
201 201
202 ip = VTOI(vp); 202 ip = VTOI(vp);
203 fs = VFSTOULFS(vp->v_mount)->um_lfs; 203 fs = VFSTOULFS(vp->v_mount)->um_lfs;
204 relock = 0; 204 relock = 0;
205 205
206 top: 206 top:
207 KASSERT(mutex_owned(vp->v_interlock) == false); 207 KASSERT(mutex_owned(vp->v_interlock) == false);
208 KASSERT(mutex_owned(&lfs_lock) == false); 208 KASSERT(mutex_owned(&lfs_lock) == false);
209 KASSERT(mutex_owned(&bufcache_lock) == false); 209 KASSERT(mutex_owned(&bufcache_lock) == false);
210 ASSERT_NO_SEGLOCK(fs); 210 ASSERT_NO_SEGLOCK(fs);
211 if (ip->i_flag & IN_CLEANING) { 211 if (ip->i_flag & IN_CLEANING) {
212 ivndebug(vp,"vflush/in_cleaning"); 212 ivndebug(vp,"vflush/in_cleaning");
213 mutex_enter(&lfs_lock); 213 mutex_enter(&lfs_lock);
214 LFS_CLR_UINO(ip, IN_CLEANING); 214 LFS_CLR_UINO(ip, IN_CLEANING);
215 LFS_SET_UINO(ip, IN_MODIFIED); 215 LFS_SET_UINO(ip, IN_MODIFIED);
216 mutex_exit(&lfs_lock); 216 mutex_exit(&lfs_lock);
217 217
218 /* 218 /*
219 * Toss any cleaning buffers that have real counterparts 219 * Toss any cleaning buffers that have real counterparts
220 * to avoid losing new data. 220 * to avoid losing new data.
221 */ 221 */
222 mutex_enter(vp->v_interlock); 222 mutex_enter(vp->v_interlock);
223 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 223 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
224 nbp = LIST_NEXT(bp, b_vnbufs); 224 nbp = LIST_NEXT(bp, b_vnbufs);
225 if (!LFS_IS_MALLOC_BUF(bp)) 225 if (!LFS_IS_MALLOC_BUF(bp))
226 continue; 226 continue;
227 /* 227 /*
228 * Look for pages matching the range covered 228 * Look for pages matching the range covered
229 * by cleaning blocks. It's okay if more dirty 229 * by cleaning blocks. It's okay if more dirty
230 * pages appear, so long as none disappear out 230 * pages appear, so long as none disappear out
231 * from under us. 231 * from under us.
232 */ 232 */
233 if (bp->b_lblkno > 0 && vp->v_type == VREG && 233 if (bp->b_lblkno > 0 && vp->v_type == VREG &&
234 vp != fs->lfs_ivnode) { 234 vp != fs->lfs_ivnode) {
235 struct vm_page *pg; 235 struct vm_page *pg;
236 voff_t off; 236 voff_t off;
237 237
238 for (off = lfs_lblktosize(fs, bp->b_lblkno); 238 for (off = lfs_lblktosize(fs, bp->b_lblkno);
239 off < lfs_lblktosize(fs, bp->b_lblkno + 1); 239 off < lfs_lblktosize(fs, bp->b_lblkno + 1);
240 off += PAGE_SIZE) { 240 off += PAGE_SIZE) {
241 pg = uvm_pagelookup(&vp->v_uobj, off); 241 pg = uvm_pagelookup(&vp->v_uobj, off);
242 if (pg == NULL) 242 if (pg == NULL)
243 continue; 243 continue;
244 if ((pg->flags & PG_CLEAN) == 0 || 244 if ((pg->flags & PG_CLEAN) == 0 ||
245 pmap_is_modified(pg)) { 245 pmap_is_modified(pg)) {
246 lfs_sb_addavail(fs, 246 lfs_sb_addavail(fs,
247 lfs_btofsb(fs, 247 lfs_btofsb(fs,
248 bp->b_bcount)); 248 bp->b_bcount));
249 wakeup(&fs->lfs_availsleep); 249 wakeup(&fs->lfs_availsleep);
250 mutex_exit(vp->v_interlock); 250 mutex_exit(vp->v_interlock);
251 lfs_freebuf(fs, bp); 251 lfs_freebuf(fs, bp);
252 mutex_enter(vp->v_interlock); 252 mutex_enter(vp->v_interlock);
253 bp = NULL; 253 bp = NULL;
254 break; 254 break;
255 } 255 }
256 } 256 }
257 } 257 }
258 for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp; 258 for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp;
259 tbp = tnbp) 259 tbp = tnbp)
260 { 260 {
261 tnbp = LIST_NEXT(tbp, b_vnbufs); 261 tnbp = LIST_NEXT(tbp, b_vnbufs);
262 if (tbp->b_vp == bp->b_vp 262 if (tbp->b_vp == bp->b_vp
263 && tbp->b_lblkno == bp->b_lblkno 263 && tbp->b_lblkno == bp->b_lblkno
264 && tbp != bp) 264 && tbp != bp)
265 { 265 {
266 lfs_sb_addavail(fs, lfs_btofsb(fs, 266 lfs_sb_addavail(fs, lfs_btofsb(fs,
267 bp->b_bcount)); 267 bp->b_bcount));
268 wakeup(&fs->lfs_availsleep); 268 wakeup(&fs->lfs_availsleep);
269 mutex_exit(vp->v_interlock); 269 mutex_exit(vp->v_interlock);
270 lfs_freebuf(fs, bp); 270 lfs_freebuf(fs, bp);
271 mutex_enter(vp->v_interlock); 271 mutex_enter(vp->v_interlock);
272 bp = NULL; 272 bp = NULL;
273 break; 273 break;
274 } 274 }
275 } 275 }
276 } 276 }
277 } else { 277 } else {
278 mutex_enter(vp->v_interlock); 278 mutex_enter(vp->v_interlock);
279 } 279 }
280 280
281 /* If the node is being written, wait until that is done */ 281 /* If the node is being written, wait until that is done */
282 while (WRITEINPROG(vp)) { 282 while (WRITEINPROG(vp)) {
283 ivndebug(vp,"vflush/writeinprog"); 283 ivndebug(vp,"vflush/writeinprog");
284 cv_wait(&vp->v_cv, vp->v_interlock); 284 cv_wait(&vp->v_cv, vp->v_interlock);
285 } 285 }
286 error = vdead_check(vp, VDEAD_NOWAIT); 286 error = vdead_check(vp, VDEAD_NOWAIT);
287 mutex_exit(vp->v_interlock); 287 mutex_exit(vp->v_interlock);
288 288
289 /* Protect against deadlock in vinvalbuf() */ 289 /* Protect against deadlock in vinvalbuf() */
290 lfs_seglock(fs, SEGM_SYNC | ((error != 0) ? SEGM_RECLAIM : 0)); 290 lfs_seglock(fs, SEGM_SYNC | ((error != 0) ? SEGM_RECLAIM : 0));
291 if (error != 0) { 291 if (error != 0) {
292 fs->lfs_reclino = ip->i_number; 292 fs->lfs_reclino = ip->i_number;
293 } 293 }
294 294
295 /* If we're supposed to flush a freed inode, just toss it */ 295 /* If we're supposed to flush a freed inode, just toss it */
296 if (ip->i_lfs_iflags & LFSI_DELETED) { 296 if (ip->i_lfs_iflags & LFSI_DELETED) {
297 DLOG((DLOG_VNODE, "lfs_vflush: ino %d freed, not flushing\n", 297 DLOG((DLOG_VNODE, "lfs_vflush: ino %d freed, not flushing\n",
298 ip->i_number)); 298 ip->i_number));
299 /* Drain v_numoutput */ 299 /* Drain v_numoutput */
300 mutex_enter(vp->v_interlock); 300 mutex_enter(vp->v_interlock);
301 while (vp->v_numoutput > 0) { 301 while (vp->v_numoutput > 0) {
302 cv_wait(&vp->v_cv, vp->v_interlock); 302 cv_wait(&vp->v_cv, vp->v_interlock);
303 } 303 }
304 KASSERT(vp->v_numoutput == 0); 304 KASSERT(vp->v_numoutput == 0);
305 mutex_exit(vp->v_interlock); 305 mutex_exit(vp->v_interlock);
306  306
307 mutex_enter(&bufcache_lock); 307 mutex_enter(&bufcache_lock);
308 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 308 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
309 nbp = LIST_NEXT(bp, b_vnbufs); 309 nbp = LIST_NEXT(bp, b_vnbufs);
310 310
311 KASSERT((bp->b_flags & B_GATHERED) == 0); 311 KASSERT((bp->b_flags & B_GATHERED) == 0);
312 if (bp->b_oflags & BO_DELWRI) { /* XXX always true? */ 312 if (bp->b_oflags & BO_DELWRI) { /* XXX always true? */
313 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 313 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
314 wakeup(&fs->lfs_availsleep); 314 wakeup(&fs->lfs_availsleep);
315 } 315 }
316 /* Copied from lfs_writeseg */ 316 /* Copied from lfs_writeseg */
317 if (bp->b_iodone != NULL) { 317 if (bp->b_iodone != NULL) {
318 mutex_exit(&bufcache_lock); 318 mutex_exit(&bufcache_lock);
319 biodone(bp); 319 biodone(bp);
320 mutex_enter(&bufcache_lock); 320 mutex_enter(&bufcache_lock);
321 } else { 321 } else {
322 bremfree(bp); 322 bremfree(bp);
323 LFS_UNLOCK_BUF(bp); 323 LFS_UNLOCK_BUF(bp);
324 mutex_enter(vp->v_interlock); 324 mutex_enter(vp->v_interlock);
325 bp->b_flags &= ~(B_READ | B_GATHERED); 325 bp->b_flags &= ~(B_READ | B_GATHERED);
326 bp->b_oflags = (bp->b_oflags & ~BO_DELWRI) | BO_DONE; 326 bp->b_oflags = (bp->b_oflags & ~BO_DELWRI) | BO_DONE;
327 bp->b_error = 0; 327 bp->b_error = 0;
328 reassignbuf(bp, vp); 328 reassignbuf(bp, vp);
329 mutex_exit(vp->v_interlock); 329 mutex_exit(vp->v_interlock);
330 brelse(bp, 0); 330 brelse(bp, 0);
331 } 331 }
332 } 332 }
333 mutex_exit(&bufcache_lock); 333 mutex_exit(&bufcache_lock);
334 LFS_CLR_UINO(ip, IN_CLEANING); 334 LFS_CLR_UINO(ip, IN_CLEANING);
335 LFS_CLR_UINO(ip, IN_MODIFIED | IN_ACCESSED); 335 LFS_CLR_UINO(ip, IN_MODIFIED | IN_ACCESSED);
336 ip->i_flag &= ~IN_ALLMOD; 336 ip->i_flag &= ~IN_ALLMOD;
337 DLOG((DLOG_VNODE, "lfs_vflush: done not flushing ino %d\n", 337 DLOG((DLOG_VNODE, "lfs_vflush: done not flushing ino %d\n",
338 ip->i_number)); 338 ip->i_number));
339 lfs_segunlock(fs); 339 lfs_segunlock(fs);
340 340
341 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); 341 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL);
342 342
343 return 0; 343 return 0;
344 } 344 }
345 345
346 fs->lfs_flushvp = vp; 346 fs->lfs_flushvp = vp;
347 if (LFS_SHOULD_CHECKPOINT(fs, fs->lfs_sp->seg_flags)) { 347 if (LFS_SHOULD_CHECKPOINT(fs, fs->lfs_sp->seg_flags)) {
348 error = lfs_segwrite(vp->v_mount, SEGM_CKP | SEGM_SYNC); 348 error = lfs_segwrite(vp->v_mount, SEGM_CKP | SEGM_SYNC);
349 fs->lfs_flushvp = NULL; 349 fs->lfs_flushvp = NULL;
350 KASSERT(fs->lfs_flushvp_fakevref == 0); 350 KASSERT(fs->lfs_flushvp_fakevref == 0);
351 lfs_segunlock(fs); 351 lfs_segunlock(fs);
352 352
353 /* Make sure that any pending buffers get written */ 353 /* Make sure that any pending buffers get written */
354 mutex_enter(vp->v_interlock); 354 mutex_enter(vp->v_interlock);
355 while (vp->v_numoutput > 0) { 355 while (vp->v_numoutput > 0) {
356 cv_wait(&vp->v_cv, vp->v_interlock); 356 cv_wait(&vp->v_cv, vp->v_interlock);
357 } 357 }
358 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); 358 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL);
359 KASSERT(vp->v_numoutput == 0); 359 KASSERT(vp->v_numoutput == 0);
360 mutex_exit(vp->v_interlock); 360 mutex_exit(vp->v_interlock);
361 361
362 return error; 362 return error;
363 } 363 }
364 sp = fs->lfs_sp; 364 sp = fs->lfs_sp;
365 365
366 flushed = 0; 366 flushed = 0;
367 if (VPISEMPTY(vp)) { 367 if (VPISEMPTY(vp)) {
368 lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY); 368 lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
369 ++flushed; 369 ++flushed;
370 } else if ((ip->i_flag & IN_CLEANING) && 370 } else if ((ip->i_flag & IN_CLEANING) &&
371 (fs->lfs_sp->seg_flags & SEGM_CLEAN)) { 371 (fs->lfs_sp->seg_flags & SEGM_CLEAN)) {
372 ivndebug(vp,"vflush/clean"); 372 ivndebug(vp,"vflush/clean");
373 lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN); 373 lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN);
374 ++flushed; 374 ++flushed;
375 } else if (lfs_dostats) { 375 } else if (lfs_dostats) {
376 if (!VPISEMPTY(vp) || (VTOI(vp)->i_flag & IN_ALLMOD)) 376 if (!VPISEMPTY(vp) || (VTOI(vp)->i_flag & IN_ALLMOD))
377 ++lfs_stats.vflush_invoked; 377 ++lfs_stats.vflush_invoked;
378 ivndebug(vp,"vflush"); 378 ivndebug(vp,"vflush");
379 } 379 }
380 380
381#ifdef DIAGNOSTIC 381#ifdef DIAGNOSTIC
382 if (vp->v_uflag & VU_DIROP) { 382 if (vp->v_uflag & VU_DIROP) {
383 DLOG((DLOG_VNODE, "lfs_vflush: flushing VU_DIROP\n")); 383 DLOG((DLOG_VNODE, "lfs_vflush: flushing VU_DIROP\n"));
384 /* panic("lfs_vflush: VU_DIROP being flushed...this can\'t happen"); */ 384 /* panic("lfs_vflush: VU_DIROP being flushed...this can\'t happen"); */
385 } 385 }
386#endif 386#endif
387 387
388 do { 388 do {
389#ifdef DEBUG 389#ifdef DEBUG
390 int loopcount = 0; 390 int loopcount = 0;
391#endif 391#endif
392 do { 392 do {
393 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) { 393 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) {
394 relock = lfs_writefile(fs, sp, vp); 394 relock = lfs_writefile(fs, sp, vp);
395 if (relock && vp != fs->lfs_ivnode) { 395 if (relock && vp != fs->lfs_ivnode) {
396 /* 396 /*
397 * Might have to wait for the 397 * Might have to wait for the
398 * cleaner to run; but we're 398 * cleaner to run; but we're
399 * still not done with this vnode. 399 * still not done with this vnode.
400 * XXX we can do better than this. 400 * XXX we can do better than this.
401 */ 401 */
402 KDASSERT(ip->i_number != LFS_IFILE_INUM); 402 KDASSERT(ip->i_number != LFS_IFILE_INUM);
403 lfs_writeinode(fs, sp, ip); 403 lfs_writeinode(fs, sp, ip);
404 mutex_enter(&lfs_lock); 404 mutex_enter(&lfs_lock);
405 LFS_SET_UINO(ip, IN_MODIFIED); 405 LFS_SET_UINO(ip, IN_MODIFIED);
406 mutex_exit(&lfs_lock); 406 mutex_exit(&lfs_lock);
407 lfs_writeseg(fs, sp); 407 lfs_writeseg(fs, sp);
408 lfs_segunlock(fs); 408 lfs_segunlock(fs);
409 lfs_segunlock_relock(fs); 409 lfs_segunlock_relock(fs);
410 goto top; 410 goto top;
411 } 411 }
412 } 412 }
413 /* 413 /*
414 * If we begin a new segment in the middle of writing 414 * If we begin a new segment in the middle of writing
415 * the Ifile, it creates an inconsistent checkpoint, 415 * the Ifile, it creates an inconsistent checkpoint,
416 * since the Ifile information for the new segment 416 * since the Ifile information for the new segment
417 * is not up-to-date. Take care of this here by 417 * is not up-to-date. Take care of this here by
418 * sending the Ifile through again in case there 418 * sending the Ifile through again in case there
419 * are newly dirtied blocks. But wait, there's more! 419 * are newly dirtied blocks. But wait, there's more!
420 * This second Ifile write could *also* cross a segment 420 * This second Ifile write could *also* cross a segment
421 * boundary, if the first one was large. The second 421 * boundary, if the first one was large. The second
422 * one is guaranteed to be no more than 8 blocks, 422 * one is guaranteed to be no more than 8 blocks,
423 * though (two segment blocks and supporting indirects) 423 * though (two segment blocks and supporting indirects)
424 * so the third write *will not* cross the boundary. 424 * so the third write *will not* cross the boundary.
425 */ 425 */
426 if (vp == fs->lfs_ivnode) { 426 if (vp == fs->lfs_ivnode) {
427 lfs_writefile(fs, sp, vp); 427 lfs_writefile(fs, sp, vp);
428 lfs_writefile(fs, sp, vp); 428 lfs_writefile(fs, sp, vp);
429 } 429 }
430#ifdef DEBUG 430#ifdef DEBUG
431 if (++loopcount > 2) 431 if (++loopcount > 2)
432 log(LOG_NOTICE, "lfs_vflush: looping count=%d\n", loopcount); 432 log(LOG_NOTICE, "lfs_vflush: looping count=%d\n", loopcount);
433#endif 433#endif
434 } while (lfs_writeinode(fs, sp, ip)); 434 } while (lfs_writeinode(fs, sp, ip));
435 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); 435 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
436 436
437 if (lfs_dostats) { 437 if (lfs_dostats) {
438 ++lfs_stats.nwrites; 438 ++lfs_stats.nwrites;
439 if (sp->seg_flags & SEGM_SYNC) 439 if (sp->seg_flags & SEGM_SYNC)
440 ++lfs_stats.nsync_writes; 440 ++lfs_stats.nsync_writes;
441 if (sp->seg_flags & SEGM_CKP) 441 if (sp->seg_flags & SEGM_CKP)
442 ++lfs_stats.ncheckpoints; 442 ++lfs_stats.ncheckpoints;
443 } 443 }
444 /* 444 /*
445 * If we were called from somewhere that has already held the seglock 445 * If we were called from somewhere that has already held the seglock
446 * (e.g., lfs_markv()), the lfs_segunlock will not wait for 446 * (e.g., lfs_markv()), the lfs_segunlock will not wait for
447 * the write to complete because we are still locked. 447 * the write to complete because we are still locked.
448 * Since lfs_vflush() must return the vnode with no dirty buffers, 448 * Since lfs_vflush() must return the vnode with no dirty buffers,
449 * we must explicitly wait, if that is the case. 449 * we must explicitly wait, if that is the case.
450 * 450 *
451 * We compare the iocount against 1, not 0, because it is 451 * We compare the iocount against 1, not 0, because it is
452 * artificially incremented by lfs_seglock(). 452 * artificially incremented by lfs_seglock().
453 */ 453 */
454 mutex_enter(&lfs_lock); 454 mutex_enter(&lfs_lock);
455 if (fs->lfs_seglock > 1) { 455 if (fs->lfs_seglock > 1) {
456 while (fs->lfs_iocount > 1) 456 while (fs->lfs_iocount > 1)
457 (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, 457 (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
458 "lfs_vflush", 0, &lfs_lock); 458 "lfs_vflush", 0, &lfs_lock);
459 } 459 }
460 mutex_exit(&lfs_lock); 460 mutex_exit(&lfs_lock);
461 461
462 lfs_segunlock(fs); 462 lfs_segunlock(fs);
463 463
464 /* Wait for these buffers to be recovered by aiodoned */ 464 /* Wait for these buffers to be recovered by aiodoned */
465 mutex_enter(vp->v_interlock); 465 mutex_enter(vp->v_interlock);
466 while (vp->v_numoutput > 0) { 466 while (vp->v_numoutput > 0) {
467 cv_wait(&vp->v_cv, vp->v_interlock); 467 cv_wait(&vp->v_cv, vp->v_interlock);
468 } 468 }
469 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); 469 KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL);
470 KASSERT(vp->v_numoutput == 0); 470 KASSERT(vp->v_numoutput == 0);
471 mutex_exit(vp->v_interlock); 471 mutex_exit(vp->v_interlock);
472 472
473 fs->lfs_flushvp = NULL; 473 fs->lfs_flushvp = NULL;
474 KASSERT(fs->lfs_flushvp_fakevref == 0); 474 KASSERT(fs->lfs_flushvp_fakevref == 0);
475 475
476 return (0); 476 return (0);
477} 477}
478 478
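
lfs_vflush() above ends by draining vp->v_numoutput under the vnode interlock, sleeping on v_cv until aiodoned has recovered every buffer. The same wait-loop shape, modeled with POSIX primitives purely for illustration (no kernel API is implied):

    /*
     * Userland model of the v_numoutput drain in lfs_vflush; the
     * kernel's v_interlock and cv_wait() are replaced by a POSIX
     * mutex/condvar, and "aiodone" is an invented stand-in thread.
     */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t interlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    static int numoutput = 3;       /* pretend three buffers in flight */

    static void *aiodone(void *arg) /* finishes one buffer per pass */
    {
        (void)arg;
        for (;;) {
            usleep(1000);
            pthread_mutex_lock(&interlock);
            if (numoutput == 0) {
                pthread_mutex_unlock(&interlock);
                return NULL;
            }
            if (--numoutput == 0)
                pthread_cond_broadcast(&cv);
            pthread_mutex_unlock(&interlock);
        }
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, aiodone, NULL);
        pthread_mutex_lock(&interlock);
        while (numoutput > 0)       /* same shape as the cv_wait loop */
            pthread_cond_wait(&cv, &interlock);
        pthread_mutex_unlock(&interlock);
        pthread_join(t, NULL);
        printf("all buffers recovered\n");
        return 0;
    }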
479int 479int
480lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) 480lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op)
481{ 481{
482 struct inode *ip; 482 struct inode *ip;
483 struct vnode *vp; 483 struct vnode *vp;
484 int inodes_written = 0; 484 int inodes_written = 0;
485 int error = 0; 485 int error = 0;
486 486
487 ASSERT_SEGLOCK(fs); 487 ASSERT_SEGLOCK(fs);
488 loop: 488 loop:
489 /* start at last (newest) vnode. */ 489 /* start at last (newest) vnode. */
490 mutex_enter(&mntvnode_lock); 490 mutex_enter(&mntvnode_lock);
491 TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) { 491 TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) {
492 /* 492 /*
493 * If the vnode that we are about to sync is no longer 493 * If the vnode that we are about to sync is no longer
494 * associated with this mount point, start over. 494 * associated with this mount point, start over.
495 */ 495 */
496 if (vp->v_mount != mp) { 496 if (vp->v_mount != mp) {
497 DLOG((DLOG_VNODE, "lfs_writevnodes: starting over\n")); 497 DLOG((DLOG_VNODE, "lfs_writevnodes: starting over\n"));
498 /* 498 /*
499 * After this, pages might be busy 499 * After this, pages might be busy
500 * due to our own previous putpages. 500 * due to our own previous putpages.
501 * Start actual segment write here to avoid deadlock. 501 * Start actual segment write here to avoid deadlock.
502 * If we were just writing one segment and we've done 502 * If we were just writing one segment and we've done
503 * that, break out. 503 * that, break out.
504 */ 504 */
505 mutex_exit(&mntvnode_lock); 505 mutex_exit(&mntvnode_lock);
506 if (lfs_writeseg(fs, sp) && 506 if (lfs_writeseg(fs, sp) &&
507 (sp->seg_flags & SEGM_SINGLE) && 507 (sp->seg_flags & SEGM_SINGLE) &&
508 lfs_sb_getcurseg(fs) != fs->lfs_startseg) { 508 lfs_sb_getcurseg(fs) != fs->lfs_startseg) {
509 DLOG((DLOG_VNODE, "lfs_writevnodes: breaking out of segment write at daddr 0x%jx\n", (uintmax_t)lfs_sb_getoffset(fs))); 509 DLOG((DLOG_VNODE, "lfs_writevnodes: breaking out of segment write at daddr 0x%jx\n", (uintmax_t)lfs_sb_getoffset(fs)));
510 break; 510 break;
511 } 511 }
512 goto loop; 512 goto loop;
513 } 513 }
514 514
515 mutex_enter(vp->v_interlock); 515 mutex_enter(vp->v_interlock);
516 if (vp->v_type == VNON || (vp->v_iflag & VI_MARKER) || 516 if (vp->v_type == VNON || (vp->v_iflag & VI_MARKER) ||
517 vdead_check(vp, VDEAD_NOWAIT) != 0) { 517 vdead_check(vp, VDEAD_NOWAIT) != 0) {
518 mutex_exit(vp->v_interlock); 518 mutex_exit(vp->v_interlock);
519 continue; 519 continue;
520 } 520 }
521 521
522 ip = VTOI(vp); 522 ip = VTOI(vp);
523 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || 523 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) ||
524 (op != VN_DIROP && op != VN_CLEAN && 524 (op != VN_DIROP && op != VN_CLEAN &&
525 (vp->v_uflag & VU_DIROP))) { 525 (vp->v_uflag & VU_DIROP))) {
526 mutex_exit(vp->v_interlock); 526 mutex_exit(vp->v_interlock);
527 vndebug(vp,"dirop"); 527 vndebug(vp,"dirop");
528 continue; 528 continue;
529 } 529 }
530 530
531 if (op == VN_EMPTY && !VPISEMPTY(vp)) { 531 if (op == VN_EMPTY && !VPISEMPTY(vp)) {
532 mutex_exit(vp->v_interlock); 532 mutex_exit(vp->v_interlock);
533 vndebug(vp,"empty"); 533 vndebug(vp,"empty");
534 continue; 534 continue;
535 } 535 }
536 536
537 if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM 537 if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM
538 && vp != fs->lfs_flushvp 538 && vp != fs->lfs_flushvp
539 && !(ip->i_flag & IN_CLEANING)) { 539 && !(ip->i_flag & IN_CLEANING)) {
540 mutex_exit(vp->v_interlock); 540 mutex_exit(vp->v_interlock);
541 vndebug(vp,"cleaning"); 541 vndebug(vp,"cleaning");
542 continue; 542 continue;
543 } 543 }
544 544
545 mutex_exit(&mntvnode_lock); 545 mutex_exit(&mntvnode_lock);
546 if (vget(vp, LK_NOWAIT, false /* !wait */)) { 546 if (vget(vp, LK_NOWAIT, false /* !wait */)) {
547 vndebug(vp,"vget"); 547 vndebug(vp,"vget");
548 mutex_enter(&mntvnode_lock); 548 mutex_enter(&mntvnode_lock);
549 continue; 549 continue;
550 } 550 }
551 551
552 /* 552 /*
553 * Write the inode/file if dirty and it's not the IFILE. 553 * Write the inode/file if dirty and it's not the IFILE.
554 */ 554 */
555 if ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp)) { 555 if ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp)) {
556 if (ip->i_number != LFS_IFILE_INUM) { 556 if (ip->i_number != LFS_IFILE_INUM) {
557 error = lfs_writefile(fs, sp, vp); 557 error = lfs_writefile(fs, sp, vp);
558 if (error) { 558 if (error) {
559 vrele(vp); 559 vrele(vp);
560 if (error == EAGAIN) { 560 if (error == EAGAIN) {
561 /* 561 /*
562 * This error from lfs_putpages 562 * This error from lfs_putpages
563 * indicates we need to drop 563 * indicates we need to drop
564 * the segment lock and start 564 * the segment lock and start
565 * over after the cleaner has 565 * over after the cleaner has
566 * had a chance to run. 566 * had a chance to run.
567 */ 567 */
568 lfs_writeinode(fs, sp, ip); 568 lfs_writeinode(fs, sp, ip);
569 lfs_writeseg(fs, sp); 569 lfs_writeseg(fs, sp);
570 if (!VPISEMPTY(vp) && 570 if (!VPISEMPTY(vp) &&
571 !WRITEINPROG(vp) && 571 !WRITEINPROG(vp) &&
572 !(ip->i_flag & IN_ALLMOD)) { 572 !(ip->i_flag & IN_ALLMOD)) {
573 mutex_enter(&lfs_lock); 573 mutex_enter(&lfs_lock);
574 LFS_SET_UINO(ip, IN_MODIFIED); 574 LFS_SET_UINO(ip, IN_MODIFIED);
575 mutex_exit(&lfs_lock); 575 mutex_exit(&lfs_lock);
576 } 576 }
577 mutex_enter(&mntvnode_lock); 577 mutex_enter(&mntvnode_lock);
578 break; 578 break;
579 } 579 }
580 error = 0; /* XXX not quite right */ 580 error = 0; /* XXX not quite right */
581 mutex_enter(&mntvnode_lock); 581 mutex_enter(&mntvnode_lock);
582 continue; 582 continue;
583 } 583 }
584  584
585 if (!VPISEMPTY(vp)) { 585 if (!VPISEMPTY(vp)) {
586 if (WRITEINPROG(vp)) { 586 if (WRITEINPROG(vp)) {
587 ivndebug(vp,"writevnodes/write2"); 587 ivndebug(vp,"writevnodes/write2");
588 } else if (!(ip->i_flag & IN_ALLMOD)) { 588 } else if (!(ip->i_flag & IN_ALLMOD)) {
589 mutex_enter(&lfs_lock); 589 mutex_enter(&lfs_lock);
590 LFS_SET_UINO(ip, IN_MODIFIED); 590 LFS_SET_UINO(ip, IN_MODIFIED);
591 mutex_exit(&lfs_lock); 591 mutex_exit(&lfs_lock);
592 } 592 }
593 } 593 }
594 (void) lfs_writeinode(fs, sp, ip); 594 (void) lfs_writeinode(fs, sp, ip);
595 inodes_written++; 595 inodes_written++;
596 } 596 }
597 } 597 }
598 598
599 vrele(vp); 599 vrele(vp);
600 600
601 mutex_enter(&mntvnode_lock); 601 mutex_enter(&mntvnode_lock);
602 } 602 }
603 mutex_exit(&mntvnode_lock); 603 mutex_exit(&mntvnode_lock);
604 return error; 604 return error;
605} 605}
606 606
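
lfs_writevnodes() above walks the mount's vnode list newest-first and, whenever a vnode turns out to no longer belong to the mount, abandons the walk and restarts from "loop". A minimal sketch of that restart pattern over an ordinary linked list ("struct node" and "belongs" are invented names, not kernel types):

    /*
     * Restart-from-the-top walk, as in lfs_writevnodes; "struct node"
     * and "belongs" are invented for the sketch and stand in for the
     * mount's vnode list and the vp->v_mount == mp check.
     */
    #include <stdio.h>

    struct node {
        int belongs;            /* vp->v_mount == mp, roughly */
        struct node *next;
    };

    static void visit(struct node *head)
    {
        struct node *n;
        int restarts = 0;

    loop:
        for (n = head; n != NULL; n = n->next) {
            if (!n->belongs) {
                /* List changed under us: repair and start over. */
                n->belongs = 1;
                restarts++;
                goto loop;
            }
            /* ... write the vnode here ... */
        }
        printf("walk finished after %d restart(s)\n", restarts);
    }

    int main(void)
    {
        struct node c = { 1, NULL };
        struct node b = { 0, &c };
        struct node a = { 1, &b };

        visit(&a);
        return 0;
    }

In the kernel version the restart point is also where lfs_writeseg() is invoked, so a walk that keeps getting interrupted still makes forward progress on the segment write.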
607/* 607/*
608 * Do a checkpoint. 608 * Do a checkpoint.
609 */ 609 */
610int 610int
611lfs_segwrite(struct mount *mp, int flags) 611lfs_segwrite(struct mount *mp, int flags)
612{ 612{
613 struct buf *bp; 613 struct buf *bp;
614 struct inode *ip; 614 struct inode *ip;
615 struct lfs *fs; 615 struct lfs *fs;
616 struct segment *sp; 616 struct segment *sp;
617 struct vnode *vp; 617 struct vnode *vp;
618 SEGUSE *segusep; 618 SEGUSE *segusep;
619 int do_ckp, did_ckp, error; 619 int do_ckp, did_ckp, error;
620 unsigned n, segleft, maxseg, sn, i, curseg; 620 unsigned n, segleft, maxseg, sn, i, curseg;
621 int writer_set = 0; 621 int writer_set = 0;
622 int dirty; 622 int dirty;
623 int redo; 623 int redo;
624 int um_error; 624 int um_error;
625 625
626 fs = VFSTOULFS(mp)->um_lfs; 626 fs = VFSTOULFS(mp)->um_lfs;
627 ASSERT_MAYBE_SEGLOCK(fs); 627 ASSERT_MAYBE_SEGLOCK(fs);
628 628
629 if (fs->lfs_ronly) 629 if (fs->lfs_ronly)
630 return EROFS; 630 return EROFS;
631 631
632 lfs_imtime(fs); 632 lfs_imtime(fs);
633 633
634 /* 634 /*
635 * Allocate a segment structure and enough space to hold pointers to 635 * Allocate a segment structure and enough space to hold pointers to
636 * the maximum possible number of buffers which can be described in a 636 * the maximum possible number of buffers which can be described in a
637 * single summary block. 637 * single summary block.
638 */ 638 */
639 do_ckp = LFS_SHOULD_CHECKPOINT(fs, flags); 639 do_ckp = LFS_SHOULD_CHECKPOINT(fs, flags);
640 640
641 /* We can't do a partial write and checkpoint at the same time. */ 641 /* We can't do a partial write and checkpoint at the same time. */
642 if (do_ckp) 642 if (do_ckp)
643 flags &= ~SEGM_SINGLE; 643 flags &= ~SEGM_SINGLE;
644 644
645 lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0)); 645 lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
646 sp = fs->lfs_sp; 646 sp = fs->lfs_sp;
647 if (sp->seg_flags & (SEGM_CLEAN | SEGM_CKP)) 647 if (sp->seg_flags & (SEGM_CLEAN | SEGM_CKP))
648 do_ckp = 1; 648 do_ckp = 1;
649 649
650 /* 650 /*
651 * If lfs_flushvp is non-NULL, we are called from lfs_vflush, 651 * If lfs_flushvp is non-NULL, we are called from lfs_vflush,
652 * in which case we have to flush *all* buffers off of this vnode. 652 * in which case we have to flush *all* buffers off of this vnode.
653 * We don't care about other nodes, but write any non-dirop nodes 653 * We don't care about other nodes, but write any non-dirop nodes
654 * anyway in anticipation of another getnewvnode(). 654 * anyway in anticipation of another getnewvnode().
655 * 655 *
656 * If we're cleaning we only write cleaning and ifile blocks, and 656 * If we're cleaning we only write cleaning and ifile blocks, and
657 * no dirops, since otherwise we'd risk corruption in a crash. 657 * no dirops, since otherwise we'd risk corruption in a crash.
658 */ 658 */
659 if (sp->seg_flags & SEGM_CLEAN) 659 if (sp->seg_flags & SEGM_CLEAN)
660 lfs_writevnodes(fs, mp, sp, VN_CLEAN); 660 lfs_writevnodes(fs, mp, sp, VN_CLEAN);
661 else if (!(sp->seg_flags & SEGM_FORCE_CKP)) { 661 else if (!(sp->seg_flags & SEGM_FORCE_CKP)) {
662 do { 662 do {
663 um_error = lfs_writevnodes(fs, mp, sp, VN_REG); 663 um_error = lfs_writevnodes(fs, mp, sp, VN_REG);
664 if ((sp->seg_flags & SEGM_SINGLE) && 664 if ((sp->seg_flags & SEGM_SINGLE) &&
665 lfs_sb_getcurseg(fs) != fs->lfs_startseg) { 665 lfs_sb_getcurseg(fs) != fs->lfs_startseg) {
666 DLOG((DLOG_SEG, "lfs_segwrite: breaking out of segment write at daddr 0x%jx\n", (uintmax_t)lfs_sb_getoffset(fs))); 666 DLOG((DLOG_SEG, "lfs_segwrite: breaking out of segment write at daddr 0x%jx\n", (uintmax_t)lfs_sb_getoffset(fs)));
667 break; 667 break;
668 } 668 }
669 669
670 if (do_ckp || fs->lfs_dirops == 0) { 670 if (do_ckp || fs->lfs_dirops == 0) {
671 if (!writer_set) { 671 if (!writer_set) {
672 lfs_writer_enter(fs, "lfs writer"); 672 lfs_writer_enter(fs, "lfs writer");
673 writer_set = 1; 673 writer_set = 1;
674 } 674 }
675 error = lfs_writevnodes(fs, mp, sp, VN_DIROP); 675 error = lfs_writevnodes(fs, mp, sp, VN_DIROP);
676 if (um_error == 0) 676 if (um_error == 0)
677 um_error = error; 677 um_error = error;
678 /* In case writevnodes errored out */ 678 /* In case writevnodes errored out */
679 lfs_flush_dirops(fs); 679 lfs_flush_dirops(fs);
680 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); 680 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
681 lfs_finalize_fs_seguse(fs); 681 lfs_finalize_fs_seguse(fs);
682 } 682 }
683 if (do_ckp && um_error) { 683 if (do_ckp && um_error) {
684 lfs_segunlock_relock(fs); 684 lfs_segunlock_relock(fs);
685 sp = fs->lfs_sp; 685 sp = fs->lfs_sp;
686 } 686 }
687 } while (do_ckp && um_error != 0); 687 } while (do_ckp && um_error != 0);
688 } 688 }
689 689
690 /* 690 /*
691 * If we are doing a checkpoint, mark everything since the 691 * If we are doing a checkpoint, mark everything since the
692 * last checkpoint as no longer ACTIVE. 692 * last checkpoint as no longer ACTIVE.
693 */ 693 */
694 if (do_ckp || fs->lfs_doifile) { 694 if (do_ckp || fs->lfs_doifile) {
695 segleft = lfs_sb_getnseg(fs); 695 segleft = lfs_sb_getnseg(fs);
696 curseg = 0; 696 curseg = 0;
697 for (n = 0; n < lfs_sb_getsegtabsz(fs); n++) { 697 for (n = 0; n < lfs_sb_getsegtabsz(fs); n++) {
698 dirty = 0; 698 dirty = 0;
699 if (bread(fs->lfs_ivnode, lfs_sb_getcleansz(fs) + n, 699 if (bread(fs->lfs_ivnode, lfs_sb_getcleansz(fs) + n,
700 lfs_sb_getbsize(fs), B_MODIFY, &bp)) 700 lfs_sb_getbsize(fs), B_MODIFY, &bp))
701 panic("lfs_segwrite: ifile read"); 701 panic("lfs_segwrite: ifile read");
702 segusep = (SEGUSE *)bp->b_data; 702 segusep = (SEGUSE *)bp->b_data;
703 maxseg = min(segleft, lfs_sb_getsepb(fs)); 703 maxseg = min(segleft, lfs_sb_getsepb(fs));
704 for (i = 0; i < maxseg; i++) { 704 for (i = 0; i < maxseg; i++) {
705 sn = curseg + i; 705 sn = curseg + i;
706 if (sn != lfs_dtosn(fs, lfs_sb_getcurseg(fs)) && 706 if (sn != lfs_dtosn(fs, lfs_sb_getcurseg(fs)) &&
707 segusep->su_flags & SEGUSE_ACTIVE) { 707 segusep->su_flags & SEGUSE_ACTIVE) {
708 segusep->su_flags &= ~SEGUSE_ACTIVE; 708 segusep->su_flags &= ~SEGUSE_ACTIVE;
709 --fs->lfs_nactive; 709 --fs->lfs_nactive;
710 ++dirty; 710 ++dirty;
711 } 711 }
712 fs->lfs_suflags[fs->lfs_activesb][sn] = 712 fs->lfs_suflags[fs->lfs_activesb][sn] =
713 segusep->su_flags; 713 segusep->su_flags;
714 if (fs->lfs_version > 1) 714 if (fs->lfs_version > 1)
715 ++segusep; 715 ++segusep;
716 else 716 else
717 segusep = (SEGUSE *) 717 segusep = (SEGUSE *)
718 ((SEGUSE_V1 *)segusep + 1); 718 ((SEGUSE_V1 *)segusep + 1);
719 } 719 }
720 720
721 if (dirty) 721 if (dirty)
722 error = LFS_BWRITE_LOG(bp); /* Ifile */ 722 error = LFS_BWRITE_LOG(bp); /* Ifile */
723 else 723 else
724 brelse(bp, 0); 724 brelse(bp, 0);
725 segleft -= lfs_sb_getsepb(fs); 725 segleft -= lfs_sb_getsepb(fs);
726 curseg += lfs_sb_getsepb(fs); 726 curseg += lfs_sb_getsepb(fs);
727 } 727 }
728 } 728 }
729 729
730 KASSERT(LFS_SEGLOCK_HELD(fs)); 730 KASSERT(LFS_SEGLOCK_HELD(fs));
731 731
732 did_ckp = 0; 732 did_ckp = 0;
733 if (do_ckp || fs->lfs_doifile) { 733 if (do_ckp || fs->lfs_doifile) {
734 vp = fs->lfs_ivnode; 734 vp = fs->lfs_ivnode;
735#ifdef DEBUG 735#ifdef DEBUG
736 int loopcount = 0; 736 int loopcount = 0;
737#endif 737#endif
738 do { 738 do {
739#ifdef DEBUG 739#ifdef DEBUG
740 LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0, curproc->p_pid); 740 LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0, curproc->p_pid);
741#endif 741#endif
742 mutex_enter(&lfs_lock); 742 mutex_enter(&lfs_lock);
743 fs->lfs_flags &= ~LFS_IFDIRTY; 743 fs->lfs_flags &= ~LFS_IFDIRTY;
744 mutex_exit(&lfs_lock); 744 mutex_exit(&lfs_lock);
745 745
746 ip = VTOI(vp); 746 ip = VTOI(vp);
747 747
748 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) { 748 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) {
749 /* 749 /*
750 * Ifile has no pages, so we don't need 750 * Ifile has no pages, so we don't need
751 * to check error return here. 751 * to check error return here.
752 */ 752 */
753 lfs_writefile(fs, sp, vp); 753 lfs_writefile(fs, sp, vp);
754 /* 754 /*
755 * Ensure the Ifile takes the current segment 755 * Ensure the Ifile takes the current segment
756 * into account. See comment in lfs_vflush. 756 * into account. See comment in lfs_vflush.
757 */ 757 */
758 lfs_writefile(fs, sp, vp); 758 lfs_writefile(fs, sp, vp);
759 lfs_writefile(fs, sp, vp); 759 lfs_writefile(fs, sp, vp);
760 } 760 }
761 761
762 if (ip->i_flag & IN_ALLMOD) 762 if (ip->i_flag & IN_ALLMOD)
763 ++did_ckp; 763 ++did_ckp;
764#if 0 764#if 0
765 redo = (do_ckp ? lfs_writeinode(fs, sp, ip) : 0); 765 redo = (do_ckp ? lfs_writeinode(fs, sp, ip) : 0);
766#else 766#else
767 redo = lfs_writeinode(fs, sp, ip); 767 redo = lfs_writeinode(fs, sp, ip);
768#endif 768#endif
769 redo += lfs_writeseg(fs, sp); 769 redo += lfs_writeseg(fs, sp);
770 mutex_enter(&lfs_lock); 770 mutex_enter(&lfs_lock);
771 redo += (fs->lfs_flags & LFS_IFDIRTY); 771 redo += (fs->lfs_flags & LFS_IFDIRTY);
772 mutex_exit(&lfs_lock); 772 mutex_exit(&lfs_lock);
773#ifdef DEBUG 773#ifdef DEBUG
774 if (++loopcount > 2) 774 if (++loopcount > 2)
775 log(LOG_NOTICE, "lfs_segwrite: looping count=%d\n", 775 log(LOG_NOTICE, "lfs_segwrite: looping count=%d\n",
776 loopcount); 776 loopcount);
777#endif 777#endif
778 } while (redo && do_ckp); 778 } while (redo && do_ckp);
779 779
780 /* 780 /*
781 * Unless we are unmounting, the Ifile may continue to have 781 * Unless we are unmounting, the Ifile may continue to have
782 * dirty blocks even after a checkpoint, due to changes to 782 * dirty blocks even after a checkpoint, due to changes to
783 * inodes' atime. If we're checkpointing, it's "impossible" 783 * inodes' atime. If we're checkpointing, it's "impossible"
784 * for other parts of the Ifile to be dirty after the loop 784 * for other parts of the Ifile to be dirty after the loop
785 * above, since we hold the segment lock. 785 * above, since we hold the segment lock.
786 */ 786 */
787 mutex_enter(vp->v_interlock); 787 mutex_enter(vp->v_interlock);
788 if (LIST_EMPTY(&vp->v_dirtyblkhd)) { 788 if (LIST_EMPTY(&vp->v_dirtyblkhd)) {
789 LFS_CLR_UINO(ip, IN_ALLMOD); 789 LFS_CLR_UINO(ip, IN_ALLMOD);
790 } 790 }
791#ifdef DIAGNOSTIC 791#ifdef DIAGNOSTIC
792 else if (do_ckp) { 792 else if (do_ckp) {
793 int do_panic = 0; 793 int do_panic = 0;
794 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 794 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
795 if (bp->b_lblkno < lfs_sb_getcleansz(fs) + 795 if (bp->b_lblkno < lfs_sb_getcleansz(fs) +
796 lfs_sb_getsegtabsz(fs) && 796 lfs_sb_getsegtabsz(fs) &&
797 !(bp->b_flags & B_GATHERED)) { 797 !(bp->b_flags & B_GATHERED)) {
798 printf("ifile lbn %ld still dirty (flags %lx)\n", 798 printf("ifile lbn %ld still dirty (flags %lx)\n",
799 (long)bp->b_lblkno, 799 (long)bp->b_lblkno,
800 (long)bp->b_flags); 800 (long)bp->b_flags);
801 ++do_panic; 801 ++do_panic;
802 } 802 }
803 } 803 }
804 if (do_panic) 804 if (do_panic)
805 panic("dirty blocks"); 805 panic("dirty blocks");
806 } 806 }
807#endif 807#endif
808 mutex_exit(vp->v_interlock); 808 mutex_exit(vp->v_interlock);
809 } else { 809 } else {
810 (void) lfs_writeseg(fs, sp); 810 (void) lfs_writeseg(fs, sp);
811 } 811 }
812 812
813 /* Note Ifile no longer needs to be written */ 813 /* Note Ifile no longer needs to be written */
814 fs->lfs_doifile = 0; 814 fs->lfs_doifile = 0;
815 if (writer_set) 815 if (writer_set)
816 lfs_writer_leave(fs); 816 lfs_writer_leave(fs);
817 817
818 /* 818 /*
819 * If we didn't write the Ifile, we didn't really do anything. 819 * If we didn't write the Ifile, we didn't really do anything.
820 * That means that (1) there is a checkpoint on disk and (2) 820 * That means that (1) there is a checkpoint on disk and (2)
821 * nothing has changed since it was written. 821 * nothing has changed since it was written.
822 * 822 *
823 * Take the flags off of the segment so that lfs_segunlock 823 * Take the flags off of the segment so that lfs_segunlock
824 * doesn't have to write the superblock either. 824 * doesn't have to write the superblock either.
825 */ 825 */
826 if (do_ckp && !did_ckp) { 826 if (do_ckp && !did_ckp) {
827 sp->seg_flags &= ~SEGM_CKP; 827 sp->seg_flags &= ~SEGM_CKP;
828 } 828 }
829 829
830 if (lfs_dostats) { 830 if (lfs_dostats) {
831 ++lfs_stats.nwrites; 831 ++lfs_stats.nwrites;
832 if (sp->seg_flags & SEGM_SYNC) 832 if (sp->seg_flags & SEGM_SYNC)
833 ++lfs_stats.nsync_writes; 833 ++lfs_stats.nsync_writes;
834 if (sp->seg_flags & SEGM_CKP) 834 if (sp->seg_flags & SEGM_CKP)
835 ++lfs_stats.ncheckpoints; 835 ++lfs_stats.ncheckpoints;
836 } 836 }
837 lfs_segunlock(fs); 837 lfs_segunlock(fs);
838 return (0); 838 return (0);
839} 839}
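
The checkpoint path above clears SEGUSE_ACTIVE by walking the segment-usage table in ifile-block-sized chunks of lfs_sb_getsepb(fs) entries each. Below is a minimal standalone sketch of just that index arithmetic; the geometry (nseg, sepb) and the flags array are toy stand-ins, not the on-disk structures or kernel API.

#include <stdio.h>

#define SEGUSE_ACTIVE	0x01

int
main(void)
{
	unsigned flags[10] = { 1, 0, 1, 1, 0, 1, 0, 0, 1, 1 };
	unsigned nseg = 10, sepb = 4;		/* assumed toy geometry */
	unsigned segtabsz = (nseg + sepb - 1) / sepb;
	unsigned segleft = nseg, curseg = 0, n, i, maxseg, sn;

	for (n = 0; n < segtabsz; n++) {
		/* mirrors maxseg = min(segleft, sepb) in lfs_segwrite */
		maxseg = segleft < sepb ? segleft : sepb;
		for (i = 0; i < maxseg; i++) {
			sn = curseg + i;
			if (flags[sn] & SEGUSE_ACTIVE) {
				flags[sn] &= ~SEGUSE_ACTIVE;
				printf("seg %u: cleared ACTIVE\n", sn);
			}
		}
		segleft -= sepb;
		curseg += sepb;
	}
	return 0;
}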
840 840
841/* 841/*
842 * Write the dirty blocks associated with a vnode. 842 * Write the dirty blocks associated with a vnode.
843 */ 843 */
844int 844int
845lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp) 845lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp)
846{ 846{
847 struct inode *ip; 847 struct inode *ip;
848 int i, frag; 848 int i, frag;
849 int error; 849 int error;
850 850
851 ASSERT_SEGLOCK(fs); 851 ASSERT_SEGLOCK(fs);
852 error = 0; 852 error = 0;
853 ip = VTOI(vp); 853 ip = VTOI(vp);
854 854
855 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); 855 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
856 856
857 if (vp->v_uflag & VU_DIROP) 857 if (vp->v_uflag & VU_DIROP)
858 ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT); 858 ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);
859 859
860 if (sp->seg_flags & SEGM_CLEAN) { 860 if (sp->seg_flags & SEGM_CLEAN) {
861 lfs_gather(fs, sp, vp, lfs_match_fake); 861 lfs_gather(fs, sp, vp, lfs_match_fake);
862 /* 862 /*
863 * For a file being flushed, we need to write *all* blocks. 863 * For a file being flushed, we need to write *all* blocks.
864 * This means writing the cleaning blocks first, and then 864 * This means writing the cleaning blocks first, and then
865 * immediately following with any non-cleaning blocks. 865 * immediately following with any non-cleaning blocks.
866 * The same is true of the Ifile since checkpoints assume 866 * The same is true of the Ifile since checkpoints assume
867 * that all valid Ifile blocks are written. 867 * that all valid Ifile blocks are written.
868 */ 868 */
869 if (IS_FLUSHING(fs, vp) || vp == fs->lfs_ivnode) { 869 if (IS_FLUSHING(fs, vp) || vp == fs->lfs_ivnode) {
870 lfs_gather(fs, sp, vp, lfs_match_data); 870 lfs_gather(fs, sp, vp, lfs_match_data);
871 /* 871 /*
872 * Don't call VOP_PUTPAGES: if we're flushing, 872 * Don't call VOP_PUTPAGES: if we're flushing,
873 * we've already done it, and the Ifile doesn't 873 * we've already done it, and the Ifile doesn't
874 * use the page cache. 874 * use the page cache.
875 */ 875 */
876 } 876 }
877 } else { 877 } else {
878 lfs_gather(fs, sp, vp, lfs_match_data); 878 lfs_gather(fs, sp, vp, lfs_match_data);
879 /* 879 /*
880 * If we're flushing, we've already called VOP_PUTPAGES 880 * If we're flushing, we've already called VOP_PUTPAGES
881 * so don't do it again. Otherwise, we want to write 881 * so don't do it again. Otherwise, we want to write
882 * everything we've got. 882 * everything we've got.
883 */ 883 */
884 if (!IS_FLUSHING(fs, vp)) { 884 if (!IS_FLUSHING(fs, vp)) {
885 mutex_enter(vp->v_interlock); 885 mutex_enter(vp->v_interlock);
886 error = VOP_PUTPAGES(vp, 0, 0, 886 error = VOP_PUTPAGES(vp, 0, 0,
887 PGO_CLEANIT | PGO_ALLPAGES | PGO_LOCKED); 887 PGO_CLEANIT | PGO_ALLPAGES | PGO_LOCKED);
888 } 888 }
889 } 889 }
890 890
891 /* 891 /*
892 * It may not be necessary to write the meta-data blocks at this point, 892 * It may not be necessary to write the meta-data blocks at this point,
893 * as the roll-forward recovery code should be able to reconstruct the 893 * as the roll-forward recovery code should be able to reconstruct the
894 * list. 894 * list.
895 * 895 *
896 * We have to write them anyway, though, under two conditions: (1) the 896 * We have to write them anyway, though, under two conditions: (1) the
897 * vnode is being flushed (for reuse by vinvalbuf); or (2) we are 897 * vnode is being flushed (for reuse by vinvalbuf); or (2) we are
898 * checkpointing. 898 * checkpointing.
899 * 899 *
900 * BUT if we are cleaning, we might have indirect blocks that refer to 900 * BUT if we are cleaning, we might have indirect blocks that refer to
901 * new blocks not being written yet, in addition to fragments being 901 * new blocks not being written yet, in addition to fragments being
902 * moved out of a cleaned segment. If that is the case, don't 902 * moved out of a cleaned segment. If that is the case, don't
903 * write the indirect blocks, or the finfo will have a small block 903 * write the indirect blocks, or the finfo will have a small block
904 * in the middle of it! 904 * in the middle of it!
905 * XXX in this case isn't the inode size wrong too? 905 * XXX in this case isn't the inode size wrong too?
906 */ 906 */
907 frag = 0; 907 frag = 0;
908 if (sp->seg_flags & SEGM_CLEAN) { 908 if (sp->seg_flags & SEGM_CLEAN) {
909 for (i = 0; i < ULFS_NDADDR; i++) 909 for (i = 0; i < ULFS_NDADDR; i++)
910 if (ip->i_lfs_fragsize[i] > 0 && 910 if (ip->i_lfs_fragsize[i] > 0 &&
911 ip->i_lfs_fragsize[i] < lfs_sb_getbsize(fs)) 911 ip->i_lfs_fragsize[i] < lfs_sb_getbsize(fs))
912 ++frag; 912 ++frag;
913 } 913 }
914#ifdef DIAGNOSTIC 914#ifdef DIAGNOSTIC
915 if (frag > 1) 915 if (frag > 1)
916 panic("lfs_writefile: more than one fragment!"); 916 panic("lfs_writefile: more than one fragment!");
917#endif 917#endif
918 if (IS_FLUSHING(fs, vp) || 918 if (IS_FLUSHING(fs, vp) ||
919 (frag == 0 && (lfs_writeindir || (sp->seg_flags & SEGM_CKP)))) { 919 (frag == 0 && (lfs_writeindir || (sp->seg_flags & SEGM_CKP)))) {
920 lfs_gather(fs, sp, vp, lfs_match_indir); 920 lfs_gather(fs, sp, vp, lfs_match_indir);
921 lfs_gather(fs, sp, vp, lfs_match_dindir); 921 lfs_gather(fs, sp, vp, lfs_match_dindir);
922 lfs_gather(fs, sp, vp, lfs_match_tindir); 922 lfs_gather(fs, sp, vp, lfs_match_tindir);
923 } 923 }
924 lfs_release_finfo(fs); 924 lfs_release_finfo(fs);
925 925
926 return error; 926 return error;
927} 927}
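
The SEGM_CLEAN branch above counts direct blocks whose recorded size is a partial fragment, and the DIAGNOSTIC check insists that at most one such block exists. A self-contained sketch of that scan follows; NDADDR, bsize, and fragsize[] are invented stand-ins for the real inode layout.

#include <assert.h>
#include <stdio.h>

#define NDADDR	12	/* assumed number of direct blocks */

int
main(void)
{
	long bsize = 8192;	/* assumed fs block size */
	long fragsize[NDADDR] = { 8192, 8192, 4096 };	/* rest zero */
	int i, frag = 0;

	for (i = 0; i < NDADDR; i++)
		if (fragsize[i] > 0 && fragsize[i] < bsize)
			++frag;	/* a partial-size direct block */
	assert(frag <= 1);	/* mirrors the DIAGNOSTIC panic */
	printf("fragments: %d\n", frag);
	return 0;
}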
928 928
929/* 929/*
930 * Update segment accounting to reflect this inode's change of address. 930 * Update segment accounting to reflect this inode's change of address.
931 */ 931 */
932static int 932static int
933lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t ndaddr) 933lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t ndaddr)
934{ 934{
935 struct buf *bp; 935 struct buf *bp;
936 daddr_t daddr; 936 daddr_t daddr;
937 IFILE *ifp; 937 IFILE *ifp;
938 SEGUSE *sup; 938 SEGUSE *sup;
939 ino_t ino; 939 ino_t ino;
940 int redo_ifile; 940 int redo_ifile;
941 u_int32_t sn; 941 u_int32_t sn;
942 942
943 redo_ifile = 0; 943 redo_ifile = 0;
944 944
945 /* 945 /*
946 * If updating the ifile, update the super-block. Update the disk 946 * If updating the ifile, update the super-block. Update the disk
947 * address and access times for this inode in the ifile. 947 * address and access times for this inode in the ifile.
948 */ 948 */
949 ino = ip->i_number; 949 ino = ip->i_number;
950 if (ino == LFS_IFILE_INUM) { 950 if (ino == LFS_IFILE_INUM) {
951 daddr = lfs_sb_getidaddr(fs); 951 daddr = lfs_sb_getidaddr(fs);
952 lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, ndaddr)); 952 lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, ndaddr));
953 } else { 953 } else {
954 LFS_IENTRY(ifp, fs, ino, bp); 954 LFS_IENTRY(ifp, fs, ino, bp);
955 daddr = ifp->if_daddr; 955 daddr = ifp->if_daddr;
956 ifp->if_daddr = LFS_DBTOFSB(fs, ndaddr); 956 ifp->if_daddr = LFS_DBTOFSB(fs, ndaddr);
957 (void)LFS_BWRITE_LOG(bp); /* Ifile */ 957 (void)LFS_BWRITE_LOG(bp); /* Ifile */
958 } 958 }
959 959
960 /* 960 /*
961 * If this is the Ifile and lfs_offset is set to the first block 961 * If this is the Ifile and lfs_offset is set to the first block
962 * in the segment, dirty the new segment's accounting block 962 * in the segment, dirty the new segment's accounting block
963 * (XXX should already be dirty?) and tell the caller to do it again. 963 * (XXX should already be dirty?) and tell the caller to do it again.
964 */ 964 */
965 if (ip->i_number == LFS_IFILE_INUM) { 965 if (ip->i_number == LFS_IFILE_INUM) {
966 sn = lfs_dtosn(fs, lfs_sb_getoffset(fs)); 966 sn = lfs_dtosn(fs, lfs_sb_getoffset(fs));
967 if (lfs_sntod(fs, sn) + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) == 967 if (lfs_sntod(fs, sn) + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) ==
968 lfs_sb_getoffset(fs)) { 968 lfs_sb_getoffset(fs)) {
969 LFS_SEGENTRY(sup, fs, sn, bp); 969 LFS_SEGENTRY(sup, fs, sn, bp);
970 KASSERT(bp->b_oflags & BO_DELWRI); 970 KASSERT(bp->b_oflags & BO_DELWRI);
971 LFS_WRITESEGENTRY(sup, fs, sn, bp); 971 LFS_WRITESEGENTRY(sup, fs, sn, bp);
972 /* fs->lfs_flags |= LFS_IFDIRTY; */ 972 /* fs->lfs_flags |= LFS_IFDIRTY; */
973 redo_ifile |= 1; 973 redo_ifile |= 1;
974 } 974 }
975 } 975 }
976 976
977 /* 977 /*
978 * The inode's last address should not be in the current partial 978 * The inode's last address should not be in the current partial
979 * segment, except under exceptional circumstances (lfs_writevnodes 979 * segment, except under exceptional circumstances (lfs_writevnodes
980 * had to start over, and in the meantime more blocks were written 980 * had to start over, and in the meantime more blocks were written
981 * to a vnode). Both inodes will be accounted to this segment 981 * to a vnode). Both inodes will be accounted to this segment
982 * in lfs_writeseg so we need to subtract the earlier version 982 * in lfs_writeseg so we need to subtract the earlier version
983 * here anyway. The segment count can temporarily dip below 983 * here anyway. The segment count can temporarily dip below
984 * zero here; keep track of how many duplicates we have in 984 * zero here; keep track of how many duplicates we have in
985 * "dupino" so we don't panic below. 985 * "dupino" so we don't panic below.
986 */ 986 */
987 if (daddr >= lfs_sb_getlastpseg(fs) && daddr <= lfs_sb_getoffset(fs)) { 987 if (daddr >= lfs_sb_getlastpseg(fs) && daddr <= lfs_sb_getoffset(fs)) {
988 ++sp->ndupino; 988 ++sp->ndupino;
989 DLOG((DLOG_SEG, "lfs_writeinode: last inode addr in current pseg " 989 DLOG((DLOG_SEG, "lfs_writeinode: last inode addr in current pseg "
990 "(ino %d daddr 0x%llx) ndupino=%d\n", ino, 990 "(ino %d daddr 0x%llx) ndupino=%d\n", ino,
991 (long long)daddr, sp->ndupino)); 991 (long long)daddr, sp->ndupino));
992 } 992 }
993 /* 993 /*
994 * Account the inode: it no longer belongs to its former segment, 994 * Account the inode: it no longer belongs to its former segment,
995 * though it will not belong to the new segment until that segment 995 * though it will not belong to the new segment until that segment
996 * is actually written. 996 * is actually written.
997 */ 997 */
998 if (daddr != LFS_UNUSED_DADDR) { 998 if (daddr != LFS_UNUSED_DADDR) {
999 u_int32_t oldsn = lfs_dtosn(fs, daddr); 999 u_int32_t oldsn = lfs_dtosn(fs, daddr);
1000#ifdef DIAGNOSTIC 1000#ifdef DIAGNOSTIC
1001 int ndupino = (sp->seg_number == oldsn) ? sp->ndupino : 0; 1001 int ndupino = (sp->seg_number == oldsn) ? sp->ndupino : 0;
1002#endif 1002#endif
1003 LFS_SEGENTRY(sup, fs, oldsn, bp); 1003 LFS_SEGENTRY(sup, fs, oldsn, bp);
1004#ifdef DIAGNOSTIC 1004#ifdef DIAGNOSTIC
1005 if (sup->su_nbytes + 1005 if (sup->su_nbytes +
1006 sizeof (struct ulfs1_dinode) * ndupino 1006 sizeof (struct ulfs1_dinode) * ndupino
1007 < sizeof (struct ulfs1_dinode)) { 1007 < sizeof (struct ulfs1_dinode)) {
1008 printf("lfs_writeinode: negative bytes " 1008 printf("lfs_writeinode: negative bytes "
1009 "(segment %" PRIu32 " short by %d, " 1009 "(segment %" PRIu32 " short by %d, "
1010 "oldsn=%" PRIu32 ", cursn=%" PRIu32 1010 "oldsn=%" PRIu32 ", cursn=%" PRIu32
1011 ", daddr=%" PRId64 ", su_nbytes=%u, " 1011 ", daddr=%" PRId64 ", su_nbytes=%u, "
1012 "ndupino=%d)\n", 1012 "ndupino=%d)\n",
1013 lfs_dtosn(fs, daddr), 1013 lfs_dtosn(fs, daddr),
1014 (int)sizeof (struct ulfs1_dinode) * 1014 (int)sizeof (struct ulfs1_dinode) *
1015 (1 - sp->ndupino) - sup->su_nbytes, 1015 (1 - sp->ndupino) - sup->su_nbytes,
1016 oldsn, sp->seg_number, daddr, 1016 oldsn, sp->seg_number, daddr,
1017 (unsigned int)sup->su_nbytes, 1017 (unsigned int)sup->su_nbytes,
1018 sp->ndupino); 1018 sp->ndupino);
1019 panic("lfs_writeinode: negative bytes"); 1019 panic("lfs_writeinode: negative bytes");
1020 sup->su_nbytes = sizeof (struct ulfs1_dinode); 1020 sup->su_nbytes = sizeof (struct ulfs1_dinode);
1021 } 1021 }
1022#endif 1022#endif
1023 DLOG((DLOG_SU, "seg %d -= %d for ino %d inode\n", 1023 DLOG((DLOG_SU, "seg %d -= %d for ino %d inode\n",
1024 lfs_dtosn(fs, daddr), sizeof (struct ulfs1_dinode), ino)); 1024 lfs_dtosn(fs, daddr), sizeof (struct ulfs1_dinode), ino));
1025 sup->su_nbytes -= sizeof (struct ulfs1_dinode); 1025 sup->su_nbytes -= sizeof (struct ulfs1_dinode);
1026 redo_ifile |= 1026 redo_ifile |=
1027 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 1027 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
1028 if (redo_ifile) { 1028 if (redo_ifile) {
1029 mutex_enter(&lfs_lock); 1029 mutex_enter(&lfs_lock);
1030 fs->lfs_flags |= LFS_IFDIRTY; 1030 fs->lfs_flags |= LFS_IFDIRTY;
1031 mutex_exit(&lfs_lock); 1031 mutex_exit(&lfs_lock);
1032 /* Don't double-account */ 1032 /* Don't double-account */
1033 lfs_sb_setidaddr(fs, 0x0); 1033 lfs_sb_setidaddr(fs, 0x0);
1034 } 1034 }
1035 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ 1035 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */
1036 } 1036 }
1037 1037
1038 return redo_ifile; 1038 return redo_ifile;
1039} 1039}
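
lfs_update_iaddr records an inode's move by rewriting its ifile entry's disk address (or, for the ifile itself, the superblock's idaddr). A toy model of that indirection is below; the struct and table are illustrative only, not the real IFILE on-disk format.

#include <stdint.h>
#include <stdio.h>

struct toy_ifile { int64_t if_daddr; };	/* invented, minimal */

static struct toy_ifile ifile_tab[8];	/* toy ifile, indexed by inum */

int
main(void)
{
	unsigned ino = 5;
	int64_t olddaddr, ndaddr = 0x2000;

	olddaddr = ifile_tab[ino].if_daddr;	/* LFS_IENTRY analogue */
	ifile_tab[ino].if_daddr = ndaddr;	/* record the new address */
	printf("ino %u: daddr 0x%jx -> 0x%jx\n", ino,
	    (uintmax_t)olddaddr, (uintmax_t)ndaddr);
	return 0;
}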
1040 1040
1041int 1041int
1042lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) 1042lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip)
1043{ 1043{
1044 struct buf *bp; 1044 struct buf *bp;
1045 struct ulfs1_dinode *cdp; 1045 struct ulfs1_dinode *cdp;
1046 struct vnode *vp = ITOV(ip); 1046 struct vnode *vp = ITOV(ip);
1047 daddr_t daddr; 1047 daddr_t daddr;
1048 int32_t *daddrp; /* XXX ondisk32 */ 1048 int32_t *daddrp; /* XXX ondisk32 */
1049 int i, ndx; 1049 int i, ndx;
1050 int redo_ifile = 0; 1050 int redo_ifile = 0;
1051 int gotblk = 0; 1051 int gotblk = 0;
1052 int count; 1052 int count;
1053 1053
1054 ASSERT_SEGLOCK(fs); 1054 ASSERT_SEGLOCK(fs);
1055 if (!(ip->i_flag & IN_ALLMOD) && !(vp->v_uflag & VU_DIROP)) 1055 if (!(ip->i_flag & IN_ALLMOD) && !(vp->v_uflag & VU_DIROP))
1056 return (0); 1056 return (0);
1057 1057
1058 /* Can't write ifile when writer is not set */ 1058 /* Can't write ifile when writer is not set */
1059 KASSERT(ip->i_number != LFS_IFILE_INUM || fs->lfs_writer > 0 || 1059 KASSERT(ip->i_number != LFS_IFILE_INUM || fs->lfs_writer > 0 ||
1060 (sp->seg_flags & SEGM_CLEAN)); 1060 (sp->seg_flags & SEGM_CLEAN));
1061 1061
1062 /* 1062 /*
@@ -1251,1543 +1251,1543 @@ lfs_writeinode(struct lfs *fs, struct se @@ -1251,1543 +1251,1543 @@ lfs_writeinode(struct lfs *fs, struct se
1251 LFS_CLR_UINO(ip, IN_CLEANING); 1251 LFS_CLR_UINO(ip, IN_CLEANING);
1252 else { 1252 else {
1253 /* XXX IN_ALLMOD */ 1253 /* XXX IN_ALLMOD */
1254 LFS_CLR_UINO(ip, IN_ACCESSED | IN_ACCESS | IN_CHANGE | 1254 LFS_CLR_UINO(ip, IN_ACCESSED | IN_ACCESS | IN_CHANGE |
1255 IN_UPDATE | IN_MODIFY); 1255 IN_UPDATE | IN_MODIFY);
1256 if (ip->i_lfs_effnblks == ip->i_ffs1_blocks) 1256 if (ip->i_lfs_effnblks == ip->i_ffs1_blocks)
1257 LFS_CLR_UINO(ip, IN_MODIFIED); 1257 LFS_CLR_UINO(ip, IN_MODIFIED);
1258 else { 1258 else {
1259 DLOG((DLOG_VNODE, "lfs_writeinode: ino %d: real " 1259 DLOG((DLOG_VNODE, "lfs_writeinode: ino %d: real "
1260 "blks=%d, eff=%d\n", ip->i_number, 1260 "blks=%d, eff=%d\n", ip->i_number,
1261 ip->i_ffs1_blocks, ip->i_lfs_effnblks)); 1261 ip->i_ffs1_blocks, ip->i_lfs_effnblks));
1262 } 1262 }
1263 } 1263 }
1264 1264
1265 if (ip->i_number == LFS_IFILE_INUM) { 1265 if (ip->i_number == LFS_IFILE_INUM) {
1266 /* We know sp->idp == NULL */ 1266 /* We know sp->idp == NULL */
1267 sp->idp = ((struct ulfs1_dinode *)bp->b_data) + 1267 sp->idp = ((struct ulfs1_dinode *)bp->b_data) +
1268 (sp->ninodes % LFS_INOPB(fs)); 1268 (sp->ninodes % LFS_INOPB(fs));
1269 1269
1270 /* Not dirty any more */ 1270 /* Not dirty any more */
1271 mutex_enter(&lfs_lock); 1271 mutex_enter(&lfs_lock);
1272 fs->lfs_flags &= ~LFS_IFDIRTY; 1272 fs->lfs_flags &= ~LFS_IFDIRTY;
1273 mutex_exit(&lfs_lock); 1273 mutex_exit(&lfs_lock);
1274 } 1274 }
1275 1275
1276 if (gotblk) { 1276 if (gotblk) {
1277 mutex_enter(&bufcache_lock); 1277 mutex_enter(&bufcache_lock);
1278 LFS_LOCK_BUF(bp); 1278 LFS_LOCK_BUF(bp);
1279 brelsel(bp, 0); 1279 brelsel(bp, 0);
1280 mutex_exit(&bufcache_lock); 1280 mutex_exit(&bufcache_lock);
1281 } 1281 }
1282 1282
1283 /* Increment inode count in segment summary block. */ 1283 /* Increment inode count in segment summary block. */
1284 ++((SEGSUM *)(sp->segsum))->ss_ninos; 1284 ++((SEGSUM *)(sp->segsum))->ss_ninos;
1285 1285
1286 /* If this page is full, set flag to allocate a new page. */ 1286 /* If this page is full, set flag to allocate a new page. */
1287 if (++sp->ninodes % LFS_INOPB(fs) == 0) 1287 if (++sp->ninodes % LFS_INOPB(fs) == 0)
1288 sp->ibp = NULL; 1288 sp->ibp = NULL;
1289 1289
1290 redo_ifile = lfs_update_iaddr(fs, sp, ip, bp->b_blkno); 1290 redo_ifile = lfs_update_iaddr(fs, sp, ip, bp->b_blkno);
1291 1291
1292 KASSERT(redo_ifile == 0); 1292 KASSERT(redo_ifile == 0);
1293 return (redo_ifile); 1293 return (redo_ifile);
1294} 1294}
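
The tail of lfs_writeinode packs inodes LFS_INOPB(fs) to a block: when ++sp->ninodes crosses a multiple of that count, sp->ibp is reset so the next inode forces allocation of a fresh block. A sketch of that check, with an assumed INOPB constant:

#include <stdio.h>

#define INOPB	64	/* assumed inodes per inode block */

int
main(void)
{
	unsigned ninodes;
	void *ibp = (void *)1;	/* non-NULL: an inode block is open */

	for (ninodes = 62; ninodes < 66; ) {
		if (++ninodes % INOPB == 0) {
			ibp = NULL;	/* block full; allocate next time */
			printf("block full at %u inodes\n", ninodes);
		}
	}
	(void)ibp;
	return 0;
}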
1295 1295
1296int 1296int
1297lfs_gatherblock(struct segment *sp, struct buf *bp, kmutex_t *mptr) 1297lfs_gatherblock(struct segment *sp, struct buf *bp, kmutex_t *mptr)
1298{ 1298{
1299 struct lfs *fs; 1299 struct lfs *fs;
1300 int vers; 1300 int vers;
1301 int j, blksinblk; 1301 int j, blksinblk;
1302 1302
1303 ASSERT_SEGLOCK(sp->fs); 1303 ASSERT_SEGLOCK(sp->fs);
1304 /* 1304 /*
1305 * If full, finish this segment. We may be doing I/O, so 1305 * If full, finish this segment. We may be doing I/O, so
1306 * release and reacquire the caller's lock. 1306 * release and reacquire the caller's lock.
1307 */ 1307 */
1308#ifdef DIAGNOSTIC 1308#ifdef DIAGNOSTIC
1309 if (sp->vp == NULL) 1309 if (sp->vp == NULL)
1310 panic ("lfs_gatherblock: Null vp in segment"); 1310 panic ("lfs_gatherblock: Null vp in segment");
1311#endif 1311#endif
1312 fs = sp->fs; 1312 fs = sp->fs;
1313 blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs)); 1313 blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs));
1314 if (sp->sum_bytes_left < sizeof(int32_t) * blksinblk || 1314 if (sp->sum_bytes_left < sizeof(int32_t) * blksinblk ||
1315 sp->seg_bytes_left < bp->b_bcount) { 1315 sp->seg_bytes_left < bp->b_bcount) {
1316 if (mptr) 1316 if (mptr)
1317 mutex_exit(mptr); 1317 mutex_exit(mptr);
1318 lfs_updatemeta(sp); 1318 lfs_updatemeta(sp);
1319 1319
1320 vers = sp->fip->fi_version; 1320 vers = sp->fip->fi_version;
1321 (void) lfs_writeseg(fs, sp); 1321 (void) lfs_writeseg(fs, sp);
1322 1322
1323 /* Add the current file to the segment summary. */ 1323 /* Add the current file to the segment summary. */
1324 lfs_acquire_finfo(fs, VTOI(sp->vp)->i_number, vers); 1324 lfs_acquire_finfo(fs, VTOI(sp->vp)->i_number, vers);
1325 1325
1326 if (mptr) 1326 if (mptr)
1327 mutex_enter(mptr); 1327 mutex_enter(mptr);
1328 return (1); 1328 return (1);
1329 } 1329 }
1330 1330
1331 if (bp->b_flags & B_GATHERED) { 1331 if (bp->b_flags & B_GATHERED) {
1332 DLOG((DLOG_SEG, "lfs_gatherblock: already gathered! Ino %d," 1332 DLOG((DLOG_SEG, "lfs_gatherblock: already gathered! Ino %d,"
1333 " lbn %" PRId64 "\n", 1333 " lbn %" PRId64 "\n",
1334 sp->fip->fi_ino, bp->b_lblkno)); 1334 sp->fip->fi_ino, bp->b_lblkno));
1335 return (0); 1335 return (0);
1336 } 1336 }
1337 1337
1338 /* Insert into the buffer list, update the FINFO block. */ 1338 /* Insert into the buffer list, update the FINFO block. */
1339 bp->b_flags |= B_GATHERED; 1339 bp->b_flags |= B_GATHERED;
1340 1340
1341 *sp->cbpp++ = bp; 1341 *sp->cbpp++ = bp;
1342 for (j = 0; j < blksinblk; j++) { 1342 for (j = 0; j < blksinblk; j++) {
1343 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + j; 1343 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + j;
1344 /* This block's accounting moves from lfs_favail to lfs_avail */ 1344 /* This block's accounting moves from lfs_favail to lfs_avail */
1345 lfs_deregister_block(sp->vp, bp->b_lblkno + j); 1345 lfs_deregister_block(sp->vp, bp->b_lblkno + j);
1346 } 1346 }
1347 1347
1348 sp->sum_bytes_left -= sizeof(int32_t) * blksinblk; 1348 sp->sum_bytes_left -= sizeof(int32_t) * blksinblk;
1349 sp->seg_bytes_left -= bp->b_bcount; 1349 sp->seg_bytes_left -= bp->b_bcount;
1350 return (0); 1350 return (0);
1351} 1351}
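
lfs_gatherblock admits a buffer only if the summary block still has an int32_t slot for each constituent fs-block and the segment has room for the buffer's bytes; otherwise the partial segment is written out first. A standalone model of that room check, with invented sizes:

#include <stdint.h>
#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	long bsize = 8192, bcount = 20480;	/* oversized buffer */
	long sum_bytes_left = 8, seg_bytes_left = 65536;
	long blksinblk = howmany(bcount, bsize);

	if (sum_bytes_left < (long)sizeof(int32_t) * blksinblk ||
	    seg_bytes_left < bcount)
		printf("flush partial segment first (%ld slots needed)\n",
		    blksinblk);
	else
		printf("buffer fits\n");
	return 0;
}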
1352 1352
1353int 1353int
1354lfs_gather(struct lfs *fs, struct segment *sp, struct vnode *vp, 1354lfs_gather(struct lfs *fs, struct segment *sp, struct vnode *vp,
1355 int (*match)(struct lfs *, struct buf *)) 1355 int (*match)(struct lfs *, struct buf *))
1356{ 1356{
1357 struct buf *bp, *nbp; 1357 struct buf *bp, *nbp;
1358 int count = 0; 1358 int count = 0;
1359 1359
1360 ASSERT_SEGLOCK(fs); 1360 ASSERT_SEGLOCK(fs);
1361 if (vp->v_type == VBLK) 1361 if (vp->v_type == VBLK)
1362 return 0; 1362 return 0;
1363 KASSERT(sp->vp == NULL); 1363 KASSERT(sp->vp == NULL);
1364 sp->vp = vp; 1364 sp->vp = vp;
1365 mutex_enter(&bufcache_lock); 1365 mutex_enter(&bufcache_lock);
1366 1366
1367#ifndef LFS_NO_BACKBUF_HACK 1367#ifndef LFS_NO_BACKBUF_HACK
1368/* This is a hack to see if ordering the blocks in LFS makes a difference. */ 1368/* This is a hack to see if ordering the blocks in LFS makes a difference. */
1369# define BUF_OFFSET \ 1369# define BUF_OFFSET \
1370 (((char *)&LIST_NEXT(bp, b_vnbufs)) - (char *)bp) 1370 (((char *)&LIST_NEXT(bp, b_vnbufs)) - (char *)bp)
1371# define BACK_BUF(BP) \ 1371# define BACK_BUF(BP) \
1372 ((struct buf *)(((char *)(BP)->b_vnbufs.le_prev) - BUF_OFFSET)) 1372 ((struct buf *)(((char *)(BP)->b_vnbufs.le_prev) - BUF_OFFSET))
1373# define BEG_OF_LIST \ 1373# define BEG_OF_LIST \
1374 ((struct buf *)(((char *)&LIST_FIRST(&vp->v_dirtyblkhd)) - BUF_OFFSET)) 1374 ((struct buf *)(((char *)&LIST_FIRST(&vp->v_dirtyblkhd)) - BUF_OFFSET))
1375 1375
1376loop: 1376loop:
1377 /* Find last buffer. */ 1377 /* Find last buffer. */
1378 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); 1378 for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
1379 bp && LIST_NEXT(bp, b_vnbufs) != NULL; 1379 bp && LIST_NEXT(bp, b_vnbufs) != NULL;
1380 bp = LIST_NEXT(bp, b_vnbufs)) 1380 bp = LIST_NEXT(bp, b_vnbufs))
1381 /* nothing */; 1381 /* nothing */;
1382 for (; bp && bp != BEG_OF_LIST; bp = nbp) { 1382 for (; bp && bp != BEG_OF_LIST; bp = nbp) {
1383 nbp = BACK_BUF(bp); 1383 nbp = BACK_BUF(bp);
1384#else /* LFS_NO_BACKBUF_HACK */ 1384#else /* LFS_NO_BACKBUF_HACK */
1385loop: 1385loop:
1386 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 1386 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1387 nbp = LIST_NEXT(bp, b_vnbufs); 1387 nbp = LIST_NEXT(bp, b_vnbufs);
1388#endif /* LFS_NO_BACKBUF_HACK */ 1388#endif /* LFS_NO_BACKBUF_HACK */
1389 if ((bp->b_cflags & BC_BUSY) != 0 || 1389 if ((bp->b_cflags & BC_BUSY) != 0 ||
1390 (bp->b_flags & B_GATHERED) != 0 || !match(fs, bp)) { 1390 (bp->b_flags & B_GATHERED) != 0 || !match(fs, bp)) {
1391#ifdef DEBUG 1391#ifdef DEBUG
1392 if (vp == fs->lfs_ivnode && 1392 if (vp == fs->lfs_ivnode &&
1393 (bp->b_cflags & BC_BUSY) != 0 && 1393 (bp->b_cflags & BC_BUSY) != 0 &&
1394 (bp->b_flags & B_GATHERED) == 0) 1394 (bp->b_flags & B_GATHERED) == 0)
1395 log(LOG_NOTICE, "lfs_gather: ifile lbn %" 1395 log(LOG_NOTICE, "lfs_gather: ifile lbn %"
1396 PRId64 " busy (%x) at 0x%jx", 1396 PRId64 " busy (%x) at 0x%jx",
1397 bp->b_lblkno, bp->b_flags, 1397 bp->b_lblkno, bp->b_flags,
1398 (uintmax_t)lfs_sb_getoffset(fs)); 1398 (uintmax_t)lfs_sb_getoffset(fs));
1399#endif 1399#endif
1400 continue; 1400 continue;
1401 } 1401 }
1402#ifdef DIAGNOSTIC 1402#ifdef DIAGNOSTIC
1403# ifdef LFS_USE_B_INVAL 1403# ifdef LFS_USE_B_INVAL
1404 if ((bp->b_flags & BC_INVAL) != 0 && bp->b_iodone == NULL) { 1404 if ((bp->b_flags & BC_INVAL) != 0 && bp->b_iodone == NULL) {
1405 DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 1405 DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64
1406 " is BC_INVAL\n", bp->b_lblkno)); 1406 " is BC_INVAL\n", bp->b_lblkno));
1407 VOP_PRINT(bp->b_vp); 1407 VOP_PRINT(bp->b_vp);
1408 } 1408 }
1409# endif /* LFS_USE_B_INVAL */ 1409# endif /* LFS_USE_B_INVAL */
1410 if (!(bp->b_oflags & BO_DELWRI)) 1410 if (!(bp->b_oflags & BO_DELWRI))
1411 panic("lfs_gather: bp not BO_DELWRI"); 1411 panic("lfs_gather: bp not BO_DELWRI");
1412 if (!(bp->b_flags & B_LOCKED)) { 1412 if (!(bp->b_flags & B_LOCKED)) {
1413 DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 1413 DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64
1414 " blk %" PRId64 " not B_LOCKED\n", 1414 " blk %" PRId64 " not B_LOCKED\n",
1415 bp->b_lblkno, 1415 bp->b_lblkno,
1416 LFS_DBTOFSB(fs, bp->b_blkno))); 1416 LFS_DBTOFSB(fs, bp->b_blkno)));
1417 VOP_PRINT(bp->b_vp); 1417 VOP_PRINT(bp->b_vp);
1418 panic("lfs_gather: bp not B_LOCKED"); 1418 panic("lfs_gather: bp not B_LOCKED");
1419 } 1419 }
1420#endif 1420#endif
1421 if (lfs_gatherblock(sp, bp, &bufcache_lock)) { 1421 if (lfs_gatherblock(sp, bp, &bufcache_lock)) {
1422 goto loop; 1422 goto loop;
1423 } 1423 }
1424 count++; 1424 count++;
1425 } 1425 }
1426 mutex_exit(&bufcache_lock); 1426 mutex_exit(&bufcache_lock);
1427 lfs_updatemeta(sp); 1427 lfs_updatemeta(sp);
1428 KASSERT(sp->vp == vp); 1428 KASSERT(sp->vp == vp);
1429 sp->vp = NULL; 1429 sp->vp = NULL;
1430 return count; 1430 return count;
1431} 1431}
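
The BACK_BUF/BUF_OFFSET macros above recover the previous struct buf from a LIST entry: le_prev points at the predecessor's forward-link field, so subtracting that field's offset within the structure yields the predecessor itself. A self-contained illustration of the same pointer trick, with toy types throughout:

#include <stddef.h>
#include <stdio.h>

struct node {
	int id;
	struct node *next;
	struct node **prevp;	/* points at predecessor's next field */
};

#define NODE_OFFSET	offsetof(struct node, next)
#define BACK_NODE(n)	((struct node *)((char *)(n)->prevp - NODE_OFFSET))

int
main(void)
{
	struct node a = { 1, NULL, NULL }, b = { 2, NULL, NULL };

	a.next = &b;		/* a precedes b */
	b.prevp = &a.next;	/* b's back pointer aims at a's link */
	printf("before node %d comes node %d\n", b.id, BACK_NODE(&b)->id);
	return 0;
}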
1432 1432
1433#ifdef DEBUG 1433#ifdef DEBUG
1434# define DEBUG_OOFF(n) do { \ 1434# define DEBUG_OOFF(n) do { \
1435 if (ooff == 0) { \ 1435 if (ooff == 0) { \
1436 DLOG((DLOG_SEG, "lfs_updatemeta[%d]: warning: writing " \ 1436 DLOG((DLOG_SEG, "lfs_updatemeta[%d]: warning: writing " \
1437 "ino %d lbn %" PRId64 " at 0x%" PRIx32 \ 1437 "ino %d lbn %" PRId64 " at 0x%" PRIx32 \
1438 ", was 0x0 (or %" PRId64 ")\n", \ 1438 ", was 0x0 (or %" PRId64 ")\n", \
1439 (n), ip->i_number, lbn, ndaddr, daddr)); \ 1439 (n), ip->i_number, lbn, ndaddr, daddr)); \
1440 } \ 1440 } \
1441} while (0) 1441} while (0)
1442#else 1442#else
1443# define DEBUG_OOFF(n) 1443# define DEBUG_OOFF(n)
1444#endif 1444#endif
1445 1445
1446/* 1446/*
1447 * Change the given block's address to ndaddr, finding its previous 1447 * Change the given block's address to ndaddr, finding its previous
1448 * location using ulfs_bmaparray(). 1448 * location using ulfs_bmaparray().
1449 * 1449 *
1450 * Account for this change in the segment table. 1450 * Account for this change in the segment table.
1451 * 1451 *
1452 * called with sp == NULL by roll-forwarding code. 1452 * called with sp == NULL by roll-forwarding code.
1453 */ 1453 */
1454void 1454void
1455lfs_update_single(struct lfs *fs, struct segment *sp, 1455lfs_update_single(struct lfs *fs, struct segment *sp,
1456 struct vnode *vp, daddr_t lbn, int32_t ndaddr, int size) 1456 struct vnode *vp, daddr_t lbn, int32_t ndaddr, int size)
1457{ 1457{
1458 SEGUSE *sup; 1458 SEGUSE *sup;
1459 struct buf *bp; 1459 struct buf *bp;
1460 struct indir a[ULFS_NIADDR + 2], *ap; 1460 struct indir a[ULFS_NIADDR + 2], *ap;
1461 struct inode *ip; 1461 struct inode *ip;
1462 daddr_t daddr, ooff; 1462 daddr_t daddr, ooff;
1463 int num, error; 1463 int num, error;
1464 int bb, osize, obb; 1464 int bb, osize, obb;
1465 1465
1466 ASSERT_SEGLOCK(fs); 1466 ASSERT_SEGLOCK(fs);
1467 KASSERT(sp == NULL || sp->vp == vp); 1467 KASSERT(sp == NULL || sp->vp == vp);
1468 ip = VTOI(vp); 1468 ip = VTOI(vp);
1469 1469
1470 error = ulfs_bmaparray(vp, lbn, &daddr, a, &num, NULL, NULL); 1470 error = ulfs_bmaparray(vp, lbn, &daddr, a, &num, NULL, NULL);
1471 if (error) 1471 if (error)
1472 panic("lfs_updatemeta: ulfs_bmaparray returned %d", error); 1472 panic("lfs_updatemeta: ulfs_bmaparray returned %d", error);
1473 1473
1474 daddr = (daddr_t)((int32_t)daddr); /* XXX ondisk32 */ 1474 daddr = (daddr_t)((int32_t)daddr); /* XXX ondisk32 */
1475 KASSERT(daddr <= LFS_MAX_DADDR); 1475 KASSERT(daddr <= LFS_MAX_DADDR);
1476 if (daddr > 0) 1476 if (daddr > 0)
1477 daddr = LFS_DBTOFSB(fs, daddr); 1477 daddr = LFS_DBTOFSB(fs, daddr);
1478 1478
1479 bb = lfs_numfrags(fs, size); 1479 bb = lfs_numfrags(fs, size);
1480 switch (num) { 1480 switch (num) {
1481 case 0: 1481 case 0:
1482 ooff = ip->i_ffs1_db[lbn]; 1482 ooff = ip->i_ffs1_db[lbn];
1483 DEBUG_OOFF(0); 1483 DEBUG_OOFF(0);
1484 if (ooff == UNWRITTEN) 1484 if (ooff == UNWRITTEN)
1485 ip->i_ffs1_blocks += bb; 1485 ip->i_ffs1_blocks += bb;
1486 else { 1486 else {
1487 /* possible fragment truncation or extension */ 1487 /* possible fragment truncation or extension */
1488 obb = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]); 1488 obb = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]);
1489 ip->i_ffs1_blocks += (bb - obb); 1489 ip->i_ffs1_blocks += (bb - obb);
1490 } 1490 }
1491 ip->i_ffs1_db[lbn] = ndaddr; 1491 ip->i_ffs1_db[lbn] = ndaddr;
1492 break; 1492 break;
1493 case 1: 1493 case 1:
1494 ooff = ip->i_ffs1_ib[a[0].in_off]; 1494 ooff = ip->i_ffs1_ib[a[0].in_off];
1495 DEBUG_OOFF(1); 1495 DEBUG_OOFF(1);
1496 if (ooff == UNWRITTEN) 1496 if (ooff == UNWRITTEN)
1497 ip->i_ffs1_blocks += bb; 1497 ip->i_ffs1_blocks += bb;
1498 ip->i_ffs1_ib[a[0].in_off] = ndaddr; 1498 ip->i_ffs1_ib[a[0].in_off] = ndaddr;
1499 break; 1499 break;
1500 default: 1500 default:
1501 ap = &a[num - 1]; 1501 ap = &a[num - 1];
1502 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 1502 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs),
1503 B_MODIFY, &bp)) 1503 B_MODIFY, &bp))
1504 panic("lfs_updatemeta: bread bno %" PRId64, 1504 panic("lfs_updatemeta: bread bno %" PRId64,
1505 ap->in_lbn); 1505 ap->in_lbn);
1506 1506
1507 /* XXX ondisk32 */ 1507 /* XXX ondisk32 */
1508 ooff = ((int32_t *)bp->b_data)[ap->in_off]; 1508 ooff = ((int32_t *)bp->b_data)[ap->in_off];
1509 DEBUG_OOFF(num); 1509 DEBUG_OOFF(num);
1510 if (ooff == UNWRITTEN) 1510 if (ooff == UNWRITTEN)
1511 ip->i_ffs1_blocks += bb; 1511 ip->i_ffs1_blocks += bb;
1512 /* XXX ondisk32 */ 1512 /* XXX ondisk32 */
1513 ((int32_t *)bp->b_data)[ap->in_off] = ndaddr; 1513 ((int32_t *)bp->b_data)[ap->in_off] = ndaddr;
1514 (void) VOP_BWRITE(bp->b_vp, bp); 1514 (void) VOP_BWRITE(bp->b_vp, bp);
1515 } 1515 }
1516 1516
1517 KASSERT(ooff == 0 || ooff == UNWRITTEN || ooff == daddr); 1517 KASSERT(ooff == 0 || ooff == UNWRITTEN || ooff == daddr);
1518 1518
1519 /* Update hiblk when extending the file */ 1519 /* Update hiblk when extending the file */
1520 if (lbn > ip->i_lfs_hiblk) 1520 if (lbn > ip->i_lfs_hiblk)
1521 ip->i_lfs_hiblk = lbn; 1521 ip->i_lfs_hiblk = lbn;
1522 1522
1523 /* 1523 /*
1524 * Though we'd rather it couldn't, this *can* happen right now 1524 * Though we'd rather it couldn't, this *can* happen right now
1525 * if cleaning blocks and regular blocks coexist. 1525 * if cleaning blocks and regular blocks coexist.
1526 */ 1526 */
1527 /* KASSERT(daddr < fs->lfs_lastpseg || daddr > ndaddr); */ 1527 /* KASSERT(daddr < fs->lfs_lastpseg || daddr > ndaddr); */
1528 1528
1529 /* 1529 /*
1530 * Update segment usage information, based on old size 1530 * Update segment usage information, based on old size
1531 * and location. 1531 * and location.
1532 */ 1532 */
1533 if (daddr > 0) { 1533 if (daddr > 0) {
1534 u_int32_t oldsn = lfs_dtosn(fs, daddr); 1534 u_int32_t oldsn = lfs_dtosn(fs, daddr);
1535#ifdef DIAGNOSTIC 1535#ifdef DIAGNOSTIC
1536 int ndupino; 1536 int ndupino;
1537 1537
1538 if (sp && sp->seg_number == oldsn) { 1538 if (sp && sp->seg_number == oldsn) {
1539 ndupino = sp->ndupino; 1539 ndupino = sp->ndupino;
1540 } else { 1540 } else {
1541 ndupino = 0; 1541 ndupino = 0;
1542 } 1542 }
1543#endif 1543#endif
1544 KASSERT(oldsn < lfs_sb_getnseg(fs)); 1544 KASSERT(oldsn < lfs_sb_getnseg(fs));
1545 if (lbn >= 0 && lbn < ULFS_NDADDR) 1545 if (lbn >= 0 && lbn < ULFS_NDADDR)
1546 osize = ip->i_lfs_fragsize[lbn]; 1546 osize = ip->i_lfs_fragsize[lbn];
1547 else 1547 else
1548 osize = lfs_sb_getbsize(fs); 1548 osize = lfs_sb_getbsize(fs);
1549 LFS_SEGENTRY(sup, fs, oldsn, bp); 1549 LFS_SEGENTRY(sup, fs, oldsn, bp);
1550#ifdef DIAGNOSTIC 1550#ifdef DIAGNOSTIC
1551 if (sup->su_nbytes + sizeof (struct ulfs1_dinode) * ndupino 1551 if (sup->su_nbytes + sizeof (struct ulfs1_dinode) * ndupino
1552 < osize) { 1552 < osize) {
1553 printf("lfs_updatemeta: negative bytes " 1553 printf("lfs_updatemeta: negative bytes "
1554 "(segment %" PRIu32 " short by %" PRId64 1554 "(segment %" PRIu32 " short by %" PRId64
1555 ")\n", lfs_dtosn(fs, daddr), 1555 ")\n", lfs_dtosn(fs, daddr),
1556 (int64_t)osize - 1556 (int64_t)osize -
1557 (sizeof (struct ulfs1_dinode) * ndupino + 1557 (sizeof (struct ulfs1_dinode) * ndupino +
1558 sup->su_nbytes)); 1558 sup->su_nbytes));
1559 printf("lfs_updatemeta: ino %llu, lbn %" PRId64 1559 printf("lfs_updatemeta: ino %llu, lbn %" PRId64
1560 ", addr = 0x%" PRIx64 "\n", 1560 ", addr = 0x%" PRIx64 "\n",
1561 (unsigned long long)ip->i_number, lbn, daddr); 1561 (unsigned long long)ip->i_number, lbn, daddr);
1562 printf("lfs_updatemeta: ndupino=%d\n", ndupino); 1562 printf("lfs_updatemeta: ndupino=%d\n", ndupino);
1563 panic("lfs_updatemeta: negative bytes"); 1563 panic("lfs_updatemeta: negative bytes");
1564 sup->su_nbytes = osize - 1564 sup->su_nbytes = osize -
1565 sizeof (struct ulfs1_dinode) * ndupino; 1565 sizeof (struct ulfs1_dinode) * ndupino;
1566 } 1566 }
1567#endif 1567#endif
1568 DLOG((DLOG_SU, "seg %" PRIu32 " -= %d for ino %d lbn %" PRId64 1568 DLOG((DLOG_SU, "seg %" PRIu32 " -= %d for ino %d lbn %" PRId64
1569 " db 0x%" PRIx64 "\n", 1569 " db 0x%" PRIx64 "\n",
1570 lfs_dtosn(fs, daddr), osize, 1570 lfs_dtosn(fs, daddr), osize,
1571 ip->i_number, lbn, daddr)); 1571 ip->i_number, lbn, daddr));
1572 sup->su_nbytes -= osize; 1572 sup->su_nbytes -= osize;
1573 if (!(bp->b_flags & B_GATHERED)) { 1573 if (!(bp->b_flags & B_GATHERED)) {
1574 mutex_enter(&lfs_lock); 1574 mutex_enter(&lfs_lock);
1575 fs->lfs_flags |= LFS_IFDIRTY; 1575 fs->lfs_flags |= LFS_IFDIRTY;
1576 mutex_exit(&lfs_lock); 1576 mutex_exit(&lfs_lock);
1577 } 1577 }
1578 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 1578 LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
1579 } 1579 }
1580 /* 1580 /*
1581 * Now that this block has a new address, and its old 1581 * Now that this block has a new address, and its old
1582 * segment no longer owns it, we can forget about its 1582 * segment no longer owns it, we can forget about its
1583 * old size. 1583 * old size.
1584 */ 1584 */
1585 if (lbn >= 0 && lbn < ULFS_NDADDR) 1585 if (lbn >= 0 && lbn < ULFS_NDADDR)
1586 ip->i_lfs_fragsize[lbn] = size; 1586 ip->i_lfs_fragsize[lbn] = size;
1587} 1587}
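
In lfs_update_single, rewriting an existing block adjusts i_ffs1_blocks by the difference between the new and old fragment counts, while a formerly UNWRITTEN block adds its full count. A sketch of that accounting, assuming 1 KB fragments and an invented UNWRITTEN sentinel:

#include <stdio.h>

#define UNWRITTEN	(-2)	/* stand-in for the LFS sentinel */

static long
numfrags(long bytes)
{
	return (bytes + 1023) / 1024;	/* assumed 1 KB fragments */
}

int
main(void)
{
	long ooff = 0x1000;	/* old address; not UNWRITTEN */
	long blocks = 8, osize = 4096, nsize = 6144;
	long bb = numfrags(nsize), obb = numfrags(osize);

	if (ooff == UNWRITTEN)
		blocks += bb;		/* first real write: full count */
	else
		blocks += bb - obb;	/* fragment grew or shrank */
	printf("i_ffs1_blocks now %ld frags\n", blocks);
	return 0;
}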
1588 1588
1589/* 1589/*
1590 * Update the metadata that points to the blocks listed in the FINFO 1590 * Update the metadata that points to the blocks listed in the FINFO
1591 * array. 1591 * array.
1592 */ 1592 */
1593void 1593void
1594lfs_updatemeta(struct segment *sp) 1594lfs_updatemeta(struct segment *sp)
1595{ 1595{
1596 struct buf *sbp; 1596 struct buf *sbp;
1597 struct lfs *fs; 1597 struct lfs *fs;
1598 struct vnode *vp; 1598 struct vnode *vp;
1599 daddr_t lbn; 1599 daddr_t lbn;
1600 int i, nblocks, num; 1600 int i, nblocks, num;
1601 int bb; 1601 int bb;
1602 int bytesleft, size; 1602 int bytesleft, size;
1603 1603
1604 ASSERT_SEGLOCK(sp->fs); 1604 ASSERT_SEGLOCK(sp->fs);
1605 vp = sp->vp; 1605 vp = sp->vp;
1606 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 1606 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
1607 KASSERT(nblocks >= 0); 1607 KASSERT(nblocks >= 0);
1608 KASSERT(vp != NULL); 1608 KASSERT(vp != NULL);
1609 if (nblocks == 0) 1609 if (nblocks == 0)
1610 return; 1610 return;
1611 1611
1612 /* 1612 /*
1613 * This count may be high due to oversize blocks from lfs_gop_write. 1613 * This count may be high due to oversize blocks from lfs_gop_write.
1614 * Correct for this. (XXX we should be able to keep track of these.) 1614 * Correct for this. (XXX we should be able to keep track of these.)
1615 */ 1615 */
1616 fs = sp->fs; 1616 fs = sp->fs;
1617 for (i = 0; i < nblocks; i++) { 1617 for (i = 0; i < nblocks; i++) {
1618 if (sp->start_bpp[i] == NULL) { 1618 if (sp->start_bpp[i] == NULL) {
1619 DLOG((DLOG_SEG, "lfs_updatemeta: nblocks = %d, not %d\n", i, nblocks)); 1619 DLOG((DLOG_SEG, "lfs_updatemeta: nblocks = %d, not %d\n", i, nblocks));
1620 nblocks = i; 1620 nblocks = i;
1621 break; 1621 break;
1622 } 1622 }
1623 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs)); 1623 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs));
1624 KASSERT(sp->start_bpp[i]->b_lblkno >= 0 || num == 1); 1624 KASSERT(sp->start_bpp[i]->b_lblkno >= 0 || num == 1);
1625 nblocks -= num - 1; 1625 nblocks -= num - 1;
1626 } 1626 }
1627 1627
1628 KASSERT(vp->v_type == VREG || 1628 KASSERT(vp->v_type == VREG ||
1629 nblocks == &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp); 1629 nblocks == &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp);
1630 KASSERT(nblocks == sp->cbpp - sp->start_bpp); 1630 KASSERT(nblocks == sp->cbpp - sp->start_bpp);
1631 1631
1632 /* 1632 /*
1633 * Sort the blocks. 1633 * Sort the blocks.
1634 * 1634 *
1635 * We have to sort even if the blocks come from the 1635 * We have to sort even if the blocks come from the
1636 * cleaner, because there might be other pending blocks on the 1636 * cleaner, because there might be other pending blocks on the
1637 * same inode...and if we don't sort, and there are fragments 1637 * same inode...and if we don't sort, and there are fragments
1638 * present, blocks may be written in the wrong place. 1638 * present, blocks may be written in the wrong place.
1639 */ 1639 */
1640 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks, lfs_sb_getbsize(fs)); 1640 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks, lfs_sb_getbsize(fs));
1641 1641
1642 /* 1642 /*
1643 * Record the length of the last block in case it's a fragment. 1643 * Record the length of the last block in case it's a fragment.
1644 * If there are indirect blocks present, they sort last. An 1644 * If there are indirect blocks present, they sort last. An
1645 * indirect block will be lfs_bsize and its presence indicates 1645 * indirect block will be lfs_bsize and its presence indicates
1646 * that you cannot have fragments. 1646 * that you cannot have fragments.
1647 * 1647 *
1648 * XXX This last is a lie. A cleaned fragment can coexist with 1648 * XXX This last is a lie. A cleaned fragment can coexist with
1649 * XXX a later indirect block. This will continue to be 1649 * XXX a later indirect block. This will continue to be
1650 * XXX true until lfs_markv is fixed to do everything with 1650 * XXX true until lfs_markv is fixed to do everything with
1651 * XXX fake blocks (including fake inodes and fake indirect blocks). 1651 * XXX fake blocks (including fake inodes and fake indirect blocks).
1652 */ 1652 */
1653 sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 1653 sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) &
1654 lfs_sb_getbmask(fs)) + 1; 1654 lfs_sb_getbmask(fs)) + 1;
1655 1655
1656 /* 1656 /*
1657 * Assign disk addresses, and update references to the logical 1657 * Assign disk addresses, and update references to the logical
1658 * block and the segment usage information. 1658 * block and the segment usage information.
1659 */ 1659 */
1660 for (i = nblocks; i--; ++sp->start_bpp) { 1660 for (i = nblocks; i--; ++sp->start_bpp) {
1661 sbp = *sp->start_bpp; 1661 sbp = *sp->start_bpp;
1662 lbn = *sp->start_lbp; 1662 lbn = *sp->start_lbp;
1663 KASSERT(sbp->b_lblkno == lbn); 1663 KASSERT(sbp->b_lblkno == lbn);
1664 1664
1665 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs)); 1665 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs));
1666 1666
1667 /* 1667 /*
1668 * If we write a frag in the wrong place, the cleaner won't 1668 * If we write a frag in the wrong place, the cleaner won't
1669 * be able to correctly identify its size later, and the 1669 * be able to correctly identify its size later, and the
1670 * segment will be uncleanable. (Even worse, it will assume 1670 * segment will be uncleanable. (Even worse, it will assume
1671 * that the indirect block that actually ends the list 1671 * that the indirect block that actually ends the list
1672 * is of a smaller size!) 1672 * is of a smaller size!)
1673 */ 1673 */
1674 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0) 1674 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0)
1675 panic("lfs_updatemeta: fragment is not last block"); 1675 panic("lfs_updatemeta: fragment is not last block");
1676 1676
1677 /* 1677 /*
1678 * For each subblock in this possibly oversized block, 1678 * For each subblock in this possibly oversized block,
1679 * update its address on disk. 1679 * update its address on disk.
1680 */ 1680 */
1681 KASSERT(lbn >= 0 || sbp->b_bcount == lfs_sb_getbsize(fs)); 1681 KASSERT(lbn >= 0 || sbp->b_bcount == lfs_sb_getbsize(fs));
1682 KASSERT(vp == sbp->b_vp); 1682 KASSERT(vp == sbp->b_vp);
1683 for (bytesleft = sbp->b_bcount; bytesleft > 0; 1683 for (bytesleft = sbp->b_bcount; bytesleft > 0;
1684 bytesleft -= lfs_sb_getbsize(fs)) { 1684 bytesleft -= lfs_sb_getbsize(fs)) {
1685 size = MIN(bytesleft, lfs_sb_getbsize(fs)); 1685 size = MIN(bytesleft, lfs_sb_getbsize(fs));
1686 bb = lfs_numfrags(fs, size); 1686 bb = lfs_numfrags(fs, size);
1687 lbn = *sp->start_lbp++; 1687 lbn = *sp->start_lbp++;
1688 lfs_update_single(fs, sp, sp->vp, lbn, lfs_sb_getoffset(fs), 1688 lfs_update_single(fs, sp, sp->vp, lbn, lfs_sb_getoffset(fs),
1689 size); 1689 size);
1690 lfs_sb_addoffset(fs, bb); 1690 lfs_sb_addoffset(fs, bb);
1691 } 1691 }
1692 1692
1693 } 1693 }
1694 1694
1695 /* This inode has been modified */ 1695 /* This inode has been modified */
1696 LFS_SET_UINO(VTOI(vp), IN_MODIFIED); 1696 LFS_SET_UINO(VTOI(vp), IN_MODIFIED);
1697} 1697}
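
The correction loop at the top of lfs_updatemeta shrinks nblocks because an oversized buffer spanning num fs-blocks occupies num FINFO slots but only one slot in the buffer array. A standalone model of that arithmetic, with invented buffer sizes:

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	long bsize = 8192;
	long bcounts[] = { 8192, 24576, 8192 };	/* middle one oversize */
	int i, num, nblocks = 5;	/* 5 FINFO entries in total */

	for (i = 0; i < 3; i++) {
		num = howmany(bcounts[i], bsize);
		nblocks -= num - 1;	/* collapse to one per buffer */
	}
	printf("%d distinct buffers\n", nblocks);	/* prints 3 */
	return 0;
}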
1698 1698
1699/* 1699/*
1700 * Move lfs_offset to a segment earlier than sn. 1700 * Move lfs_offset to a segment earlier than sn.
1701 */ 1701 */
1702int 1702int
1703lfs_rewind(struct lfs *fs, int newsn) 1703lfs_rewind(struct lfs *fs, int newsn)
1704{ 1704{
1705 int sn, osn, isdirty; 1705 int sn, osn, isdirty;
1706 struct buf *bp; 1706 struct buf *bp;
1707 SEGUSE *sup; 1707 SEGUSE *sup;
1708 1708
1709 ASSERT_SEGLOCK(fs); 1709 ASSERT_SEGLOCK(fs);
1710 1710
1711 osn = lfs_dtosn(fs, lfs_sb_getoffset(fs)); 1711 osn = lfs_dtosn(fs, lfs_sb_getoffset(fs));
1712 if (osn < newsn) 1712 if (osn < newsn)
1713 return 0; 1713 return 0;
1714 1714
1715 /* lfs_avail eats the remaining space in this segment */ 1715 /* lfs_avail eats the remaining space in this segment */
1716 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs))); 1716 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs)));
1717 1717
1718 /* Find a low-numbered segment */ 1718 /* Find a low-numbered segment */
1719 for (sn = 0; sn < lfs_sb_getnseg(fs); ++sn) { 1719 for (sn = 0; sn < lfs_sb_getnseg(fs); ++sn) {
1720 LFS_SEGENTRY(sup, fs, sn, bp); 1720 LFS_SEGENTRY(sup, fs, sn, bp);
1721 isdirty = sup->su_flags & SEGUSE_DIRTY; 1721 isdirty = sup->su_flags & SEGUSE_DIRTY;
1722 brelse(bp, 0); 1722 brelse(bp, 0);
1723 1723
1724 if (!isdirty) 1724 if (!isdirty)
1725 break; 1725 break;
1726 } 1726 }
1727 if (sn == lfs_sb_getnseg(fs)) 1727 if (sn == lfs_sb_getnseg(fs))
1728 panic("lfs_rewind: no clean segments"); 1728 panic("lfs_rewind: no clean segments");
1729 if (newsn >= 0 && sn >= newsn) 1729 if (newsn >= 0 && sn >= newsn)
1730 return ENOENT; 1730 return ENOENT;
1731 lfs_sb_setnextseg(fs, sn); 1731 lfs_sb_setnextseg(fs, sn);
1732 lfs_newseg(fs); 1732 lfs_newseg(fs);
1733 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs)); 1733 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs));
1734 1734
1735 return 0; 1735 return 0;
1736} 1736}
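
lfs_rewind charges the unwritten remainder of the current segment, lfs_sb_getfsbpseg(fs) - (offset - curseg), against lfs_avail before hunting for a clean segment. A one-step sketch of that adjustment, all quantities in fs blocks and all values invented:

#include <stdio.h>

int
main(void)
{
	long fsbpseg = 128, curseg = 1024, offset = 1072, avail = 5000;

	avail -= fsbpseg - (offset - curseg);	/* eat remaining 80 fsb */
	printf("avail now %ld fsb\n", avail);
	return 0;
}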
1737 1737
1738/* 1738/*
1739 * Start a new partial segment. 1739 * Start a new partial segment.
1740 * 1740 *
1741 * Return 1 if we advanced to a new segment. 1741 * Return 1 if we advanced to a new segment.
1742 * Otherwise, return 0. 1742 * Otherwise, return 0.
1743 */ 1743 */
1744int 1744int
1745lfs_initseg(struct lfs *fs) 1745lfs_initseg(struct lfs *fs)
1746{ 1746{
1747 struct segment *sp = fs->lfs_sp; 1747 struct segment *sp = fs->lfs_sp;
1748 SEGSUM *ssp; 1748 SEGSUM *ssp;
1749 struct buf *sbp; /* buffer for SEGSUM */ 1749 struct buf *sbp; /* buffer for SEGSUM */
1750 int repeat = 0; /* return value */ 1750 int repeat = 0; /* return value */
1751 1751
1752 ASSERT_SEGLOCK(fs); 1752 ASSERT_SEGLOCK(fs);
1753 /* Advance to the next segment. */ 1753 /* Advance to the next segment. */
1754 if (!LFS_PARTIAL_FITS(fs)) { 1754 if (!LFS_PARTIAL_FITS(fs)) {
1755 SEGUSE *sup; 1755 SEGUSE *sup;
1756 struct buf *bp; 1756 struct buf *bp;
1757 1757
1758 /* lfs_avail eats the remaining space */ 1758 /* lfs_avail eats the remaining space */
1759 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - 1759 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) -
1760 lfs_sb_getcurseg(fs))); 1760 lfs_sb_getcurseg(fs)));
1761 /* Wake up any cleaning procs waiting on this file system. */ 1761 /* Wake up any cleaning procs waiting on this file system. */
1762 lfs_wakeup_cleaner(fs); 1762 lfs_wakeup_cleaner(fs);
1763 lfs_newseg(fs); 1763 lfs_newseg(fs);
1764 repeat = 1; 1764 repeat = 1;
1765 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs)); 1765 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs));
1766 1766
1767 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 1767 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs));
1768 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs)); 1768 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs));
1769 1769
1770 /* 1770 /*
1771 * If the segment contains a superblock, update the offset 1771 * If the segment contains a superblock, update the offset
1772 * and summary address to skip over it. 1772 * and summary address to skip over it.
1773 */ 1773 */
1774 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 1774 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
1775 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 1775 if (sup->su_flags & SEGUSE_SUPERBLOCK) {
1776 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD)); 1776 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD));
1777 sp->seg_bytes_left -= LFS_SBPAD; 1777 sp->seg_bytes_left -= LFS_SBPAD;
1778 } 1778 }
1779 brelse(bp, 0); 1779 brelse(bp, 0);
1780 /* Segment zero could also contain the labelpad */ 1780 /* Segment zero could also contain the labelpad */
1781 if (fs->lfs_version > 1 && sp->seg_number == 0 && 1781 if (fs->lfs_version > 1 && sp->seg_number == 0 &&
1782 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) { 1782 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) {
1783 lfs_sb_addoffset(fs, 1783 lfs_sb_addoffset(fs,
1784 lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs)); 1784 lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs));
1785 sp->seg_bytes_left -= 1785 sp->seg_bytes_left -=
1786 LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs)); 1786 LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs));
1787 } 1787 }
1788 } else { 1788 } else {
1789 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 1789 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs));
1790 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) - 1790 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) -
1791 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs))); 1791 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs)));
1792 } 1792 }
1793 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs)); 1793 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs));
1794 1794
1795 /* Record first address of this partial segment */ 1795 /* Record first address of this partial segment */
1796 if (sp->seg_flags & SEGM_CLEAN) { 1796 if (sp->seg_flags & SEGM_CLEAN) {
1797 fs->lfs_cleanint[fs->lfs_cleanind] = lfs_sb_getoffset(fs); 1797 fs->lfs_cleanint[fs->lfs_cleanind] = lfs_sb_getoffset(fs);
1798 if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) { 1798 if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) {
1799 /* "1" is the artificial inc in lfs_seglock */ 1799 /* "1" is the artificial inc in lfs_seglock */
1800 mutex_enter(&lfs_lock); 1800 mutex_enter(&lfs_lock);
1801 while (fs->lfs_iocount > 1) { 1801 while (fs->lfs_iocount > 1) {
1802 mtsleep(&fs->lfs_iocount, PRIBIO + 1, 1802 mtsleep(&fs->lfs_iocount, PRIBIO + 1,
1803 "lfs_initseg", 0, &lfs_lock); 1803 "lfs_initseg", 0, &lfs_lock);
1804 } 1804 }
1805 mutex_exit(&lfs_lock); 1805 mutex_exit(&lfs_lock);
1806 fs->lfs_cleanind = 0; 1806 fs->lfs_cleanind = 0;
1807 } 1807 }
1808 } 1808 }
1809 1809
1810 sp->fs = fs; 1810 sp->fs = fs;
1811 sp->ibp = NULL; 1811 sp->ibp = NULL;
1812 sp->idp = NULL; 1812 sp->idp = NULL;
1813 sp->ninodes = 0; 1813 sp->ninodes = 0;
1814 sp->ndupino = 0; 1814 sp->ndupino = 0;
1815 1815
1816 sp->cbpp = sp->bpp; 1816 sp->cbpp = sp->bpp;
1817 1817
1818 /* Get a new buffer for SEGSUM */ 1818 /* Get a new buffer for SEGSUM */
1819 sbp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp, 1819 sbp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp,
1820 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs), LFS_NB_SUMMARY); 1820 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs), LFS_NB_SUMMARY);
1821 1821
1822 /* ... and enter it into the buffer list. */ 1822 /* ... and enter it into the buffer list. */
1823 *sp->cbpp = sbp; 1823 *sp->cbpp = sbp;
1824 sp->cbpp++; 1824 sp->cbpp++;
1825 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 1825 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
1826 1826
1827 sp->start_bpp = sp->cbpp; 1827 sp->start_bpp = sp->cbpp;
1828 1828
1829 /* Set pointer to SEGSUM, initialize it. */ 1829 /* Set pointer to SEGSUM, initialize it. */
1830 ssp = sp->segsum = sbp->b_data; 1830 ssp = sp->segsum = sbp->b_data;
1831 memset(ssp, 0, lfs_sb_getsumsize(fs)); 1831 memset(ssp, 0, lfs_sb_getsumsize(fs));
1832 ssp->ss_next = lfs_sb_getnextseg(fs); 1832 ssp->ss_next = lfs_sb_getnextseg(fs);
1833 ssp->ss_nfinfo = ssp->ss_ninos = 0; 1833 ssp->ss_nfinfo = ssp->ss_ninos = 0;
1834 ssp->ss_magic = SS_MAGIC; 1834 ssp->ss_magic = SS_MAGIC;
1835 1835
1836 /* Set pointer to first FINFO, initialize it. */ 1836 /* Set pointer to first FINFO, initialize it. */
1837 sp->fip = (struct finfo *)((char *)sp->segsum + SEGSUM_SIZE(fs)); 1837 sp->fip = (struct finfo *)((char *)sp->segsum + SEGSUM_SIZE(fs));
1838 sp->fip->fi_nblocks = 0; 1838 sp->fip->fi_nblocks = 0;
1839 sp->start_lbp = &sp->fip->fi_blocks[0]; 1839 sp->start_lbp = &sp->fip->fi_blocks[0];
1840 sp->fip->fi_lastlength = 0; 1840 sp->fip->fi_lastlength = 0;
1841 1841
1842 sp->seg_bytes_left -= lfs_sb_getsumsize(fs); 1842 sp->seg_bytes_left -= lfs_sb_getsumsize(fs);
1843 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs); 1843 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs);
1844 1844
1845 return (repeat); 1845 return (repeat);
1846} 1846}
1847 1847
1848/* 1848/*
1849 * Remove SEGUSE_INVAL from all segments. 1849 * Remove SEGUSE_INVAL from all segments.
1850 */ 1850 */
1851void 1851void
1852lfs_unset_inval_all(struct lfs *fs) 1852lfs_unset_inval_all(struct lfs *fs)
1853{ 1853{
1854 SEGUSE *sup; 1854 SEGUSE *sup;
1855 struct buf *bp; 1855 struct buf *bp;
1856 int i; 1856 int i;
1857 1857
1858 for (i = 0; i < lfs_sb_getnseg(fs); i++) { 1858 for (i = 0; i < lfs_sb_getnseg(fs); i++) {
1859 LFS_SEGENTRY(sup, fs, i, bp); 1859 LFS_SEGENTRY(sup, fs, i, bp);
1860 if (sup->su_flags & SEGUSE_INVAL) { 1860 if (sup->su_flags & SEGUSE_INVAL) {
1861 sup->su_flags &= ~SEGUSE_INVAL; 1861 sup->su_flags &= ~SEGUSE_INVAL;
1862 LFS_WRITESEGENTRY(sup, fs, i, bp); 1862 LFS_WRITESEGENTRY(sup, fs, i, bp);
1863 } else 1863 } else
1864 brelse(bp, 0); 1864 brelse(bp, 0);
1865 } 1865 }
1866} 1866}
1867 1867
1868/* 1868/*
1869 * Return the next segment to write. 1869 * Return the next segment to write.
1870 */ 1870 */
1871void 1871void
1872lfs_newseg(struct lfs *fs) 1872lfs_newseg(struct lfs *fs)
1873{ 1873{
1874 CLEANERINFO *cip; 1874 CLEANERINFO *cip;
1875 SEGUSE *sup; 1875 SEGUSE *sup;
1876 struct buf *bp; 1876 struct buf *bp;
1877 int curseg, isdirty, sn, skip_inval; 1877 int curseg, isdirty, sn, skip_inval;
1878 1878
1879 ASSERT_SEGLOCK(fs); 1879 ASSERT_SEGLOCK(fs);
1880 1880
1881 /* Honor LFCNWRAPSTOP */ 1881 /* Honor LFCNWRAPSTOP */
1882 mutex_enter(&lfs_lock); 1882 mutex_enter(&lfs_lock);
1883 while (lfs_sb_getnextseg(fs) < lfs_sb_getcurseg(fs) && fs->lfs_nowrap) { 1883 while (lfs_sb_getnextseg(fs) < lfs_sb_getcurseg(fs) && fs->lfs_nowrap) {
1884 if (fs->lfs_wrappass) { 1884 if (fs->lfs_wrappass) {
1885 log(LOG_NOTICE, "%s: wrappass=%d\n", 1885 log(LOG_NOTICE, "%s: wrappass=%d\n",
1886 lfs_sb_getfsmnt(fs), fs->lfs_wrappass); 1886 lfs_sb_getfsmnt(fs), fs->lfs_wrappass);
1887 fs->lfs_wrappass = 0; 1887 fs->lfs_wrappass = 0;
1888 break; 1888 break;
1889 } 1889 }
1890 fs->lfs_wrapstatus = LFS_WRAP_WAITING; 1890 fs->lfs_wrapstatus = LFS_WRAP_WAITING;
1891 wakeup(&fs->lfs_nowrap); 1891 wakeup(&fs->lfs_nowrap);
1892 log(LOG_NOTICE, "%s: waiting at log wrap\n", lfs_sb_getfsmnt(fs)); 1892 log(LOG_NOTICE, "%s: waiting at log wrap\n", lfs_sb_getfsmnt(fs));
1893 mtsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz, 1893 mtsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz,
1894 &lfs_lock); 1894 &lfs_lock);
1895 } 1895 }
1896 fs->lfs_wrapstatus = LFS_WRAP_GOING; 1896 fs->lfs_wrapstatus = LFS_WRAP_GOING;
1897 mutex_exit(&lfs_lock); 1897 mutex_exit(&lfs_lock);
1898 1898
1899 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 1899 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp);
1900 DLOG((DLOG_SU, "lfs_newseg: seg %d := 0 in newseg\n", 1900 DLOG((DLOG_SU, "lfs_newseg: seg %d := 0 in newseg\n",
1901 lfs_dtosn(fs, lfs_sb_getnextseg(fs)))); 1901 lfs_dtosn(fs, lfs_sb_getnextseg(fs))));
1902 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 1902 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
1903 sup->su_nbytes = 0; 1903 sup->su_nbytes = 0;
1904 sup->su_nsums = 0; 1904 sup->su_nsums = 0;
1905 sup->su_ninos = 0; 1905 sup->su_ninos = 0;
1906 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 1906 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp);
1907 1907
1908 LFS_CLEANERINFO(cip, fs, bp); 1908 LFS_CLEANERINFO(cip, fs, bp);
1909 --cip->clean; 1909 --cip->clean;
1910 ++cip->dirty; 1910 ++cip->dirty;
1911 lfs_sb_setnclean(fs, cip->clean); 1911 lfs_sb_setnclean(fs, cip->clean);
1912 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1912 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1913 1913
1914 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs)); 1914 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs));
1915 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs)); 1915 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs));
1916 skip_inval = 1; 1916 skip_inval = 1;
1917 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) { 1917 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) {
1918 sn = (sn + 1) % lfs_sb_getnseg(fs); 1918 sn = (sn + 1) % lfs_sb_getnseg(fs);
1919 1919
1920 if (sn == curseg) { 1920 if (sn == curseg) {
1921 if (skip_inval) 1921 if (skip_inval)
1922 skip_inval = 0; 1922 skip_inval = 0;
1923 else 1923 else
1924 panic("lfs_nextseg: no clean segments"); 1924 panic("lfs_nextseg: no clean segments");
1925 } 1925 }
1926 LFS_SEGENTRY(sup, fs, sn, bp); 1926 LFS_SEGENTRY(sup, fs, sn, bp);
1927 isdirty = sup->su_flags & (SEGUSE_DIRTY | (skip_inval ? SEGUSE_INVAL : 0)); 1927 isdirty = sup->su_flags & (SEGUSE_DIRTY | (skip_inval ? SEGUSE_INVAL : 0));
1928 /* Check SEGUSE_EMPTY as we go along */ 1928 /* Check SEGUSE_EMPTY as we go along */
1929 if (isdirty && sup->su_nbytes == 0 && 1929 if (isdirty && sup->su_nbytes == 0 &&
1930 !(sup->su_flags & SEGUSE_EMPTY)) 1930 !(sup->su_flags & SEGUSE_EMPTY))
1931 LFS_WRITESEGENTRY(sup, fs, sn, bp); 1931 LFS_WRITESEGENTRY(sup, fs, sn, bp);
1932 else 1932 else
1933 brelse(bp, 0); 1933 brelse(bp, 0);
1934 1934
1935 if (!isdirty) 1935 if (!isdirty)
1936 break; 1936 break;
1937 } 1937 }
1938 if (skip_inval == 0) 1938 if (skip_inval == 0)
1939 lfs_unset_inval_all(fs); 1939 lfs_unset_inval_all(fs);
1940 1940
1941 ++fs->lfs_nactive; 1941 ++fs->lfs_nactive;
1942 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn)); 1942 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
1943 if (lfs_dostats) { 1943 if (lfs_dostats) {
1944 ++lfs_stats.segsused; 1944 ++lfs_stats.segsused;
1945 } 1945 }
1946} 1946}
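
The segment-selection loop above is a two-pass round-robin scan: the first pass rejects segments marked either SEGUSE_DIRTY or SEGUSE_INVAL, and only if the scan wraps all the way around does a second pass accept SEGUSE_INVAL segments (after which lfs_unset_inval_all() clears the flag) before giving up and panicking. A minimal standalone sketch of that control flow, in which the flag bits and the one-byte-per-segment table are simplifying assumptions rather than the kernel's SEGUSE structures:

	/*
	 * Sketch only: SK_DIRTY/SK_INVAL and flags[] stand in for the
	 * SEGUSE entries read with LFS_SEGENTRY() above.
	 */
	enum { SK_DIRTY = 0x01, SK_INVAL = 0x02 };

	static int
	pick_next_segment(const unsigned char *flags, int nseg, int curseg)
	{
		int sn = curseg;
		int skip_inval = 1;

		for (;;) {
			sn = (sn + 1) % nseg;
			if (sn == curseg) {
				if (skip_inval)
					skip_inval = 0;	/* second pass */
				else
					return -1;	/* no clean segments */
			}
			if (!(flags[sn] &
			    (SK_DIRTY | (skip_inval ? SK_INVAL : 0))))
				return sn;	/* usable segment found */
		}
	}
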
1947 1947
1948static struct buf * 1948static struct buf *
1949lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, 1949lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr,
1950 int n) 1950 int n)
1951{ 1951{
1952 struct lfs_cluster *cl; 1952 struct lfs_cluster *cl;
1953 struct buf **bpp, *bp; 1953 struct buf **bpp, *bp;
1954 1954
1955 ASSERT_SEGLOCK(fs); 1955 ASSERT_SEGLOCK(fs);
1956 cl = (struct lfs_cluster *)pool_get(&fs->lfs_clpool, PR_WAITOK); 1956 cl = (struct lfs_cluster *)pool_get(&fs->lfs_clpool, PR_WAITOK);
1957 bpp = (struct buf **)pool_get(&fs->lfs_bpppool, PR_WAITOK); 1957 bpp = (struct buf **)pool_get(&fs->lfs_bpppool, PR_WAITOK);
1958 memset(cl, 0, sizeof(*cl)); 1958 memset(cl, 0, sizeof(*cl));
1959 cl->fs = fs; 1959 cl->fs = fs;
1960 cl->bpp = bpp; 1960 cl->bpp = bpp;
1961 cl->bufcount = 0; 1961 cl->bufcount = 0;
1962 cl->bufsize = 0; 1962 cl->bufsize = 0;
1963 1963
1964 /* If this segment is being written synchronously, note that */ 1964 /* If this segment is being written synchronously, note that */
1965 if (fs->lfs_sp->seg_flags & SEGM_SYNC) { 1965 if (fs->lfs_sp->seg_flags & SEGM_SYNC) {
1966 cl->flags |= LFS_CL_SYNC; 1966 cl->flags |= LFS_CL_SYNC;
1967 cl->seg = fs->lfs_sp; 1967 cl->seg = fs->lfs_sp;
1968 ++cl->seg->seg_iocount; 1968 ++cl->seg->seg_iocount;
1969 } 1969 }
1970 1970
1971 /* Get an empty buffer header, or maybe one with something on it */ 1971 /* Get an empty buffer header, or maybe one with something on it */
1972 bp = getiobuf(vp, true); 1972 bp = getiobuf(vp, true);
1973 bp->b_dev = NODEV; 1973 bp->b_dev = NODEV;
1974 bp->b_blkno = bp->b_lblkno = addr; 1974 bp->b_blkno = bp->b_lblkno = addr;
1975 bp->b_iodone = lfs_cluster_callback; 1975 bp->b_iodone = lfs_cluster_callback;
1976 bp->b_private = cl; 1976 bp->b_private = cl;
1977 1977
1978 return bp; 1978 return bp;
1979} 1979}
1980 1980
1981int 1981int
1982lfs_writeseg(struct lfs *fs, struct segment *sp) 1982lfs_writeseg(struct lfs *fs, struct segment *sp)
1983{ 1983{
1984 struct buf **bpp, *bp, *cbp, *newbp, *unbusybp; 1984 struct buf **bpp, *bp, *cbp, *newbp, *unbusybp;
1985 SEGUSE *sup; 1985 SEGUSE *sup;
1986 SEGSUM *ssp; 1986 SEGSUM *ssp;
1987 int i; 1987 int i;
1988 int do_again, nblocks, byteoffset; 1988 int do_again, nblocks, byteoffset;
1989 size_t el_size; 1989 size_t el_size;
1990 struct lfs_cluster *cl; 1990 struct lfs_cluster *cl;
1991 u_short ninos; 1991 u_short ninos;
1992 struct vnode *devvp; 1992 struct vnode *devvp;
1993 char *p = NULL; 1993 char *p = NULL;
1994 struct vnode *vp; 1994 struct vnode *vp;
1995 int32_t *daddrp; /* XXX ondisk32 */ 1995 int32_t *daddrp; /* XXX ondisk32 */
1996 int changed; 1996 int changed;
1997 u_int32_t sum; 1997 u_int32_t sum;
1998#ifdef DEBUG 1998#ifdef DEBUG
1999 FINFO *fip; 1999 FINFO *fip;
2000 int findex; 2000 int findex;
2001#endif 2001#endif
2002 2002
2003 ASSERT_SEGLOCK(fs); 2003 ASSERT_SEGLOCK(fs);
2004 2004
2005 ssp = (SEGSUM *)sp->segsum; 2005 ssp = (SEGSUM *)sp->segsum;
2006 2006
2007 /* 2007 /*
2008 * If there are no buffers other than the segment summary to write, 2008 * If there are no buffers other than the segment summary to write,
2009 * don't do anything. If we are at the end of a dirop sequence, however, 2009 * don't do anything. If we are at the end of a dirop sequence, however,
2010 * write the empty segment summary anyway, to help out the 2010 * write the empty segment summary anyway, to help out the
2011 * roll-forward agent. 2011 * roll-forward agent.
2012 */ 2012 */
2013 if ((nblocks = sp->cbpp - sp->bpp) == 1) { 2013 if ((nblocks = sp->cbpp - sp->bpp) == 1) {
2014 if ((ssp->ss_flags & (SS_DIROP | SS_CONT)) != SS_DIROP) 2014 if ((ssp->ss_flags & (SS_DIROP | SS_CONT)) != SS_DIROP)
2015 return 0; 2015 return 0;
2016 } 2016 }
2017 2017
2018 /* Note if partial segment is being written by the cleaner */ 2018 /* Note if partial segment is being written by the cleaner */
2019 if (sp->seg_flags & SEGM_CLEAN) 2019 if (sp->seg_flags & SEGM_CLEAN)
2020 ssp->ss_flags |= SS_CLEAN; 2020 ssp->ss_flags |= SS_CLEAN;
2021 2021
2022 /* Note if we are writing to reclaim */ 2022 /* Note if we are writing to reclaim */
2023 if (sp->seg_flags & SEGM_RECLAIM) { 2023 if (sp->seg_flags & SEGM_RECLAIM) {
2024 ssp->ss_flags |= SS_RECLAIM; 2024 ssp->ss_flags |= SS_RECLAIM;
2025 ssp->ss_reclino = fs->lfs_reclino; 2025 ssp->ss_reclino = fs->lfs_reclino;
2026 } 2026 }
2027 2027
2028 devvp = VTOI(fs->lfs_ivnode)->i_devvp; 2028 devvp = VTOI(fs->lfs_ivnode)->i_devvp;
2029 2029
2030 /* Update the segment usage information. */ 2030 /* Update the segment usage information. */
2031 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 2031 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
2032 2032
2033 /* Loop through all blocks, except the segment summary. */ 2033 /* Loop through all blocks, except the segment summary. */
2034 for (bpp = sp->bpp; ++bpp < sp->cbpp; ) { 2034 for (bpp = sp->bpp; ++bpp < sp->cbpp; ) {
2035 if ((*bpp)->b_vp != devvp) { 2035 if ((*bpp)->b_vp != devvp) {
2036 sup->su_nbytes += (*bpp)->b_bcount; 2036 sup->su_nbytes += (*bpp)->b_bcount;
2037 DLOG((DLOG_SU, "seg %" PRIu32 " += %ld for ino %d" 2037 DLOG((DLOG_SU, "seg %" PRIu32 " += %ld for ino %d"
2038 " lbn %" PRId64 " db 0x%" PRIx64 "\n", 2038 " lbn %" PRId64 " db 0x%" PRIx64 "\n",
2039 sp->seg_number, (*bpp)->b_bcount, 2039 sp->seg_number, (*bpp)->b_bcount,
2040 VTOI((*bpp)->b_vp)->i_number, (*bpp)->b_lblkno, 2040 VTOI((*bpp)->b_vp)->i_number, (*bpp)->b_lblkno,
2041 (*bpp)->b_blkno)); 2041 (*bpp)->b_blkno));
2042 } 2042 }
2043 } 2043 }
2044 2044
2045#ifdef DEBUG 2045#ifdef DEBUG
2046 /* Check for zero-length and zero-version FINFO entries. */ 2046 /* Check for zero-length and zero-version FINFO entries. */
2047 fip = (struct finfo *)((char *)ssp + SEGSUM_SIZE(fs)); 2047 fip = (struct finfo *)((char *)ssp + SEGSUM_SIZE(fs));
2048 for (findex = 0; findex < ssp->ss_nfinfo; findex++) { 2048 for (findex = 0; findex < ssp->ss_nfinfo; findex++) {
2049 KDASSERT(fip->fi_nblocks > 0); 2049 KDASSERT(fip->fi_nblocks > 0);
2050 KDASSERT(fip->fi_version > 0); 2050 KDASSERT(fip->fi_version > 0);
2051 fip = (FINFO *)((char *)fip + FINFOSIZE + 2051 fip = (FINFO *)((char *)fip + FINFOSIZE +
2052 sizeof(int32_t) * fip->fi_nblocks); 2052 sizeof(int32_t) * fip->fi_nblocks);
2053 } 2053 }
2054#endif /* DEBUG */ 2054#endif /* DEBUG */
2055 2055
2056 ninos = (ssp->ss_ninos + LFS_INOPB(fs) - 1) / LFS_INOPB(fs); 2056 ninos = (ssp->ss_ninos + LFS_INOPB(fs) - 1) / LFS_INOPB(fs);
2057 DLOG((DLOG_SU, "seg %d += %d for %d inodes\n", 2057 DLOG((DLOG_SU, "seg %d += %d for %d inodes\n",
2058 sp->seg_number, ssp->ss_ninos * sizeof (struct ulfs1_dinode), 2058 sp->seg_number, ssp->ss_ninos * sizeof (struct ulfs1_dinode),
2059 ssp->ss_ninos)); 2059 ssp->ss_ninos));
2060 sup->su_nbytes += ssp->ss_ninos * sizeof (struct ulfs1_dinode); 2060 sup->su_nbytes += ssp->ss_ninos * sizeof (struct ulfs1_dinode);
2061 /* sup->su_nbytes += lfs_sb_getsumsize(fs); */ 2061 /* sup->su_nbytes += lfs_sb_getsumsize(fs); */
2062 if (fs->lfs_version == 1) 2062 if (fs->lfs_version == 1)
2063 sup->su_olastmod = time_second; 2063 sup->su_olastmod = time_second;
2064 else 2064 else
2065 sup->su_lastmod = time_second; 2065 sup->su_lastmod = time_second;
2066 sup->su_ninos += ninos; 2066 sup->su_ninos += ninos;
2067 ++sup->su_nsums; 2067 ++sup->su_nsums;
2068 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 2068 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
2069 2069
2070 do_again = !(bp->b_flags & B_GATHERED); 2070 do_again = !(bp->b_flags & B_GATHERED);
2071 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 2071 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */
2072 2072
2073 /* 2073 /*
2074 * Mark blocks B_BUSY, to prevent them from being changed between 2074 * Mark blocks B_BUSY, to prevent them from being changed between
2075 * the checksum computation and the actual write. 2075 * the checksum computation and the actual write.
2076 * 2076 *
2077 * If we are cleaning, check indirect blocks for UNWRITTEN, and if 2077 * If we are cleaning, check indirect blocks for UNWRITTEN, and if
2078 * there are any, replace them with copies that have UNASSIGNED 2078 * there are any, replace them with copies that have UNASSIGNED
2079 * instead. 2079 * instead.
2080 */ 2080 */
2081 mutex_enter(&bufcache_lock); 2081 mutex_enter(&bufcache_lock);
2082 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 2082 for (bpp = sp->bpp, i = nblocks - 1; i--;) {
2083 ++bpp; 2083 ++bpp;
2084 bp = *bpp; 2084 bp = *bpp;
2085 if (bp->b_iodone != NULL) { /* UBC or malloced buffer */ 2085 if (bp->b_iodone != NULL) { /* UBC or malloced buffer */
2086 bp->b_cflags |= BC_BUSY; 2086 bp->b_cflags |= BC_BUSY;
2087 continue; 2087 continue;
2088 } 2088 }
2089 2089
2090 while (bp->b_cflags & BC_BUSY) { 2090 while (bp->b_cflags & BC_BUSY) {
2091 DLOG((DLOG_SEG, "lfs_writeseg: avoiding potential" 2091 DLOG((DLOG_SEG, "lfs_writeseg: avoiding potential"
2092 " data summary corruption for ino %d, lbn %" 2092 " data summary corruption for ino %d, lbn %"
2093 PRId64 "\n", 2093 PRId64 "\n",
2094 VTOI(bp->b_vp)->i_number, bp->b_lblkno)); 2094 VTOI(bp->b_vp)->i_number, bp->b_lblkno));
2095 bp->b_cflags |= BC_WANTED; 2095 bp->b_cflags |= BC_WANTED;
2096 cv_wait(&bp->b_busy, &bufcache_lock); 2096 cv_wait(&bp->b_busy, &bufcache_lock);
2097 } 2097 }
2098 bp->b_cflags |= BC_BUSY; 2098 bp->b_cflags |= BC_BUSY;
2099 mutex_exit(&bufcache_lock); 2099 mutex_exit(&bufcache_lock);
2100 unbusybp = NULL; 2100 unbusybp = NULL;
2101 2101
2102 /* 2102 /*
2103 * Check and replace indirect block UNWRITTEN bogosity. 2103 * Check and replace indirect block UNWRITTEN bogosity.
2104 * XXX See comment in lfs_writefile. 2104 * XXX See comment in lfs_writefile.
2105 */ 2105 */
2106 if (bp->b_lblkno < 0 && bp->b_vp != devvp && bp->b_vp && 2106 if (bp->b_lblkno < 0 && bp->b_vp != devvp && bp->b_vp &&
2107 VTOI(bp->b_vp)->i_ffs1_blocks != 2107 VTOI(bp->b_vp)->i_ffs1_blocks !=
2108 VTOI(bp->b_vp)->i_lfs_effnblks) { 2108 VTOI(bp->b_vp)->i_lfs_effnblks) {
2109 DLOG((DLOG_VNODE, "lfs_writeseg: cleansing ino %d (%d != %d)\n", 2109 DLOG((DLOG_VNODE, "lfs_writeseg: cleansing ino %d (%d != %d)\n",
2110 VTOI(bp->b_vp)->i_number, 2110 VTOI(bp->b_vp)->i_number,
2111 VTOI(bp->b_vp)->i_lfs_effnblks, 2111 VTOI(bp->b_vp)->i_lfs_effnblks,
2112 VTOI(bp->b_vp)->i_ffs1_blocks)); 2112 VTOI(bp->b_vp)->i_ffs1_blocks));
2113 /* Make a copy we'll make changes to */ 2113 /* Make a copy we'll make changes to */
2114 newbp = lfs_newbuf(fs, bp->b_vp, bp->b_lblkno, 2114 newbp = lfs_newbuf(fs, bp->b_vp, bp->b_lblkno,
2115 bp->b_bcount, LFS_NB_IBLOCK); 2115 bp->b_bcount, LFS_NB_IBLOCK);
2116 newbp->b_blkno = bp->b_blkno; 2116 newbp->b_blkno = bp->b_blkno;
2117 memcpy(newbp->b_data, bp->b_data, 2117 memcpy(newbp->b_data, bp->b_data,
2118 newbp->b_bcount); 2118 newbp->b_bcount);
2119 2119
2120 changed = 0; 2120 changed = 0;
2121 /* XXX ondisk32 */ 2121 /* XXX ondisk32 */
2122 for (daddrp = (int32_t *)(newbp->b_data); 2122 for (daddrp = (int32_t *)(newbp->b_data);
2123 daddrp < (int32_t *)((char *)newbp->b_data + 2123 daddrp < (int32_t *)((char *)newbp->b_data +
2124 newbp->b_bcount); daddrp++) { 2124 newbp->b_bcount); daddrp++) {
2125 if (*daddrp == UNWRITTEN) { 2125 if (*daddrp == UNWRITTEN) {
2126 ++changed; 2126 ++changed;
2127 *daddrp = 0; 2127 *daddrp = 0;
2128 } 2128 }
2129 } 2129 }
2130 /* 2130 /*
2131 * Get rid of the old buffer. Don't mark it clean, 2131 * Get rid of the old buffer. Don't mark it clean,
2132 * though, if it still has dirty data on it. 2132 * though, if it still has dirty data on it.
2133 */ 2133 */
2134 if (changed) { 2134 if (changed) {
2135 DLOG((DLOG_SEG, "lfs_writeseg: replacing UNWRITTEN(%d):" 2135 DLOG((DLOG_SEG, "lfs_writeseg: replacing UNWRITTEN(%d):"
2136 " bp = %p newbp = %p\n", changed, bp, 2136 " bp = %p newbp = %p\n", changed, bp,
2137 newbp)); 2137 newbp));
2138 *bpp = newbp; 2138 *bpp = newbp;
2139 bp->b_flags &= ~B_GATHERED; 2139 bp->b_flags &= ~B_GATHERED;
2140 bp->b_error = 0; 2140 bp->b_error = 0;
2141 if (bp->b_iodone != NULL) { 2141 if (bp->b_iodone != NULL) {
2142 DLOG((DLOG_SEG, "lfs_writeseg: " 2142 DLOG((DLOG_SEG, "lfs_writeseg: "
2143 "indir bp should not be B_CALL\n")); 2143 "indir bp should not be B_CALL\n"));
2144 biodone(bp); 2144 biodone(bp);
2145 bp = NULL; 2145 bp = NULL;
2146 } else { 2146 } else {
2147 /* Still on free list, leave it there */ 2147 /* Still on free list, leave it there */
2148 unbusybp = bp; 2148 unbusybp = bp;
2149 /* 2149 /*
2150 * We have to re-decrement lfs_avail 2150 * We have to re-decrement lfs_avail
2151 * since this block is going to come 2151 * since this block is going to come
2152 * back around to us in the next 2152 * back around to us in the next
2153 * segment. 2153 * segment.
2154 */ 2154 */
2155 lfs_sb_subavail(fs, 2155 lfs_sb_subavail(fs,
2156 lfs_btofsb(fs, bp->b_bcount)); 2156 lfs_btofsb(fs, bp->b_bcount));
2157 } 2157 }
2158 } else { 2158 } else {
2159 lfs_freebuf(fs, newbp); 2159 lfs_freebuf(fs, newbp);
2160 } 2160 }
2161 } 2161 }
2162 mutex_enter(&bufcache_lock); 2162 mutex_enter(&bufcache_lock);
2163 if (unbusybp != NULL) { 2163 if (unbusybp != NULL) {
2164 unbusybp->b_cflags &= ~BC_BUSY; 2164 unbusybp->b_cflags &= ~BC_BUSY;
2165 if (unbusybp->b_cflags & BC_WANTED) 2165 if (unbusybp->b_cflags & BC_WANTED)
2166 cv_broadcast(&bp->b_busy); 2166 cv_broadcast(&bp->b_busy);
2167 } 2167 }
2168 } 2168 }
2169 mutex_exit(&bufcache_lock); 2169 mutex_exit(&bufcache_lock);
2170 2170
2171 /* 2171 /*
2172 * Compute checksum across data and then across summary; the first 2172 * Compute checksum across data and then across summary; the first
2173 * block (the summary block) is skipped. Set the create time here 2173 * block (the summary block) is skipped. Set the create time here
2174 * so that it's guaranteed to be later than the inode mod times. 2174 * so that it's guaranteed to be later than the inode mod times.
2175 */ 2175 */
2176 sum = 0; 2176 sum = 0;
2177 if (fs->lfs_version == 1) 2177 if (fs->lfs_version == 1)
2178 el_size = sizeof(u_long); 2178 el_size = sizeof(u_long);
2179 else 2179 else
2180 el_size = sizeof(u_int32_t); 2180 el_size = sizeof(u_int32_t);
2181 for (bpp = sp->bpp, i = nblocks - 1; i--; ) { 2181 for (bpp = sp->bpp, i = nblocks - 1; i--; ) {
2182 ++bpp; 2182 ++bpp;
2183 /* Loop through gop_write cluster blocks */ 2183 /* Loop through gop_write cluster blocks */
2184 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 2184 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount;
2185 byteoffset += lfs_sb_getbsize(fs)) { 2185 byteoffset += lfs_sb_getbsize(fs)) {
2186#ifdef LFS_USE_B_INVAL 2186#ifdef LFS_USE_B_INVAL
2187 if (((*bpp)->b_cflags & BC_INVAL) != 0 && 2187 if (((*bpp)->b_cflags & BC_INVAL) != 0 &&
2188 (*bpp)->b_iodone != NULL) { 2188 (*bpp)->b_iodone != NULL) {
2189 if (copyin((void *)(*bpp)->b_saveaddr + 2189 if (copyin((void *)(*bpp)->b_saveaddr +
2190 byteoffset, dp, el_size)) { 2190 byteoffset, dp, el_size)) {
2191 panic("lfs_writeseg: copyin failed [1]:" 2191 panic("lfs_writeseg: copyin failed [1]:"
2192 " ino %d blk %" PRId64, 2192 " ino %d blk %" PRId64,
2193 VTOI((*bpp)->b_vp)->i_number, 2193 VTOI((*bpp)->b_vp)->i_number,
2194 (*bpp)->b_lblkno); 2194 (*bpp)->b_lblkno);
2195 } 2195 }
2196 } else 2196 } else
2197#endif /* LFS_USE_B_INVAL */ 2197#endif /* LFS_USE_B_INVAL */
2198 { 2198 {
2199 sum = lfs_cksum_part((char *) 2199 sum = lfs_cksum_part((char *)
2200 (*bpp)->b_data + byteoffset, el_size, sum); 2200 (*bpp)->b_data + byteoffset, el_size, sum);
2201 } 2201 }
2202 } 2202 }
2203 } 2203 }
2204 if (fs->lfs_version == 1) 2204 if (fs->lfs_version == 1)
2205 ssp->ss_ocreate = time_second; 2205 ssp->ss_ocreate = time_second;
2206 else { 2206 else {
2207 ssp->ss_create = time_second; 2207 ssp->ss_create = time_second;
2208 lfs_sb_addserial(fs, 1); 2208 lfs_sb_addserial(fs, 1);
2209 ssp->ss_serial = lfs_sb_getserial(fs); 2209 ssp->ss_serial = lfs_sb_getserial(fs);
2210 ssp->ss_ident = lfs_sb_getident(fs); 2210 ssp->ss_ident = lfs_sb_getident(fs);
2211 } 2211 }
2212 ssp->ss_datasum = lfs_cksum_fold(sum); 2212 ssp->ss_datasum = lfs_cksum_fold(sum);
2213 ssp->ss_sumsum = cksum(&ssp->ss_datasum, 2213 ssp->ss_sumsum = cksum(&ssp->ss_datasum,
2214 lfs_sb_getsumsize(fs) - sizeof(ssp->ss_sumsum)); 2214 lfs_sb_getsumsize(fs) - sizeof(ssp->ss_sumsum));
2215 2215
2216 mutex_enter(&lfs_lock); 2216 mutex_enter(&lfs_lock);
2217 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) + 2217 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) +
2218 lfs_btofsb(fs, lfs_sb_getsumsize(fs)))); 2218 lfs_btofsb(fs, lfs_sb_getsumsize(fs))));
2219 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) + 2219 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) +
2220 lfs_btofsb(fs, lfs_sb_getsumsize(fs)))); 2220 lfs_btofsb(fs, lfs_sb_getsumsize(fs))));
2221 mutex_exit(&lfs_lock); 2221 mutex_exit(&lfs_lock);
2222 2222
2223 /* 2223 /*
2224 * When we simply write the blocks we lose a rotation for every block 2224 * When we simply write the blocks we lose a rotation for every block
2225 * written. To avoid this problem, we cluster the buffers into a 2225 * written. To avoid this problem, we cluster the buffers into a
2226 * chunk and write the chunk. MAXPHYS is the largest size I/O 2226 * chunk and write the chunk. MAXPHYS is the largest size I/O
2227 * devices can handle; use that for the size of the chunks. 2227 * devices can handle; use that for the size of the chunks.
2228 * 2228 *
2229 * Blocks that are already clusters (from GOP_WRITE), however, we 2229 * Blocks that are already clusters (from GOP_WRITE), however, we
2230 * don't bother to copy into other clusters. 2230 * don't bother to copy into other clusters.
2231 */ 2231 */
2232 2232
2233#define CHUNKSIZE MAXPHYS 2233#define CHUNKSIZE MAXPHYS
2234 2234
2235 if (devvp == NULL) 2235 if (devvp == NULL)
2236 panic("devvp is NULL"); 2236 panic("devvp is NULL");
2237 for (bpp = sp->bpp, i = nblocks; i;) { 2237 for (bpp = sp->bpp, i = nblocks; i;) {
2238 cbp = lfs_newclusterbuf(fs, devvp, (*bpp)->b_blkno, i); 2238 cbp = lfs_newclusterbuf(fs, devvp, (*bpp)->b_blkno, i);
2239 cl = cbp->b_private; 2239 cl = cbp->b_private;
2240 2240
2241 cbp->b_flags |= B_ASYNC; 2241 cbp->b_flags |= B_ASYNC;
2242 cbp->b_cflags |= BC_BUSY; 2242 cbp->b_cflags |= BC_BUSY;
2243 cbp->b_bcount = 0; 2243 cbp->b_bcount = 0;
2244 2244
2245#if defined(DEBUG) && defined(DIAGNOSTIC) 2245#if defined(DEBUG) && defined(DIAGNOSTIC)
2246 if (bpp - sp->bpp > (lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs)) 2246 if (bpp - sp->bpp > (lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs))
2247 / sizeof(int32_t)) { 2247 / sizeof(int32_t)) {
2248 panic("lfs_writeseg: real bpp overwrite"); 2248 panic("lfs_writeseg: real bpp overwrite");
2249 } 2249 }
2250 if (bpp - sp->bpp > lfs_segsize(fs) / fs->lfs_fsize) { 2250 if (bpp - sp->bpp > lfs_segsize(fs) / lfs_sb_getfsize(fs)) {
2251 panic("lfs_writeseg: theoretical bpp overwrite"); 2251 panic("lfs_writeseg: theoretical bpp overwrite");
2252 } 2252 }
2253#endif 2253#endif
2254 2254
2255 /* 2255 /*
2256 * Construct the cluster. 2256 * Construct the cluster.
2257 */ 2257 */
2258 mutex_enter(&lfs_lock); 2258 mutex_enter(&lfs_lock);
2259 ++fs->lfs_iocount; 2259 ++fs->lfs_iocount;
2260 mutex_exit(&lfs_lock); 2260 mutex_exit(&lfs_lock);
2261 while (i && cbp->b_bcount < CHUNKSIZE) { 2261 while (i && cbp->b_bcount < CHUNKSIZE) {
2262 bp = *bpp; 2262 bp = *bpp;
2263 2263
2264 if (bp->b_bcount > (CHUNKSIZE - cbp->b_bcount)) 2264 if (bp->b_bcount > (CHUNKSIZE - cbp->b_bcount))
2265 break; 2265 break;
2266 if (cbp->b_bcount > 0 && !(cl->flags & LFS_CL_MALLOC)) 2266 if (cbp->b_bcount > 0 && !(cl->flags & LFS_CL_MALLOC))
2267 break; 2267 break;
2268 2268
2269 /* Clusters from GOP_WRITE are expedited */ 2269 /* Clusters from GOP_WRITE are expedited */
2270 if (bp->b_bcount > lfs_sb_getbsize(fs)) { 2270 if (bp->b_bcount > lfs_sb_getbsize(fs)) {
2271 if (cbp->b_bcount > 0) 2271 if (cbp->b_bcount > 0)
2272 /* Put in its own buffer */ 2272 /* Put in its own buffer */
2273 break; 2273 break;
2274 else { 2274 else {
2275 cbp->b_data = bp->b_data; 2275 cbp->b_data = bp->b_data;
2276 } 2276 }
2277 } else if (cbp->b_bcount == 0) { 2277 } else if (cbp->b_bcount == 0) {
2278 p = cbp->b_data = lfs_malloc(fs, CHUNKSIZE, 2278 p = cbp->b_data = lfs_malloc(fs, CHUNKSIZE,
2279 LFS_NB_CLUSTER); 2279 LFS_NB_CLUSTER);
2280 cl->flags |= LFS_CL_MALLOC; 2280 cl->flags |= LFS_CL_MALLOC;
2281 } 2281 }
2282#ifdef DIAGNOSTIC 2282#ifdef DIAGNOSTIC
2283 if (lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno + 2283 if (lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno +
2284 btodb(bp->b_bcount - 1))) != 2284 btodb(bp->b_bcount - 1))) !=
2285 sp->seg_number) { 2285 sp->seg_number) {
2286 printf("blk size %d daddr %" PRIx64 2286 printf("blk size %d daddr %" PRIx64
2287 " not in seg %d\n", 2287 " not in seg %d\n",
2288 bp->b_bcount, bp->b_blkno, 2288 bp->b_bcount, bp->b_blkno,
2289 sp->seg_number); 2289 sp->seg_number);
2290 panic("segment overwrite"); 2290 panic("segment overwrite");
2291 } 2291 }
2292#endif 2292#endif
2293 2293
2294#ifdef LFS_USE_B_INVAL 2294#ifdef LFS_USE_B_INVAL
2295 /* 2295 /*
2296 * Fake buffers from the cleaner are marked as B_INVAL. 2296 * Fake buffers from the cleaner are marked as B_INVAL.
2297 * We need to copy the data from user space rather than 2297 * We need to copy the data from user space rather than
2298 * from the buffer indicated. 2298 * from the buffer indicated.
2299 * XXX == what do I do on an error? 2299 * XXX == what do I do on an error?
2300 */ 2300 */
2301 if ((bp->b_cflags & BC_INVAL) != 0 && 2301 if ((bp->b_cflags & BC_INVAL) != 0 &&
2302 bp->b_iodone != NULL) { 2302 bp->b_iodone != NULL) {
2303 if (copyin(bp->b_saveaddr, p, bp->b_bcount)) 2303 if (copyin(bp->b_saveaddr, p, bp->b_bcount))
2304 panic("lfs_writeseg: " 2304 panic("lfs_writeseg: "
2305 "copyin failed [2]"); 2305 "copyin failed [2]");
2306 } else 2306 } else
2307#endif /* LFS_USE_B_INVAL */ 2307#endif /* LFS_USE_B_INVAL */
2308 if (cl->flags & LFS_CL_MALLOC) { 2308 if (cl->flags & LFS_CL_MALLOC) {
2309 /* copy data into our cluster. */ 2309 /* copy data into our cluster. */
2310 memcpy(p, bp->b_data, bp->b_bcount); 2310 memcpy(p, bp->b_data, bp->b_bcount);
2311 p += bp->b_bcount; 2311 p += bp->b_bcount;
2312 } 2312 }
2313 2313
2314 cbp->b_bcount += bp->b_bcount; 2314 cbp->b_bcount += bp->b_bcount;
2315 cl->bufsize += bp->b_bcount; 2315 cl->bufsize += bp->b_bcount;
2316 2316
2317 bp->b_flags &= ~B_READ; 2317 bp->b_flags &= ~B_READ;
2318 bp->b_error = 0; 2318 bp->b_error = 0;
2319 cl->bpp[cl->bufcount++] = bp; 2319 cl->bpp[cl->bufcount++] = bp;
2320 2320
2321 vp = bp->b_vp; 2321 vp = bp->b_vp;
2322 mutex_enter(&bufcache_lock); 2322 mutex_enter(&bufcache_lock);
2323 mutex_enter(vp->v_interlock); 2323 mutex_enter(vp->v_interlock);
2324 bp->b_oflags &= ~(BO_DELWRI | BO_DONE); 2324 bp->b_oflags &= ~(BO_DELWRI | BO_DONE);
2325 reassignbuf(bp, vp); 2325 reassignbuf(bp, vp);
2326 vp->v_numoutput++; 2326 vp->v_numoutput++;
2327 mutex_exit(vp->v_interlock); 2327 mutex_exit(vp->v_interlock);
2328 mutex_exit(&bufcache_lock); 2328 mutex_exit(&bufcache_lock);
2329 2329
2330 bpp++; 2330 bpp++;
2331 i--; 2331 i--;
2332 } 2332 }
2333 if (fs->lfs_sp->seg_flags & SEGM_SYNC) 2333 if (fs->lfs_sp->seg_flags & SEGM_SYNC)
2334 BIO_SETPRIO(cbp, BPRIO_TIMECRITICAL); 2334 BIO_SETPRIO(cbp, BPRIO_TIMECRITICAL);
2335 else 2335 else
2336 BIO_SETPRIO(cbp, BPRIO_TIMELIMITED); 2336 BIO_SETPRIO(cbp, BPRIO_TIMELIMITED);
2337 mutex_enter(devvp->v_interlock); 2337 mutex_enter(devvp->v_interlock);
2338 devvp->v_numoutput++; 2338 devvp->v_numoutput++;
2339 mutex_exit(devvp->v_interlock); 2339 mutex_exit(devvp->v_interlock);
2340 VOP_STRATEGY(devvp, cbp); 2340 VOP_STRATEGY(devvp, cbp);
2341 curlwp->l_ru.ru_oublock++; 2341 curlwp->l_ru.ru_oublock++;
2342 } 2342 }
2343 2343
2344 if (lfs_dostats) { 2344 if (lfs_dostats) {
2345 ++lfs_stats.psegwrites; 2345 ++lfs_stats.psegwrites;
2346 lfs_stats.blocktot += nblocks - 1; 2346 lfs_stats.blocktot += nblocks - 1;
2347 if (fs->lfs_sp->seg_flags & SEGM_SYNC) 2347 if (fs->lfs_sp->seg_flags & SEGM_SYNC)
2348 ++lfs_stats.psyncwrites; 2348 ++lfs_stats.psyncwrites;
2349 if (fs->lfs_sp->seg_flags & SEGM_CLEAN) { 2349 if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
2350 ++lfs_stats.pcleanwrites; 2350 ++lfs_stats.pcleanwrites;
2351 lfs_stats.cleanblocks += nblocks - 1; 2351 lfs_stats.cleanblocks += nblocks - 1;
2352 } 2352 }
2353 } 2353 }
2354 2354
2355 return (lfs_initseg(fs) || do_again); 2355 return (lfs_initseg(fs) || do_again);
2356} 2356}
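
One detail of the checksum pass above that is easy to miss: ss_datasum does not cover whole blocks. For every lfs_sb_getbsize(fs)-sized piece of every cluster buffer, only the first el_size bytes (sizeof(u_long) on version 1 filesystems, 4 bytes otherwise) are accumulated with lfs_cksum_part() and folded at the end with lfs_cksum_fold(). A small sketch of that sample-and-fold pattern follows; the sketch_* helpers are simplified stand-ins, not the kernel's actual mixing functions (those live in lfs_cksum.h):

	#include <stddef.h>
	#include <stdint.h>

	/* Stand-in for lfs_cksum_part(): mix len bytes into sum. */
	static uint32_t
	sketch_cksum_part(const void *p, size_t len, uint32_t sum)
	{
		const unsigned char *cp = p;

		while (len--)
			sum = ((sum << 1) | (sum >> 31)) ^ *cp++;
		return sum;
	}

	/* Stand-in for lfs_cksum_fold(): reduce to the stored width. */
	static uint32_t
	sketch_cksum_fold(uint32_t sum)
	{
		return (sum & 0xffff) ^ (sum >> 16);
	}

	/* Checksum only the first el_size bytes of each block, as the
	 * loop over sp->bpp in lfs_writeseg() does. */
	static uint32_t
	datasum(unsigned char *const *blocks, int nblocks, size_t el_size)
	{
		uint32_t sum = 0;
		int i;

		for (i = 0; i < nblocks; i++)
			sum = sketch_cksum_part(blocks[i], el_size, sum);
		return sketch_cksum_fold(sum);
	}
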
2357 2357
2358void 2358void
2359lfs_writesuper(struct lfs *fs, daddr_t daddr) 2359lfs_writesuper(struct lfs *fs, daddr_t daddr)
2360{ 2360{
2361 struct buf *bp; 2361 struct buf *bp;
2362 struct vnode *devvp = VTOI(fs->lfs_ivnode)->i_devvp; 2362 struct vnode *devvp = VTOI(fs->lfs_ivnode)->i_devvp;
2363 int s; 2363 int s;
2364 2364
2365 ASSERT_MAYBE_SEGLOCK(fs); 2365 ASSERT_MAYBE_SEGLOCK(fs);
2366#ifdef DIAGNOSTIC 2366#ifdef DIAGNOSTIC
2367 KASSERT(fs->lfs_magic == LFS_MAGIC); 2367 KASSERT(fs->lfs_magic == LFS_MAGIC);
2368#endif 2368#endif
2369 /* 2369 /*
2370 * If we can write one superblock while another is in 2370 * If we can write one superblock while another is in
2371 * progress, we risk not having a complete checkpoint if we crash. 2371 * progress, we risk not having a complete checkpoint if we crash.
2372 * So, block here if a superblock write is in progress. 2372 * So, block here if a superblock write is in progress.
2373 */ 2373 */
2374 mutex_enter(&lfs_lock); 2374 mutex_enter(&lfs_lock);
2375 s = splbio(); 2375 s = splbio();
2376 while (fs->lfs_sbactive) { 2376 while (fs->lfs_sbactive) {
2377 mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0, 2377 mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0,
2378 &lfs_lock); 2378 &lfs_lock);
2379 } 2379 }
2380 fs->lfs_sbactive = daddr; 2380 fs->lfs_sbactive = daddr;
2381 splx(s); 2381 splx(s);
2382 mutex_exit(&lfs_lock); 2382 mutex_exit(&lfs_lock);
2383 2383
2384 /* Set timestamp of this version of the superblock */ 2384 /* Set timestamp of this version of the superblock */
2385 if (fs->lfs_version == 1) 2385 if (fs->lfs_version == 1)
2386 lfs_sb_setotstamp(fs, time_second); 2386 lfs_sb_setotstamp(fs, time_second);
2387 lfs_sb_settstamp(fs, time_second); 2387 lfs_sb_settstamp(fs, time_second);
2388 2388
2389 /* Checksum the superblock and copy it into a buffer. */ 2389 /* Checksum the superblock and copy it into a buffer. */
2390 lfs_sb_setcksum(fs, lfs_sb_cksum(&(fs->lfs_dlfs))); 2390 lfs_sb_setcksum(fs, lfs_sb_cksum(&(fs->lfs_dlfs)));
2391 bp = lfs_newbuf(fs, devvp, 2391 bp = lfs_newbuf(fs, devvp,
2392 LFS_FSBTODB(fs, daddr), LFS_SBPAD, LFS_NB_SBLOCK); 2392 LFS_FSBTODB(fs, daddr), LFS_SBPAD, LFS_NB_SBLOCK);
2393 memset((char *)bp->b_data + sizeof(struct dlfs), 0, 2393 memset((char *)bp->b_data + sizeof(struct dlfs), 0,
2394 LFS_SBPAD - sizeof(struct dlfs)); 2394 LFS_SBPAD - sizeof(struct dlfs));
2395 *(struct dlfs *)bp->b_data = fs->lfs_dlfs; 2395 *(struct dlfs *)bp->b_data = fs->lfs_dlfs;
2396 2396
2397 bp->b_cflags |= BC_BUSY; 2397 bp->b_cflags |= BC_BUSY;
2398 bp->b_flags = (bp->b_flags & ~B_READ) | B_ASYNC; 2398 bp->b_flags = (bp->b_flags & ~B_READ) | B_ASYNC;
2399 bp->b_oflags &= ~(BO_DONE | BO_DELWRI); 2399 bp->b_oflags &= ~(BO_DONE | BO_DELWRI);
2400 bp->b_error = 0; 2400 bp->b_error = 0;
2401 bp->b_iodone = lfs_supercallback; 2401 bp->b_iodone = lfs_supercallback;
2402 2402
2403 if (fs->lfs_sp != NULL && fs->lfs_sp->seg_flags & SEGM_SYNC) 2403 if (fs->lfs_sp != NULL && fs->lfs_sp->seg_flags & SEGM_SYNC)
2404 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 2404 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
2405 else 2405 else
2406 BIO_SETPRIO(bp, BPRIO_TIMELIMITED); 2406 BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
2407 curlwp->l_ru.ru_oublock++; 2407 curlwp->l_ru.ru_oublock++;
2408 2408
2409 mutex_enter(devvp->v_interlock); 2409 mutex_enter(devvp->v_interlock);
2410 devvp->v_numoutput++; 2410 devvp->v_numoutput++;
2411 mutex_exit(devvp->v_interlock); 2411 mutex_exit(devvp->v_interlock);
2412 2412
2413 mutex_enter(&lfs_lock); 2413 mutex_enter(&lfs_lock);
2414 ++fs->lfs_iocount; 2414 ++fs->lfs_iocount;
2415 mutex_exit(&lfs_lock); 2415 mutex_exit(&lfs_lock);
2416 VOP_STRATEGY(devvp, bp); 2416 VOP_STRATEGY(devvp, bp);
2417} 2417}
2418 2418
2419/* 2419/*
2420 * Logical block number match routines used when traversing the dirty block 2420 * Logical block number match routines used when traversing the dirty block
2421 * chain. 2421 * chain.
2422 */ 2422 */
2423int 2423int
2424lfs_match_fake(struct lfs *fs, struct buf *bp) 2424lfs_match_fake(struct lfs *fs, struct buf *bp)
2425{ 2425{
2426 2426
2427 ASSERT_SEGLOCK(fs); 2427 ASSERT_SEGLOCK(fs);
2428 return LFS_IS_MALLOC_BUF(bp); 2428 return LFS_IS_MALLOC_BUF(bp);
2429} 2429}
2430 2430
2431#if 0 2431#if 0
2432int 2432int
2433lfs_match_real(struct lfs *fs, struct buf *bp) 2433lfs_match_real(struct lfs *fs, struct buf *bp)
2434{ 2434{
2435 2435
2436 ASSERT_SEGLOCK(fs); 2436 ASSERT_SEGLOCK(fs);
2437 return (lfs_match_data(fs, bp) && !lfs_match_fake(fs, bp)); 2437 return (lfs_match_data(fs, bp) && !lfs_match_fake(fs, bp));
2438} 2438}
2439#endif 2439#endif
2440 2440
2441int 2441int
2442lfs_match_data(struct lfs *fs, struct buf *bp) 2442lfs_match_data(struct lfs *fs, struct buf *bp)
2443{ 2443{
2444 2444
2445 ASSERT_SEGLOCK(fs); 2445 ASSERT_SEGLOCK(fs);
2446 return (bp->b_lblkno >= 0); 2446 return (bp->b_lblkno >= 0);
2447} 2447}
2448 2448
2449int 2449int
2450lfs_match_indir(struct lfs *fs, struct buf *bp) 2450lfs_match_indir(struct lfs *fs, struct buf *bp)
2451{ 2451{
2452 daddr_t lbn; 2452 daddr_t lbn;
2453 2453
2454 ASSERT_SEGLOCK(fs); 2454 ASSERT_SEGLOCK(fs);
2455 lbn = bp->b_lblkno; 2455 lbn = bp->b_lblkno;
2456 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0); 2456 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0);
2457} 2457}
2458 2458
2459int 2459int
2460lfs_match_dindir(struct lfs *fs, struct buf *bp) 2460lfs_match_dindir(struct lfs *fs, struct buf *bp)
2461{ 2461{
2462 daddr_t lbn; 2462 daddr_t lbn;
2463 2463
2464 ASSERT_SEGLOCK(fs); 2464 ASSERT_SEGLOCK(fs);
2465 lbn = bp->b_lblkno; 2465 lbn = bp->b_lblkno;
2466 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1); 2466 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1);
2467} 2467}
2468 2468
2469int 2469int
2470lfs_match_tindir(struct lfs *fs, struct buf *bp) 2470lfs_match_tindir(struct lfs *fs, struct buf *bp)
2471{ 2471{
2472 daddr_t lbn; 2472 daddr_t lbn;
2473 2473
2474 ASSERT_SEGLOCK(fs); 2474 ASSERT_SEGLOCK(fs);
2475 lbn = bp->b_lblkno; 2475 lbn = bp->b_lblkno;
2476 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2); 2476 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2);
2477} 2477}
2478 2478
2479static void 2479static void
2480lfs_free_aiodone(struct buf *bp) 2480lfs_free_aiodone(struct buf *bp)
2481{ 2481{
2482 struct lfs *fs; 2482 struct lfs *fs;
2483 2483
2484 KERNEL_LOCK(1, curlwp); 2484 KERNEL_LOCK(1, curlwp);
2485 fs = bp->b_private; 2485 fs = bp->b_private;
2486 ASSERT_NO_SEGLOCK(fs); 2486 ASSERT_NO_SEGLOCK(fs);
2487 lfs_freebuf(fs, bp); 2487 lfs_freebuf(fs, bp);
2488 KERNEL_UNLOCK_LAST(curlwp); 2488 KERNEL_UNLOCK_LAST(curlwp);
2489} 2489}
2490 2490
2491static void 2491static void
2492lfs_super_aiodone(struct buf *bp) 2492lfs_super_aiodone(struct buf *bp)
2493{ 2493{
2494 struct lfs *fs; 2494 struct lfs *fs;
2495 2495
2496 KERNEL_LOCK(1, curlwp); 2496 KERNEL_LOCK(1, curlwp);
2497 fs = bp->b_private; 2497 fs = bp->b_private;
2498 ASSERT_NO_SEGLOCK(fs); 2498 ASSERT_NO_SEGLOCK(fs);
2499 mutex_enter(&lfs_lock); 2499 mutex_enter(&lfs_lock);
2500 fs->lfs_sbactive = 0; 2500 fs->lfs_sbactive = 0;
2501 if (--fs->lfs_iocount <= 1) 2501 if (--fs->lfs_iocount <= 1)
2502 wakeup(&fs->lfs_iocount); 2502 wakeup(&fs->lfs_iocount);
2503 wakeup(&fs->lfs_sbactive); 2503 wakeup(&fs->lfs_sbactive);
2504 mutex_exit(&lfs_lock); 2504 mutex_exit(&lfs_lock);
2505 lfs_freebuf(fs, bp); 2505 lfs_freebuf(fs, bp);
2506 KERNEL_UNLOCK_LAST(curlwp); 2506 KERNEL_UNLOCK_LAST(curlwp);
2507} 2507}
2508 2508
2509static void 2509static void
2510lfs_cluster_aiodone(struct buf *bp) 2510lfs_cluster_aiodone(struct buf *bp)
2511{ 2511{
2512 struct lfs_cluster *cl; 2512 struct lfs_cluster *cl;
2513 struct lfs *fs; 2513 struct lfs *fs;
2514 struct buf *tbp, *fbp; 2514 struct buf *tbp, *fbp;
2515 struct vnode *vp, *devvp, *ovp; 2515 struct vnode *vp, *devvp, *ovp;
2516 struct inode *ip; 2516 struct inode *ip;
2517 int error; 2517 int error;
2518 2518
2519 KERNEL_LOCK(1, curlwp); 2519 KERNEL_LOCK(1, curlwp);
2520 2520
2521 error = bp->b_error; 2521 error = bp->b_error;
2522 cl = bp->b_private; 2522 cl = bp->b_private;
2523 fs = cl->fs; 2523 fs = cl->fs;
2524 devvp = VTOI(fs->lfs_ivnode)->i_devvp; 2524 devvp = VTOI(fs->lfs_ivnode)->i_devvp;
2525 ASSERT_NO_SEGLOCK(fs); 2525 ASSERT_NO_SEGLOCK(fs);
2526 2526
2527 /* Put the pages back, and release the buffer */ 2527 /* Put the pages back, and release the buffer */
2528 while (cl->bufcount--) { 2528 while (cl->bufcount--) {
2529 tbp = cl->bpp[cl->bufcount]; 2529 tbp = cl->bpp[cl->bufcount];
2530 KASSERT(tbp->b_cflags & BC_BUSY); 2530 KASSERT(tbp->b_cflags & BC_BUSY);
2531 if (error) { 2531 if (error) {
2532 tbp->b_error = error; 2532 tbp->b_error = error;
2533 } 2533 }
2534 2534
2535 /* 2535 /*
2536 * We're done with tbp. If it has not been re-dirtied since 2536 * We're done with tbp. If it has not been re-dirtied since
2537 * the cluster was written, free it. Otherwise, keep it on 2537 * the cluster was written, free it. Otherwise, keep it on
2538 * the locked list to be written again. 2538 * the locked list to be written again.
2539 */ 2539 */
2540 vp = tbp->b_vp; 2540 vp = tbp->b_vp;
2541 2541
2542 tbp->b_flags &= ~B_GATHERED; 2542 tbp->b_flags &= ~B_GATHERED;
2543 2543
2544 LFS_BCLEAN_LOG(fs, tbp); 2544 LFS_BCLEAN_LOG(fs, tbp);
2545 2545
2546 mutex_enter(&bufcache_lock); 2546 mutex_enter(&bufcache_lock);
2547 if (tbp->b_iodone == NULL) { 2547 if (tbp->b_iodone == NULL) {
2548 KASSERT(tbp->b_flags & B_LOCKED); 2548 KASSERT(tbp->b_flags & B_LOCKED);
2549 bremfree(tbp); 2549 bremfree(tbp);
2550 if (vp) { 2550 if (vp) {
2551 mutex_enter(vp->v_interlock); 2551 mutex_enter(vp->v_interlock);
2552 reassignbuf(tbp, vp); 2552 reassignbuf(tbp, vp);
2553 mutex_exit(vp->v_interlock); 2553 mutex_exit(vp->v_interlock);
2554 } 2554 }
2555 tbp->b_flags |= B_ASYNC; /* for biodone */ 2555 tbp->b_flags |= B_ASYNC; /* for biodone */
2556 } 2556 }
2557 2557
2558 if (((tbp->b_flags | tbp->b_oflags) & 2558 if (((tbp->b_flags | tbp->b_oflags) &
2559 (B_LOCKED | BO_DELWRI)) == B_LOCKED) 2559 (B_LOCKED | BO_DELWRI)) == B_LOCKED)
2560 LFS_UNLOCK_BUF(tbp); 2560 LFS_UNLOCK_BUF(tbp);
2561 2561
2562 if (tbp->b_oflags & BO_DONE) { 2562 if (tbp->b_oflags & BO_DONE) {
2563 DLOG((DLOG_SEG, "blk %d biodone already (flags %lx)\n", 2563 DLOG((DLOG_SEG, "blk %d biodone already (flags %lx)\n",
2564 cl->bufcount, (long)tbp->b_flags)); 2564 cl->bufcount, (long)tbp->b_flags));
2565 } 2565 }
2566 2566
2567 if (tbp->b_iodone != NULL && !LFS_IS_MALLOC_BUF(tbp)) { 2567 if (tbp->b_iodone != NULL && !LFS_IS_MALLOC_BUF(tbp)) {
2568 /* 2568 /*
2569 * A buffer from the page daemon. 2569 * A buffer from the page daemon.
2570 * We use the same iodone as it does, 2570 * We use the same iodone as it does,
2571 * so we must manually disassociate its 2571 * so we must manually disassociate its
2572 * buffers from the vp. 2572 * buffers from the vp.
2573 */ 2573 */
2574 if ((ovp = tbp->b_vp) != NULL) { 2574 if ((ovp = tbp->b_vp) != NULL) {
2575 /* This is just silly */ 2575 /* This is just silly */
2576 mutex_enter(ovp->v_interlock); 2576 mutex_enter(ovp->v_interlock);
2577 brelvp(tbp); 2577 brelvp(tbp);
2578 mutex_exit(ovp->v_interlock); 2578 mutex_exit(ovp->v_interlock);
2579 tbp->b_vp = vp; 2579 tbp->b_vp = vp;
2580 tbp->b_objlock = vp->v_interlock; 2580 tbp->b_objlock = vp->v_interlock;
2581 } 2581 }
2582 /* Put it back the way it was */ 2582 /* Put it back the way it was */
2583 tbp->b_flags |= B_ASYNC; 2583 tbp->b_flags |= B_ASYNC;
2584 /* Master buffers have BC_AGE */ 2584 /* Master buffers have BC_AGE */
2585 if (tbp->b_private == tbp) 2585 if (tbp->b_private == tbp)
2586 tbp->b_cflags |= BC_AGE; 2586 tbp->b_cflags |= BC_AGE;
2587 } 2587 }
2588 mutex_exit(&bufcache_lock); 2588 mutex_exit(&bufcache_lock);
2589 2589
2590 biodone(tbp); 2590 biodone(tbp);
2591 2591
2592 /* 2592 /*
2593 * If this is the last block for this vnode, but 2593 * If this is the last block for this vnode, but
2594 * there are other blocks on its dirty list, 2594 * there are other blocks on its dirty list,
2595 * set IN_MODIFIED/IN_CLEANING depending on what 2595 * set IN_MODIFIED/IN_CLEANING depending on what
2596 * sort of block. Only do this for our mount point, 2596 * sort of block. Only do this for our mount point,
2597 * not for, e.g., inode blocks that are attached to 2597 * not for, e.g., inode blocks that are attached to
2598 * the devvp. 2598 * the devvp.
2599 * XXX KS - Shouldn't we set *both* if both types 2599 * XXX KS - Shouldn't we set *both* if both types
2600 * of blocks are present (traverse the dirty list?) 2600 * of blocks are present (traverse the dirty list?)
2601 */ 2601 */
2602 mutex_enter(vp->v_interlock); 2602 mutex_enter(vp->v_interlock);
2603 mutex_enter(&lfs_lock); 2603 mutex_enter(&lfs_lock);
2604 if (vp != devvp && vp->v_numoutput == 0 && 2604 if (vp != devvp && vp->v_numoutput == 0 &&
2605 (fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) { 2605 (fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
2606 ip = VTOI(vp); 2606 ip = VTOI(vp);
2607 DLOG((DLOG_SEG, "lfs_cluster_aiodone: mark ino %d\n", 2607 DLOG((DLOG_SEG, "lfs_cluster_aiodone: mark ino %d\n",
2608 ip->i_number)); 2608 ip->i_number));
2609 if (LFS_IS_MALLOC_BUF(fbp)) 2609 if (LFS_IS_MALLOC_BUF(fbp))
2610 LFS_SET_UINO(ip, IN_CLEANING); 2610 LFS_SET_UINO(ip, IN_CLEANING);
2611 else 2611 else
2612 LFS_SET_UINO(ip, IN_MODIFIED); 2612 LFS_SET_UINO(ip, IN_MODIFIED);
2613 } 2613 }
2614 cv_broadcast(&vp->v_cv); 2614 cv_broadcast(&vp->v_cv);
2615 mutex_exit(&lfs_lock); 2615 mutex_exit(&lfs_lock);
2616 mutex_exit(vp->v_interlock); 2616 mutex_exit(vp->v_interlock);
2617 } 2617 }
2618 2618
2619 /* Fix up the cluster buffer, and release it */ 2619 /* Fix up the cluster buffer, and release it */
2620 if (cl->flags & LFS_CL_MALLOC) 2620 if (cl->flags & LFS_CL_MALLOC)
2621 lfs_free(fs, bp->b_data, LFS_NB_CLUSTER); 2621 lfs_free(fs, bp->b_data, LFS_NB_CLUSTER);
2622 putiobuf(bp); 2622 putiobuf(bp);
2623 2623
2624 /* Note i/o done */ 2624 /* Note i/o done */
2625 if (cl->flags & LFS_CL_SYNC) { 2625 if (cl->flags & LFS_CL_SYNC) {
2626 if (--cl->seg->seg_iocount == 0) 2626 if (--cl->seg->seg_iocount == 0)
2627 wakeup(&cl->seg->seg_iocount); 2627 wakeup(&cl->seg->seg_iocount);
2628 } 2628 }
2629 mutex_enter(&lfs_lock); 2629 mutex_enter(&lfs_lock);
2630#ifdef DIAGNOSTIC 2630#ifdef DIAGNOSTIC
2631 if (fs->lfs_iocount == 0) 2631 if (fs->lfs_iocount == 0)
2632 panic("lfs_cluster_aiodone: zero iocount"); 2632 panic("lfs_cluster_aiodone: zero iocount");
2633#endif 2633#endif
2634 if (--fs->lfs_iocount <= 1) 2634 if (--fs->lfs_iocount <= 1)
2635 wakeup(&fs->lfs_iocount); 2635 wakeup(&fs->lfs_iocount);
2636 mutex_exit(&lfs_lock); 2636 mutex_exit(&lfs_lock);
2637 2637
2638 KERNEL_UNLOCK_LAST(curlwp); 2638 KERNEL_UNLOCK_LAST(curlwp);
2639 2639
2640 pool_put(&fs->lfs_bpppool, cl->bpp); 2640 pool_put(&fs->lfs_bpppool, cl->bpp);
2641 cl->bpp = NULL; 2641 cl->bpp = NULL;
2642 pool_put(&fs->lfs_clpool, cl); 2642 pool_put(&fs->lfs_clpool, cl);
2643} 2643}
2644 2644
2645static void 2645static void
2646lfs_generic_callback(struct buf *bp, void (*aiodone)(struct buf *)) 2646lfs_generic_callback(struct buf *bp, void (*aiodone)(struct buf *))
2647{ 2647{
2648 /* reset b_iodone for when this is a single-buf i/o. */ 2648 /* reset b_iodone for when this is a single-buf i/o. */
2649 bp->b_iodone = aiodone; 2649 bp->b_iodone = aiodone;
2650 2650
2651 workqueue_enqueue(uvm.aiodone_queue, &bp->b_work, NULL); 2651 workqueue_enqueue(uvm.aiodone_queue, &bp->b_work, NULL);
2652} 2652}
2653 2653
2654static void 2654static void
2655lfs_cluster_callback(struct buf *bp) 2655lfs_cluster_callback(struct buf *bp)
2656{ 2656{
2657 2657
2658 lfs_generic_callback(bp, lfs_cluster_aiodone); 2658 lfs_generic_callback(bp, lfs_cluster_aiodone);
2659} 2659}
2660 2660
2661void 2661void
2662lfs_supercallback(struct buf *bp) 2662lfs_supercallback(struct buf *bp)
2663{ 2663{
2664 2664
2665 lfs_generic_callback(bp, lfs_super_aiodone); 2665 lfs_generic_callback(bp, lfs_super_aiodone);
2666} 2666}
2667 2667
2668/* 2668/*
2669 * The only buffers that are going to hit these functions are the 2669 * The only buffers that are going to hit these functions are the
2670 * segment write blocks, or the segment summaries, or the superblocks. 2670 * segment write blocks, or the segment summaries, or the superblocks.
2671 * 2671 *
2672 * All of the above are created by lfs_newbuf, and so do not need to be 2672 * All of the above are created by lfs_newbuf, and so do not need to be
2673 * released via brelse. 2673 * released via brelse.
2674 */ 2674 */
2675void 2675void
2676lfs_callback(struct buf *bp) 2676lfs_callback(struct buf *bp)
2677{ 2677{
2678 2678
2679 lfs_generic_callback(bp, lfs_free_aiodone); 2679 lfs_generic_callback(bp, lfs_free_aiodone);
2680} 2680}
2681 2681
2682/* 2682/*
2683 * Shellsort (diminishing increment sort) from Data Structures and 2683 * Shellsort (diminishing increment sort) from Data Structures and
2684 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290; 2684 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
2685 * see also Knuth Vol. 3, page 84. The increments are selected from 2685 * see also Knuth Vol. 3, page 84. The increments are selected from
2686 * formula (8), page 95. Roughly O(N^3/2). 2686 * formula (8), page 95. Roughly O(N^3/2).
2687 */ 2687 */
2688/* 2688/*
2689 * This is our own private copy of shellsort because we want to sort 2689 * This is our own private copy of shellsort because we want to sort
2690 * two parallel arrays (the array of buffer pointers and the array of 2690 * two parallel arrays (the array of buffer pointers and the array of
2691 * logical block numbers) simultaneously. Note that we cast the array 2691 * logical block numbers) simultaneously. Note that we cast the array
2692 * of logical block numbers to unsigned in this routine so that the 2692 * of logical block numbers to unsigned in this routine so that the
2693 * negative block numbers (metadata blocks) sort AFTER the data blocks. 2693 * negative block numbers (metadata blocks) sort AFTER the data blocks.
2694 */ 2694 */
2695 2695
2696void 2696void
2697lfs_shellsort(struct buf **bp_array, int32_t *lb_array, int nmemb, int size) 2697lfs_shellsort(struct buf **bp_array, int32_t *lb_array, int nmemb, int size)
2698{ 2698{
2699 static int __rsshell_increments[] = { 4, 1, 0 }; 2699 static int __rsshell_increments[] = { 4, 1, 0 };
2700 int incr, *incrp, t1, t2; 2700 int incr, *incrp, t1, t2;
2701 struct buf *bp_temp; 2701 struct buf *bp_temp;
2702 2702
2703#ifdef DEBUG 2703#ifdef DEBUG
2704 incr = 0; 2704 incr = 0;
2705 for (t1 = 0; t1 < nmemb; t1++) { 2705 for (t1 = 0; t1 < nmemb; t1++) {
2706 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 2706 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) {
2707 if (lb_array[incr++] != bp_array[t1]->b_lblkno + t2) { 2707 if (lb_array[incr++] != bp_array[t1]->b_lblkno + t2) {
2708 /* dump before panic */ 2708 /* dump before panic */
2709 printf("lfs_shellsort: nmemb=%d, size=%d\n", 2709 printf("lfs_shellsort: nmemb=%d, size=%d\n",
2710 nmemb, size); 2710 nmemb, size);
2711 incr = 0; 2711 incr = 0;
2712 for (t1 = 0; t1 < nmemb; t1++) { 2712 for (t1 = 0; t1 < nmemb; t1++) {
2713 const struct buf *bp = bp_array[t1]; 2713 const struct buf *bp = bp_array[t1];
2714 2714
2715 printf("bp[%d]: lbn=%" PRIu64 ", size=%" 2715 printf("bp[%d]: lbn=%" PRIu64 ", size=%"
2716 PRIu64 "\n", t1, 2716 PRIu64 "\n", t1,
2717 (uint64_t)bp->b_bcount, 2717 (uint64_t)bp->b_bcount,
2718 (uint64_t)bp->b_lblkno); 2718 (uint64_t)bp->b_lblkno);
2719 printf("lbns:"); 2719 printf("lbns:");
2720 for (t2 = 0; t2 * size < bp->b_bcount; 2720 for (t2 = 0; t2 * size < bp->b_bcount;
2721 t2++) { 2721 t2++) {
2722 printf(" %" PRId32, 2722 printf(" %" PRId32,
2723 lb_array[incr++]); 2723 lb_array[incr++]);
2724 } 2724 }
2725 printf("\n"); 2725 printf("\n");
2726 } 2726 }
2727 panic("lfs_shellsort: inconsistent input"); 2727 panic("lfs_shellsort: inconsistent input");
2728 } 2728 }
2729 } 2729 }
2730 } 2730 }
2731#endif 2731#endif
2732 2732
2733 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 2733 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
2734 for (t1 = incr; t1 < nmemb; ++t1) 2734 for (t1 = incr; t1 < nmemb; ++t1)
2735 for (t2 = t1 - incr; t2 >= 0;) 2735 for (t2 = t1 - incr; t2 >= 0;)
2736 if ((u_int32_t)bp_array[t2]->b_lblkno > 2736 if ((u_int32_t)bp_array[t2]->b_lblkno >
2737 (u_int32_t)bp_array[t2 + incr]->b_lblkno) { 2737 (u_int32_t)bp_array[t2 + incr]->b_lblkno) {
2738 bp_temp = bp_array[t2]; 2738 bp_temp = bp_array[t2];
2739 bp_array[t2] = bp_array[t2 + incr]; 2739 bp_array[t2] = bp_array[t2 + incr];
2740 bp_array[t2 + incr] = bp_temp; 2740 bp_array[t2 + incr] = bp_temp;
2741 t2 -= incr; 2741 t2 -= incr;
2742 } else 2742 } else
2743 break; 2743 break;
2744 2744
2745 /* Reform the list of logical blocks */ 2745 /* Reform the list of logical blocks */
2746 incr = 0; 2746 incr = 0;
2747 for (t1 = 0; t1 < nmemb; t1++) { 2747 for (t1 = 0; t1 < nmemb; t1++) {
2748 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 2748 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) {
2749 lb_array[incr++] = bp_array[t1]->b_lblkno + t2; 2749 lb_array[incr++] = bp_array[t1]->b_lblkno + t2;
2750 } 2750 }
2751 } 2751 }
2752} 2752}
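
The effect of the u_int32_t casts in the comparison loop can be seen in isolation: reinterpreted as unsigned, every negative logical block number compares greater than every non-negative one, which is exactly what pushes indirect (metadata) blocks to the end of the sorted run. A two-line demonstration:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		int32_t data = 7, indir = -1;	/* -1: an indirect block */

		/* (uint32_t)-1 == 0xffffffff, so metadata sorts last. */
		printf("%d\n", (uint32_t)indir > (uint32_t)data);	/* 1 */
		printf("%d\n", (uint32_t)3 > (uint32_t)data);		/* 0 */
		return 0;
	}
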
2753 2753
2754/* 2754/*
2755 * Set up an FINFO entry for a new file. The fip pointer is assumed to 2755 * Set up an FINFO entry for a new file. The fip pointer is assumed to
2756 * point at uninitialized space. 2756 * point at uninitialized space.
2757 */ 2757 */
2758void 2758void
2759lfs_acquire_finfo(struct lfs *fs, ino_t ino, int vers) 2759lfs_acquire_finfo(struct lfs *fs, ino_t ino, int vers)
2760{ 2760{
2761 struct segment *sp = fs->lfs_sp; 2761 struct segment *sp = fs->lfs_sp;
2762 2762
2763 KASSERT(vers > 0); 2763 KASSERT(vers > 0);
2764 2764
2765 if (sp->seg_bytes_left < lfs_sb_getbsize(fs) || 2765 if (sp->seg_bytes_left < lfs_sb_getbsize(fs) ||
2766 sp->sum_bytes_left < sizeof(struct finfo)) 2766 sp->sum_bytes_left < sizeof(struct finfo))
2767 (void) lfs_writeseg(fs, fs->lfs_sp); 2767 (void) lfs_writeseg(fs, fs->lfs_sp);
2768 2768
2769 sp->sum_bytes_left -= FINFOSIZE; 2769 sp->sum_bytes_left -= FINFOSIZE;
2770 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 2770 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
2771 sp->fip->fi_nblocks = 0; 2771 sp->fip->fi_nblocks = 0;
2772 sp->fip->fi_ino = ino; 2772 sp->fip->fi_ino = ino;
2773 sp->fip->fi_version = vers; 2773 sp->fip->fi_version = vers;
2774} 2774}
2775 2775
2776/* 2776/*
2777 * Release the FINFO entry, either clearing out an unused entry or 2777 * Release the FINFO entry, either clearing out an unused entry or
2778 * advancing us to the next available entry. 2778 * advancing us to the next available entry.
2779 */ 2779 */
2780void 2780void
2781lfs_release_finfo(struct lfs *fs) 2781lfs_release_finfo(struct lfs *fs)
2782{ 2782{
2783 struct segment *sp = fs->lfs_sp; 2783 struct segment *sp = fs->lfs_sp;
2784 2784
2785 if (sp->fip->fi_nblocks != 0) { 2785 if (sp->fip->fi_nblocks != 0) {
2786 sp->fip = (FINFO*)((char *)sp->fip + FINFOSIZE + 2786 sp->fip = (FINFO*)((char *)sp->fip + FINFOSIZE +
2787 sizeof(int32_t) * sp->fip->fi_nblocks); 2787 sizeof(int32_t) * sp->fip->fi_nblocks);
2788 sp->start_lbp = &sp->fip->fi_blocks[0]; 2788 sp->start_lbp = &sp->fip->fi_blocks[0];
2789 } else { 2789 } else {
2790 sp->sum_bytes_left += FINFOSIZE; 2790 sp->sum_bytes_left += FINFOSIZE;
2791 --((SEGSUM *)(sp->segsum))->ss_nfinfo; 2791 --((SEGSUM *)(sp->segsum))->ss_nfinfo;
2792 } 2792 }
2793} 2793}
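
Taken together, lfs_acquire_finfo() and lfs_release_finfo() bracket the per-file bookkeeping in the segment summary: acquire charges FINFOSIZE against sum_bytes_left and stamps the inode number and version, while release either refunds an untouched entry or advances sp->fip past the fi_blocks array to the next free slot. The stride arithmetic, assuming the 32-bit on-disk FINFO layout this file uses (a fixed header followed by fi_nblocks block numbers), is just:

	#include <stddef.h>
	#include <stdint.h>

	/* finfo_sketch mirrors the assumed v1/v2 on-disk struct finfo. */
	struct finfo_sketch {
		uint32_t fi_nblocks;	/* number of blocks */
		uint32_t fi_version;	/* version number */
		uint32_t fi_ino;	/* inode number */
		uint32_t fi_lastlength;	/* length of last block */
		int32_t	 fi_blocks[1];	/* logical block numbers */
	};

	/* FINFOSIZE in the kernel plays the role of this offset. */
	#define FINFOSIZE_SKETCH offsetof(struct finfo_sketch, fi_blocks)

	static struct finfo_sketch *
	next_finfo(struct finfo_sketch *fip)
	{
		return (struct finfo_sketch *)((char *)fip +
		    FINFOSIZE_SKETCH + sizeof(int32_t) * fip->fi_nblocks);
	}
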

cvs diff -r1.275 -r1.276 src/sys/ufs/lfs/lfs_vnops.c (switch to unified diff)

--- src/sys/ufs/lfs/lfs_vnops.c 2015/07/24 06:59:32 1.275
+++ src/sys/ufs/lfs/lfs_vnops.c 2015/07/25 10:40:35 1.276
@@ -1,2232 +1,2232 @@ @@ -1,2232 +1,2232 @@
1/* $NetBSD: lfs_vnops.c,v 1.275 2015/07/24 06:59:32 dholland Exp $ */ 1/* $NetBSD: lfs_vnops.c,v 1.276 2015/07/25 10:40:35 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31/* 31/*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved. 33 * The Regents of the University of California. All rights reserved.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors 43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software 44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission. 45 * without specific prior written permission.
46 * 46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE. 57 * SUCH DAMAGE.
58 * 58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */ 60 */
61 61
62/* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */ 62/* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */
63/*- 63/*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc. 64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved. 65 * All rights reserved.
66 * 66 *
67 * This code is derived from software contributed to The NetBSD Foundation 67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc. 68 * by Wasabi Systems, Inc.
69 * 69 *
70 * Redistribution and use in source and binary forms, with or without 70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions 71 * modification, are permitted provided that the following conditions
72 * are met: 72 * are met:
73 * 1. Redistributions of source code must retain the above copyright 73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer. 74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright 75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the 76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution. 77 * documentation and/or other materials provided with the distribution.
78 * 78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE. 89 * POSSIBILITY OF SUCH DAMAGE.
90 */ 90 */
91/* 91/*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995 92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved. 93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc. 94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed 95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph 96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc. 98 * the permission of UNIX System Laboratories, Inc.
99 * 99 *
100 * Redistribution and use in source and binary forms, with or without 100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions 101 * modification, are permitted provided that the following conditions
102 * are met: 102 * are met:
103 * 1. Redistributions of source code must retain the above copyright 103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer. 104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright 105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the 106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution. 107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors 108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software 109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission. 110 * without specific prior written permission.
111 * 111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE. 122 * SUCH DAMAGE.
123 * 123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95 124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */ 125 */
126 126
127#include <sys/cdefs.h> 127#include <sys/cdefs.h>
128__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.275 2015/07/24 06:59:32 dholland Exp $"); 128__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.276 2015/07/25 10:40:35 martin Exp $");
129 129
130#ifdef _KERNEL_OPT 130#ifdef _KERNEL_OPT
131#include "opt_compat_netbsd.h" 131#include "opt_compat_netbsd.h"
132#include "opt_uvm_page_trkown.h" 132#include "opt_uvm_page_trkown.h"
133#endif 133#endif
134 134
135#include <sys/param.h> 135#include <sys/param.h>
136#include <sys/systm.h> 136#include <sys/systm.h>
137#include <sys/namei.h> 137#include <sys/namei.h>
138#include <sys/resourcevar.h> 138#include <sys/resourcevar.h>
139#include <sys/kernel.h> 139#include <sys/kernel.h>
140#include <sys/file.h> 140#include <sys/file.h>
141#include <sys/stat.h> 141#include <sys/stat.h>
142#include <sys/buf.h> 142#include <sys/buf.h>
143#include <sys/proc.h> 143#include <sys/proc.h>
144#include <sys/mount.h> 144#include <sys/mount.h>
145#include <sys/vnode.h> 145#include <sys/vnode.h>
146#include <sys/pool.h> 146#include <sys/pool.h>
147#include <sys/signalvar.h> 147#include <sys/signalvar.h>
148#include <sys/kauth.h> 148#include <sys/kauth.h>
149#include <sys/syslog.h> 149#include <sys/syslog.h>
150#include <sys/fstrans.h> 150#include <sys/fstrans.h>
151 151
152#include <miscfs/fifofs/fifo.h> 152#include <miscfs/fifofs/fifo.h>
153#include <miscfs/genfs/genfs.h> 153#include <miscfs/genfs/genfs.h>
154#include <miscfs/specfs/specdev.h> 154#include <miscfs/specfs/specdev.h>
155 155
156#include <ufs/lfs/ulfs_inode.h> 156#include <ufs/lfs/ulfs_inode.h>
157#include <ufs/lfs/ulfsmount.h> 157#include <ufs/lfs/ulfsmount.h>
158#include <ufs/lfs/ulfs_bswap.h> 158#include <ufs/lfs/ulfs_bswap.h>
159#include <ufs/lfs/ulfs_extern.h> 159#include <ufs/lfs/ulfs_extern.h>
160 160
161#include <uvm/uvm.h> 161#include <uvm/uvm.h>
162#include <uvm/uvm_pmap.h> 162#include <uvm/uvm_pmap.h>
163#include <uvm/uvm_stat.h> 163#include <uvm/uvm_stat.h>
164#include <uvm/uvm_pager.h> 164#include <uvm/uvm_pager.h>
165 165
166#include <ufs/lfs/lfs.h> 166#include <ufs/lfs/lfs.h>
167#include <ufs/lfs/lfs_kernel.h> 167#include <ufs/lfs/lfs_kernel.h>
168#include <ufs/lfs/lfs_extern.h> 168#include <ufs/lfs/lfs_extern.h>
169 169
170extern pid_t lfs_writer_daemon; 170extern pid_t lfs_writer_daemon;
171int lfs_ignore_lazy_sync = 1; 171int lfs_ignore_lazy_sync = 1;
172 172
173static int lfs_openextattr(void *v); 173static int lfs_openextattr(void *v);
174static int lfs_closeextattr(void *v); 174static int lfs_closeextattr(void *v);
175static int lfs_getextattr(void *v); 175static int lfs_getextattr(void *v);
176static int lfs_setextattr(void *v); 176static int lfs_setextattr(void *v);
177static int lfs_listextattr(void *v); 177static int lfs_listextattr(void *v);
178static int lfs_deleteextattr(void *v); 178static int lfs_deleteextattr(void *v);
179 179
180/* 180/*
181 * A virgin directory (no blushing please). 181 * A virgin directory (no blushing please).
182 */ 182 */
183static const struct lfs_dirtemplate mastertemplate = { 183static const struct lfs_dirtemplate mastertemplate = {
184 0, 12, LFS_DT_DIR, 1, ".", 184 0, 12, LFS_DT_DIR, 1, ".",
185 0, LFS_DIRBLKSIZ - 12, LFS_DT_DIR, 2, ".." 185 0, LFS_DIRBLKSIZ - 12, LFS_DT_DIR, 2, ".."
186}; 186};
187 187
188/* Global vfs data structures for lfs. */ 188/* Global vfs data structures for lfs. */
189int (**lfs_vnodeop_p)(void *); 189int (**lfs_vnodeop_p)(void *);
190const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { 190const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
191 { &vop_default_desc, vn_default_error }, 191 { &vop_default_desc, vn_default_error },
192 { &vop_lookup_desc, ulfs_lookup }, /* lookup */ 192 { &vop_lookup_desc, ulfs_lookup }, /* lookup */
193 { &vop_create_desc, lfs_create }, /* create */ 193 { &vop_create_desc, lfs_create }, /* create */
194 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */ 194 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */
195 { &vop_mknod_desc, lfs_mknod }, /* mknod */ 195 { &vop_mknod_desc, lfs_mknod }, /* mknod */
196 { &vop_open_desc, ulfs_open }, /* open */ 196 { &vop_open_desc, ulfs_open }, /* open */
197 { &vop_close_desc, lfs_close }, /* close */ 197 { &vop_close_desc, lfs_close }, /* close */
198 { &vop_access_desc, ulfs_access }, /* access */ 198 { &vop_access_desc, ulfs_access }, /* access */
199 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 199 { &vop_getattr_desc, lfs_getattr }, /* getattr */
200 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 200 { &vop_setattr_desc, lfs_setattr }, /* setattr */
201 { &vop_read_desc, lfs_read }, /* read */ 201 { &vop_read_desc, lfs_read }, /* read */
202 { &vop_write_desc, lfs_write }, /* write */ 202 { &vop_write_desc, lfs_write }, /* write */
203 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 203 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
204 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 204 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
205 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */ 205 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */
206 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */ 206 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */
207 { &vop_poll_desc, ulfs_poll }, /* poll */ 207 { &vop_poll_desc, ulfs_poll }, /* poll */
208 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 208 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
209 { &vop_revoke_desc, ulfs_revoke }, /* revoke */ 209 { &vop_revoke_desc, ulfs_revoke }, /* revoke */
210 { &vop_mmap_desc, lfs_mmap }, /* mmap */ 210 { &vop_mmap_desc, lfs_mmap }, /* mmap */
211 { &vop_fsync_desc, lfs_fsync }, /* fsync */ 211 { &vop_fsync_desc, lfs_fsync }, /* fsync */
212 { &vop_seek_desc, ulfs_seek }, /* seek */ 212 { &vop_seek_desc, ulfs_seek }, /* seek */
213 { &vop_remove_desc, lfs_remove }, /* remove */ 213 { &vop_remove_desc, lfs_remove }, /* remove */
214 { &vop_link_desc, lfs_link }, /* link */ 214 { &vop_link_desc, lfs_link }, /* link */
215 { &vop_rename_desc, lfs_rename }, /* rename */ 215 { &vop_rename_desc, lfs_rename }, /* rename */
216 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */ 216 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */
217 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */ 217 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */
218 { &vop_symlink_desc, lfs_symlink }, /* symlink */ 218 { &vop_symlink_desc, lfs_symlink }, /* symlink */
219 { &vop_readdir_desc, ulfs_readdir }, /* readdir */ 219 { &vop_readdir_desc, ulfs_readdir }, /* readdir */
220 { &vop_readlink_desc, ulfs_readlink }, /* readlink */ 220 { &vop_readlink_desc, ulfs_readlink }, /* readlink */
221 { &vop_abortop_desc, ulfs_abortop }, /* abortop */ 221 { &vop_abortop_desc, ulfs_abortop }, /* abortop */
222 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 222 { &vop_inactive_desc, lfs_inactive }, /* inactive */
223 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 223 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
224 { &vop_lock_desc, ulfs_lock }, /* lock */ 224 { &vop_lock_desc, ulfs_lock }, /* lock */
225 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 225 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
226 { &vop_bmap_desc, ulfs_bmap }, /* bmap */ 226 { &vop_bmap_desc, ulfs_bmap }, /* bmap */
227 { &vop_strategy_desc, lfs_strategy }, /* strategy */ 227 { &vop_strategy_desc, lfs_strategy }, /* strategy */
228 { &vop_print_desc, ulfs_print }, /* print */ 228 { &vop_print_desc, ulfs_print }, /* print */
229 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 229 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
230 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */ 230 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */
231 { &vop_advlock_desc, ulfs_advlock }, /* advlock */ 231 { &vop_advlock_desc, ulfs_advlock }, /* advlock */
232 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 232 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
233 { &vop_getpages_desc, lfs_getpages }, /* getpages */ 233 { &vop_getpages_desc, lfs_getpages }, /* getpages */
234 { &vop_putpages_desc, lfs_putpages }, /* putpages */ 234 { &vop_putpages_desc, lfs_putpages }, /* putpages */
235 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 235 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
236 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 236 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
237 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 237 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
238 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 238 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
239 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 239 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
240 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 240 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
241 { NULL, NULL } 241 { NULL, NULL }
242}; 242};
243const struct vnodeopv_desc lfs_vnodeop_opv_desc = 243const struct vnodeopv_desc lfs_vnodeop_opv_desc =
244 { &lfs_vnodeop_p, lfs_vnodeop_entries }; 244 { &lfs_vnodeop_p, lfs_vnodeop_entries };
245 245
246int (**lfs_specop_p)(void *); 246int (**lfs_specop_p)(void *);
247const struct vnodeopv_entry_desc lfs_specop_entries[] = { 247const struct vnodeopv_entry_desc lfs_specop_entries[] = {
248 { &vop_default_desc, vn_default_error }, 248 { &vop_default_desc, vn_default_error },
249 { &vop_lookup_desc, spec_lookup }, /* lookup */ 249 { &vop_lookup_desc, spec_lookup }, /* lookup */
250 { &vop_create_desc, spec_create }, /* create */ 250 { &vop_create_desc, spec_create }, /* create */
251 { &vop_mknod_desc, spec_mknod }, /* mknod */ 251 { &vop_mknod_desc, spec_mknod }, /* mknod */
252 { &vop_open_desc, spec_open }, /* open */ 252 { &vop_open_desc, spec_open }, /* open */
253 { &vop_close_desc, lfsspec_close }, /* close */ 253 { &vop_close_desc, lfsspec_close }, /* close */
254 { &vop_access_desc, ulfs_access }, /* access */ 254 { &vop_access_desc, ulfs_access }, /* access */
255 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 255 { &vop_getattr_desc, lfs_getattr }, /* getattr */
256 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 256 { &vop_setattr_desc, lfs_setattr }, /* setattr */
257 { &vop_read_desc, ulfsspec_read }, /* read */ 257 { &vop_read_desc, ulfsspec_read }, /* read */
258 { &vop_write_desc, ulfsspec_write }, /* write */ 258 { &vop_write_desc, ulfsspec_write }, /* write */
259 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */ 259 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */
260 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */ 260 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */
261 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 261 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
262 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */ 262 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
263 { &vop_poll_desc, spec_poll }, /* poll */ 263 { &vop_poll_desc, spec_poll }, /* poll */
264 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 264 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
265 { &vop_revoke_desc, spec_revoke }, /* revoke */ 265 { &vop_revoke_desc, spec_revoke }, /* revoke */
266 { &vop_mmap_desc, spec_mmap }, /* mmap */ 266 { &vop_mmap_desc, spec_mmap }, /* mmap */
267 { &vop_fsync_desc, spec_fsync }, /* fsync */ 267 { &vop_fsync_desc, spec_fsync }, /* fsync */
268 { &vop_seek_desc, spec_seek }, /* seek */ 268 { &vop_seek_desc, spec_seek }, /* seek */
269 { &vop_remove_desc, spec_remove }, /* remove */ 269 { &vop_remove_desc, spec_remove }, /* remove */
270 { &vop_link_desc, spec_link }, /* link */ 270 { &vop_link_desc, spec_link }, /* link */
271 { &vop_rename_desc, spec_rename }, /* rename */ 271 { &vop_rename_desc, spec_rename }, /* rename */
272 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 272 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
273 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 273 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
274 { &vop_symlink_desc, spec_symlink }, /* symlink */ 274 { &vop_symlink_desc, spec_symlink }, /* symlink */
275 { &vop_readdir_desc, spec_readdir }, /* readdir */ 275 { &vop_readdir_desc, spec_readdir }, /* readdir */
276 { &vop_readlink_desc, spec_readlink }, /* readlink */ 276 { &vop_readlink_desc, spec_readlink }, /* readlink */
277 { &vop_abortop_desc, spec_abortop }, /* abortop */ 277 { &vop_abortop_desc, spec_abortop }, /* abortop */
278 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 278 { &vop_inactive_desc, lfs_inactive }, /* inactive */
279 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 279 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
280 { &vop_lock_desc, ulfs_lock }, /* lock */ 280 { &vop_lock_desc, ulfs_lock }, /* lock */
281 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 281 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
282 { &vop_bmap_desc, spec_bmap }, /* bmap */ 282 { &vop_bmap_desc, spec_bmap }, /* bmap */
283 { &vop_strategy_desc, spec_strategy }, /* strategy */ 283 { &vop_strategy_desc, spec_strategy }, /* strategy */
284 { &vop_print_desc, ulfs_print }, /* print */ 284 { &vop_print_desc, ulfs_print }, /* print */
285 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 285 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
286 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 286 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
287 { &vop_advlock_desc, spec_advlock }, /* advlock */ 287 { &vop_advlock_desc, spec_advlock }, /* advlock */
288 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 288 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
289 { &vop_getpages_desc, spec_getpages }, /* getpages */ 289 { &vop_getpages_desc, spec_getpages }, /* getpages */
290 { &vop_putpages_desc, spec_putpages }, /* putpages */ 290 { &vop_putpages_desc, spec_putpages }, /* putpages */
291 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 291 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
292 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 292 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
293 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 293 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
294 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 294 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
295 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 295 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
296 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 296 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
297 { NULL, NULL } 297 { NULL, NULL }
298}; 298};
299const struct vnodeopv_desc lfs_specop_opv_desc = 299const struct vnodeopv_desc lfs_specop_opv_desc =
300 { &lfs_specop_p, lfs_specop_entries }; 300 { &lfs_specop_p, lfs_specop_entries };
301 301
302int (**lfs_fifoop_p)(void *); 302int (**lfs_fifoop_p)(void *);
303const struct vnodeopv_entry_desc lfs_fifoop_entries[] = { 303const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
304 { &vop_default_desc, vn_default_error }, 304 { &vop_default_desc, vn_default_error },
305 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */ 305 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */
306 { &vop_create_desc, vn_fifo_bypass }, /* create */ 306 { &vop_create_desc, vn_fifo_bypass }, /* create */
307 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */ 307 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */
308 { &vop_open_desc, vn_fifo_bypass }, /* open */ 308 { &vop_open_desc, vn_fifo_bypass }, /* open */
309 { &vop_close_desc, lfsfifo_close }, /* close */ 309 { &vop_close_desc, lfsfifo_close }, /* close */
310 { &vop_access_desc, ulfs_access }, /* access */ 310 { &vop_access_desc, ulfs_access }, /* access */
311 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 311 { &vop_getattr_desc, lfs_getattr }, /* getattr */
312 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 312 { &vop_setattr_desc, lfs_setattr }, /* setattr */
313 { &vop_read_desc, ulfsfifo_read }, /* read */ 313 { &vop_read_desc, ulfsfifo_read }, /* read */
314 { &vop_write_desc, ulfsfifo_write }, /* write */ 314 { &vop_write_desc, ulfsfifo_write }, /* write */
315 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */ 315 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */
316 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */ 316 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */
317 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */ 317 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */
318 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */ 318 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
319 { &vop_poll_desc, vn_fifo_bypass }, /* poll */ 319 { &vop_poll_desc, vn_fifo_bypass }, /* poll */
320 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */ 320 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */
321 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */ 321 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */
322 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */ 322 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */
323 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */ 323 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */
324 { &vop_seek_desc, vn_fifo_bypass }, /* seek */ 324 { &vop_seek_desc, vn_fifo_bypass }, /* seek */
325 { &vop_remove_desc, vn_fifo_bypass }, /* remove */ 325 { &vop_remove_desc, vn_fifo_bypass }, /* remove */
326 { &vop_link_desc, vn_fifo_bypass }, /* link */ 326 { &vop_link_desc, vn_fifo_bypass }, /* link */
327 { &vop_rename_desc, vn_fifo_bypass }, /* rename */ 327 { &vop_rename_desc, vn_fifo_bypass }, /* rename */
328 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */ 328 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */
329 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */ 329 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */
330 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */ 330 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */
331 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */ 331 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */
332 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */ 332 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */
333 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */ 333 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */
334 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 334 { &vop_inactive_desc, lfs_inactive }, /* inactive */
335 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 335 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
336 { &vop_lock_desc, ulfs_lock }, /* lock */ 336 { &vop_lock_desc, ulfs_lock }, /* lock */
337 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 337 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
338 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */ 338 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */
339 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */ 339 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */
340 { &vop_print_desc, ulfs_print }, /* print */ 340 { &vop_print_desc, ulfs_print }, /* print */
341 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 341 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
342 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */ 342 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */
343 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */ 343 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */
344 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 344 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
345 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */ 345 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */
346 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 346 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
347 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 347 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
348 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 348 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
349 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 349 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
350 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 350 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
351 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 351 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
352 { NULL, NULL } 352 { NULL, NULL }
353}; 353};
354const struct vnodeopv_desc lfs_fifoop_opv_desc = 354const struct vnodeopv_desc lfs_fifoop_opv_desc =
355 { &lfs_fifoop_p, lfs_fifoop_entries }; 355 { &lfs_fifoop_p, lfs_fifoop_entries };
356 356
357#define LFS_READWRITE 357#define LFS_READWRITE
358#include <ufs/lfs/ulfs_readwrite.c> 358#include <ufs/lfs/ulfs_readwrite.c>
359#undef LFS_READWRITE 359#undef LFS_READWRITE
360 360
361/* 361/*
362 * Synch an open file. 362 * Synch an open file.
363 */ 363 */
364/* ARGSUSED */ 364/* ARGSUSED */
365int 365int
366lfs_fsync(void *v) 366lfs_fsync(void *v)
367{ 367{
368 struct vop_fsync_args /* { 368 struct vop_fsync_args /* {
369 struct vnode *a_vp; 369 struct vnode *a_vp;
370 kauth_cred_t a_cred; 370 kauth_cred_t a_cred;
371 int a_flags; 371 int a_flags;
372 off_t offlo; 372 off_t offlo;
373 off_t offhi; 373 off_t offhi;
374 } */ *ap = v; 374 } */ *ap = v;
375 struct vnode *vp = ap->a_vp; 375 struct vnode *vp = ap->a_vp;
376 int error, wait; 376 int error, wait;
377 struct inode *ip = VTOI(vp); 377 struct inode *ip = VTOI(vp);
378 struct lfs *fs = ip->i_lfs; 378 struct lfs *fs = ip->i_lfs;
379 379
380 /* If we're mounted read-only, don't try to sync. */ 380 /* If we're mounted read-only, don't try to sync. */
381 if (fs->lfs_ronly) 381 if (fs->lfs_ronly)
382 return 0; 382 return 0;
383 383
384 /* If a removed vnode is being cleaned, no need to sync here. */ 384 /* If a removed vnode is being cleaned, no need to sync here. */
385 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0) 385 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
386 return 0; 386 return 0;
387 387
388 /* 388 /*
389 * Trickle sync simply adds this vnode to the pager list, as if 389 * Trickle sync simply adds this vnode to the pager list, as if
390 * the pagedaemon had requested a pageout. 390 * the pagedaemon had requested a pageout.
391 */ 391 */
392 if (ap->a_flags & FSYNC_LAZY) { 392 if (ap->a_flags & FSYNC_LAZY) {
393 if (lfs_ignore_lazy_sync == 0) { 393 if (lfs_ignore_lazy_sync == 0) {
394 mutex_enter(&lfs_lock); 394 mutex_enter(&lfs_lock);
395 if (!(ip->i_flags & IN_PAGING)) { 395 if (!(ip->i_flags & IN_PAGING)) {
396 ip->i_flags |= IN_PAGING; 396 ip->i_flags |= IN_PAGING;
397 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, 397 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
398 i_lfs_pchain); 398 i_lfs_pchain);
399 } 399 }
400 wakeup(&lfs_writer_daemon); 400 wakeup(&lfs_writer_daemon);
401 mutex_exit(&lfs_lock); 401 mutex_exit(&lfs_lock);
402 } 402 }
403 return 0; 403 return 0;
404 } 404 }
405 405
406 /* 406 /*
 407 * If a vnode is being cleaned, flush it out before we try to 407 * If a vnode is being cleaned, flush it out before we try to
408 * reuse it. This prevents the cleaner from writing files twice 408 * reuse it. This prevents the cleaner from writing files twice
409 * in the same partial segment, causing an accounting underflow. 409 * in the same partial segment, causing an accounting underflow.
410 */ 410 */
411 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) { 411 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
412 lfs_vflush(vp); 412 lfs_vflush(vp);
413 } 413 }
414 414
415 wait = (ap->a_flags & FSYNC_WAIT); 415 wait = (ap->a_flags & FSYNC_WAIT);
416 do { 416 do {
417 mutex_enter(vp->v_interlock); 417 mutex_enter(vp->v_interlock);
418 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 418 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
419 round_page(ap->a_offhi), 419 round_page(ap->a_offhi),
420 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); 420 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
421 if (error == EAGAIN) { 421 if (error == EAGAIN) {
422 mutex_enter(&lfs_lock); 422 mutex_enter(&lfs_lock);
423 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER, 423 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
424 "lfs_fsync", hz / 100 + 1, &lfs_lock); 424 "lfs_fsync", hz / 100 + 1, &lfs_lock);
425 mutex_exit(&lfs_lock); 425 mutex_exit(&lfs_lock);
426 } 426 }
427 } while (error == EAGAIN); 427 } while (error == EAGAIN);
428 if (error) 428 if (error)
429 return error; 429 return error;
430 430
431 if ((ap->a_flags & FSYNC_DATAONLY) == 0) 431 if ((ap->a_flags & FSYNC_DATAONLY) == 0)
432 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0); 432 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
433 433
434 if (error == 0 && ap->a_flags & FSYNC_CACHE) { 434 if (error == 0 && ap->a_flags & FSYNC_CACHE) {
435 int l = 0; 435 int l = 0;
436 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE, 436 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
437 curlwp->l_cred); 437 curlwp->l_cred);
438 } 438 }
439 if (wait && !VPISEMPTY(vp)) 439 if (wait && !VPISEMPTY(vp))
440 LFS_SET_UINO(ip, IN_MODIFIED); 440 LFS_SET_UINO(ip, IN_MODIFIED);
441 441
442 return error; 442 return error;
443} 443}
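
The FSYNC_LAZY branch above queues the inode for the writer daemon at
most once, guarded by the IN_PAGING flag.  Here is a minimal userland
model of that idempotent queueing; the names are invented, and the
real code holds lfs_lock across the test and insert before waking
lfs_writer_daemon.

	#include <stdio.h>
	#include <sys/queue.h>

	#define MODEL_IN_PAGING 0x01	/* stand-in for IN_PAGING */

	struct model_inode {
		int flags;
		TAILQ_ENTRY(model_inode) pchain;
	};

	TAILQ_HEAD(, model_inode) pchainhd = TAILQ_HEAD_INITIALIZER(pchainhd);

	static void
	lazy_sync(struct model_inode *ip)
	{
		/* The flag test makes repeated lazy fsyncs of the same
		 * inode a no-op: it is queued for the writer only once. */
		if (!(ip->flags & MODEL_IN_PAGING)) {
			ip->flags |= MODEL_IN_PAGING;
			TAILQ_INSERT_TAIL(&pchainhd, ip, pchain);
		}
		/* ...then wakeup(&lfs_writer_daemon) in the real code. */
	}

	int
	main(void)
	{
		struct model_inode ino = { 0 };

		lazy_sync(&ino);
		lazy_sync(&ino);	/* second call changes nothing */
		printf("queued once: %s\n",
		    TAILQ_NEXT(TAILQ_FIRST(&pchainhd), pchain) == NULL ?
		    "yes" : "no");
		return 0;
	}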
444 444
445/* 445/*
446 * Take IN_ADIROP off, then call ulfs_inactive. 446 * Take IN_ADIROP off, then call ulfs_inactive.
447 */ 447 */
448int 448int
449lfs_inactive(void *v) 449lfs_inactive(void *v)
450{ 450{
451 struct vop_inactive_args /* { 451 struct vop_inactive_args /* {
452 struct vnode *a_vp; 452 struct vnode *a_vp;
453 } */ *ap = v; 453 } */ *ap = v;
454 454
455 lfs_unmark_vnode(ap->a_vp); 455 lfs_unmark_vnode(ap->a_vp);
456 456
457 /* 457 /*
458 * The Ifile is only ever inactivated on unmount. 458 * The Ifile is only ever inactivated on unmount.
459 * Streamline this process by not giving it more dirty blocks. 459 * Streamline this process by not giving it more dirty blocks.
460 */ 460 */
461 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { 461 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
462 mutex_enter(&lfs_lock); 462 mutex_enter(&lfs_lock);
463 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); 463 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
464 mutex_exit(&lfs_lock); 464 mutex_exit(&lfs_lock);
465 VOP_UNLOCK(ap->a_vp); 465 VOP_UNLOCK(ap->a_vp);
466 return 0; 466 return 0;
467 } 467 }
468 468
469#ifdef DEBUG 469#ifdef DEBUG
470 /* 470 /*
471 * This might happen on unmount. 471 * This might happen on unmount.
472 * XXX If it happens at any other time, it should be a panic. 472 * XXX If it happens at any other time, it should be a panic.
473 */ 473 */
474 if (ap->a_vp->v_uflag & VU_DIROP) { 474 if (ap->a_vp->v_uflag & VU_DIROP) {
475 struct inode *ip = VTOI(ap->a_vp); 475 struct inode *ip = VTOI(ap->a_vp);
476 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number); 476 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
477 } 477 }
 478#endif /* DEBUG */ 478#endif /* DEBUG */
479 479
480 return ulfs_inactive(v); 480 return ulfs_inactive(v);
481} 481}
482 482
483int 483int
484lfs_set_dirop(struct vnode *dvp, struct vnode *vp) 484lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
485{ 485{
486 struct lfs *fs; 486 struct lfs *fs;
487 int error; 487 int error;
488 488
489 KASSERT(VOP_ISLOCKED(dvp)); 489 KASSERT(VOP_ISLOCKED(dvp));
490 KASSERT(vp == NULL || VOP_ISLOCKED(vp)); 490 KASSERT(vp == NULL || VOP_ISLOCKED(vp));
491 491
492 fs = VTOI(dvp)->i_lfs; 492 fs = VTOI(dvp)->i_lfs;
493 493
494 ASSERT_NO_SEGLOCK(fs); 494 ASSERT_NO_SEGLOCK(fs);
495 /* 495 /*
496 * LFS_NRESERVE calculates direct and indirect blocks as well 496 * LFS_NRESERVE calculates direct and indirect blocks as well
497 * as an inode block; an overestimate in most cases. 497 * as an inode block; an overestimate in most cases.
498 */ 498 */
499 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) 499 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
500 return (error); 500 return (error);
501 501
502 restart: 502 restart:
503 mutex_enter(&lfs_lock); 503 mutex_enter(&lfs_lock);
504 if (fs->lfs_dirops == 0) { 504 if (fs->lfs_dirops == 0) {
505 mutex_exit(&lfs_lock); 505 mutex_exit(&lfs_lock);
506 lfs_check(dvp, LFS_UNUSED_LBN, 0); 506 lfs_check(dvp, LFS_UNUSED_LBN, 0);
507 mutex_enter(&lfs_lock); 507 mutex_enter(&lfs_lock);
508 } 508 }
509 while (fs->lfs_writer) { 509 while (fs->lfs_writer) {
510 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, 510 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
511 "lfs_sdirop", 0, &lfs_lock); 511 "lfs_sdirop", 0, &lfs_lock);
512 if (error == EINTR) { 512 if (error == EINTR) {
513 mutex_exit(&lfs_lock); 513 mutex_exit(&lfs_lock);
514 goto unreserve; 514 goto unreserve;
515 } 515 }
516 } 516 }
517 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { 517 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
518 wakeup(&lfs_writer_daemon); 518 wakeup(&lfs_writer_daemon);
519 mutex_exit(&lfs_lock); 519 mutex_exit(&lfs_lock);
520 preempt(); 520 preempt();
521 goto restart; 521 goto restart;
522 } 522 }
523 523
524 if (lfs_dirvcount > LFS_MAX_DIROP) { 524 if (lfs_dirvcount > LFS_MAX_DIROP) {
525 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " 525 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
526 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); 526 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
527 if ((error = mtsleep(&lfs_dirvcount, 527 if ((error = mtsleep(&lfs_dirvcount,
528 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, 528 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
529 &lfs_lock)) != 0) { 529 &lfs_lock)) != 0) {
530 goto unreserve; 530 goto unreserve;
531 } 531 }
532 goto restart; 532 goto restart;
533 } 533 }
534 534
535 ++fs->lfs_dirops; 535 ++fs->lfs_dirops;
536 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */ 536 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */
537 mutex_exit(&lfs_lock); 537 mutex_exit(&lfs_lock);
538 538
539 /* Hold a reference so SET_ENDOP will be happy */ 539 /* Hold a reference so SET_ENDOP will be happy */
540 vref(dvp); 540 vref(dvp);
541 if (vp) { 541 if (vp) {
542 vref(vp); 542 vref(vp);
543 MARK_VNODE(vp); 543 MARK_VNODE(vp);
544 } 544 }
545 545
546 MARK_VNODE(dvp); 546 MARK_VNODE(dvp);
547 return 0; 547 return 0;
548 548
549 unreserve: 549 unreserve:
550 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs)); 550 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
551 return error; 551 return error;
552} 552}
553 553
554/* 554/*
 555 * Opposite of lfs_set_dirop... mostly. For now, callers must invoke 555 * Opposite of lfs_set_dirop... mostly. For now, callers must invoke
556 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up) 556 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
557 */ 557 */
558void 558void
559lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str) 559lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
560{ 560{
561 mutex_enter(&lfs_lock); 561 mutex_enter(&lfs_lock);
562 --fs->lfs_dirops; 562 --fs->lfs_dirops;
563 if (!fs->lfs_dirops) { 563 if (!fs->lfs_dirops) {
564 if (fs->lfs_nadirop) { 564 if (fs->lfs_nadirop) {
565 panic("lfs_unset_dirop: %s: no dirops but " 565 panic("lfs_unset_dirop: %s: no dirops but "
566 " nadirop=%d", str, 566 " nadirop=%d", str,
567 fs->lfs_nadirop); 567 fs->lfs_nadirop);
568 } 568 }
569 wakeup(&fs->lfs_writer); 569 wakeup(&fs->lfs_writer);
570 mutex_exit(&lfs_lock); 570 mutex_exit(&lfs_lock);
571 lfs_check(dvp, LFS_UNUSED_LBN, 0); 571 lfs_check(dvp, LFS_UNUSED_LBN, 0);
572 } else { 572 } else {
573 mutex_exit(&lfs_lock); 573 mutex_exit(&lfs_lock);
574 } 574 }
575 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs)); 575 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
576} 576}
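
lfs_set_dirop() and lfs_unset_dirop() bracket every directory
operation below; the segment writer waits until fs->lfs_dirops drains
to zero.  A toy model of that pairing, with illustrative names only
(the kernel versions also handle block reservation, sleeping, and
locking):

	#include <stdio.h>

	/* Counter stands in for the lfs_lock-protected state in
	 * struct lfs. */
	struct model_fs {
		int dirops;	/* active directory operations */
	};

	static int
	set_dirop(struct model_fs *fs)
	{
		/* The kernel also reserves blocks and may sleep here. */
		fs->dirops++;
		return 0;
	}

	static void
	unset_dirop(struct model_fs *fs, const char *op)
	{
		if (--fs->dirops == 0)
			printf("last dirop (%s): wake the writer\n", op);
	}

	int
	main(void)
	{
		struct model_fs fs = { 0 };

		if (set_dirop(&fs) == 0) {
			/* ...the directory operation itself... */
			unset_dirop(&fs, "create");
		}
		return 0;
	}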
577 577
578void 578void
579lfs_mark_vnode(struct vnode *vp) 579lfs_mark_vnode(struct vnode *vp)
580{ 580{
581 struct inode *ip = VTOI(vp); 581 struct inode *ip = VTOI(vp);
582 struct lfs *fs = ip->i_lfs; 582 struct lfs *fs = ip->i_lfs;
583 583
584 mutex_enter(&lfs_lock); 584 mutex_enter(&lfs_lock);
585 if (!(ip->i_flag & IN_ADIROP)) { 585 if (!(ip->i_flag & IN_ADIROP)) {
586 if (!(vp->v_uflag & VU_DIROP)) { 586 if (!(vp->v_uflag & VU_DIROP)) {
587 mutex_exit(&lfs_lock); 587 mutex_exit(&lfs_lock);
588 vref(vp); 588 vref(vp);
589 mutex_enter(&lfs_lock); 589 mutex_enter(&lfs_lock);
590 ++lfs_dirvcount; 590 ++lfs_dirvcount;
591 ++fs->lfs_dirvcount; 591 ++fs->lfs_dirvcount;
592 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); 592 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
593 vp->v_uflag |= VU_DIROP; 593 vp->v_uflag |= VU_DIROP;
594 } 594 }
595 ++fs->lfs_nadirop; 595 ++fs->lfs_nadirop;
596 ip->i_flag &= ~IN_CDIROP; 596 ip->i_flag &= ~IN_CDIROP;
597 ip->i_flag |= IN_ADIROP; 597 ip->i_flag |= IN_ADIROP;
598 } else 598 } else
599 KASSERT(vp->v_uflag & VU_DIROP); 599 KASSERT(vp->v_uflag & VU_DIROP);
600 mutex_exit(&lfs_lock); 600 mutex_exit(&lfs_lock);
601} 601}
602 602
603void 603void
604lfs_unmark_vnode(struct vnode *vp) 604lfs_unmark_vnode(struct vnode *vp)
605{ 605{
606 struct inode *ip = VTOI(vp); 606 struct inode *ip = VTOI(vp);
607 607
608 mutex_enter(&lfs_lock); 608 mutex_enter(&lfs_lock);
609 if (ip && (ip->i_flag & IN_ADIROP)) { 609 if (ip && (ip->i_flag & IN_ADIROP)) {
610 KASSERT(vp->v_uflag & VU_DIROP); 610 KASSERT(vp->v_uflag & VU_DIROP);
611 --ip->i_lfs->lfs_nadirop; 611 --ip->i_lfs->lfs_nadirop;
612 ip->i_flag &= ~IN_ADIROP; 612 ip->i_flag &= ~IN_ADIROP;
613 } 613 }
614 mutex_exit(&lfs_lock); 614 mutex_exit(&lfs_lock);
615} 615}
616 616
617int 617int
618lfs_symlink(void *v) 618lfs_symlink(void *v)
619{ 619{
620 struct vop_symlink_v3_args /* { 620 struct vop_symlink_v3_args /* {
621 struct vnode *a_dvp; 621 struct vnode *a_dvp;
622 struct vnode **a_vpp; 622 struct vnode **a_vpp;
623 struct componentname *a_cnp; 623 struct componentname *a_cnp;
624 struct vattr *a_vap; 624 struct vattr *a_vap;
625 char *a_target; 625 char *a_target;
626 } */ *ap = v; 626 } */ *ap = v;
627 struct lfs *fs; 627 struct lfs *fs;
628 struct vnode *dvp, **vpp; 628 struct vnode *dvp, **vpp;
629 struct inode *ip; 629 struct inode *ip;
630 struct ulfs_lookup_results *ulr; 630 struct ulfs_lookup_results *ulr;
631 ssize_t len; /* XXX should be size_t */ 631 ssize_t len; /* XXX should be size_t */
632 int error; 632 int error;
633 633
634 dvp = ap->a_dvp; 634 dvp = ap->a_dvp;
635 vpp = ap->a_vpp; 635 vpp = ap->a_vpp;
636 636
637 KASSERT(vpp != NULL); 637 KASSERT(vpp != NULL);
638 KASSERT(*vpp == NULL); 638 KASSERT(*vpp == NULL);
639 KASSERT(ap->a_vap->va_type == VLNK); 639 KASSERT(ap->a_vap->va_type == VLNK);
640 640
641 /* XXX should handle this material another way */ 641 /* XXX should handle this material another way */
642 ulr = &VTOI(ap->a_dvp)->i_crap; 642 ulr = &VTOI(ap->a_dvp)->i_crap;
643 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 643 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
644 644
645 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 645 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
646 ASSERT_NO_SEGLOCK(fs); 646 ASSERT_NO_SEGLOCK(fs);
647 if (fs->lfs_ronly) { 647 if (fs->lfs_ronly) {
648 return EROFS; 648 return EROFS;
649 } 649 }
650 650
651 error = lfs_set_dirop(dvp, NULL); 651 error = lfs_set_dirop(dvp, NULL);
652 if (error) 652 if (error)
653 return error; 653 return error;
654 654
655 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 655 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
656 error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp); 656 error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
657 if (error) { 657 if (error) {
658 goto out; 658 goto out;
659 } 659 }
660 660
661 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 661 VN_KNOTE(ap->a_dvp, NOTE_WRITE);
662 ip = VTOI(*vpp); 662 ip = VTOI(*vpp);
663 663
664 len = strlen(ap->a_target); 664 len = strlen(ap->a_target);
665 if (len < ip->i_lfs->um_maxsymlinklen) { 665 if (len < ip->i_lfs->um_maxsymlinklen) {
666 memcpy((char *)SHORTLINK(ip), ap->a_target, len); 666 memcpy((char *)SHORTLINK(ip), ap->a_target, len);
667 ip->i_size = len; 667 ip->i_size = len;
668 DIP_ASSIGN(ip, size, len); 668 DIP_ASSIGN(ip, size, len);
669 uvm_vnp_setsize(*vpp, ip->i_size); 669 uvm_vnp_setsize(*vpp, ip->i_size);
670 ip->i_flag |= IN_CHANGE | IN_UPDATE; 670 ip->i_flag |= IN_CHANGE | IN_UPDATE;
671 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME) 671 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
672 ip->i_flag |= IN_ACCESS; 672 ip->i_flag |= IN_ACCESS;
673 } else { 673 } else {
674 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0, 674 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
675 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL, 675 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
676 NULL); 676 NULL);
677 } 677 }
678 678
679 VOP_UNLOCK(*vpp); 679 VOP_UNLOCK(*vpp);
680 if (error) 680 if (error)
681 vrele(*vpp); 681 vrele(*vpp);
682 682
683out: 683out:
684 fstrans_done(dvp->v_mount); 684 fstrans_done(dvp->v_mount);
685 685
686 UNMARK_VNODE(dvp); 686 UNMARK_VNODE(dvp);
687 /* XXX: is it even possible for the symlink to get MARK'd? */ 687 /* XXX: is it even possible for the symlink to get MARK'd? */
688 UNMARK_VNODE(*vpp); 688 UNMARK_VNODE(*vpp);
689 if (!((*vpp)->v_uflag & VU_DIROP)) { 689 if (!((*vpp)->v_uflag & VU_DIROP)) {
690 KASSERT(error != 0); 690 KASSERT(error != 0);
691 *vpp = NULL; 691 *vpp = NULL;
692 } 692 }
693 else { 693 else {
694 KASSERT(error == 0); 694 KASSERT(error == 0);
695 } 695 }
696 lfs_unset_dirop(fs, dvp, "symlink"); 696 lfs_unset_dirop(fs, dvp, "symlink");
697 697
698 vrele(dvp); 698 vrele(dvp);
699 return (error); 699 return (error);
700} 700}
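
lfs_symlink() above stores a short target inline in the inode
(SHORTLINK) and writes a longer one as ordinary file data via
ulfs_bufio().  A userland sketch of just that length test follows;
the threshold is a stand-in for ip->i_lfs->um_maxsymlinklen, not its
real value.

	#include <stdio.h>
	#include <string.h>

	#define MODEL_MAXSYMLINKLEN 60	/* illustrative threshold only */

	static const char *
	symlink_storage(const char *target)
	{
		if (strlen(target) < MODEL_MAXSYMLINKLEN)
			return "inline in the inode";	/* memcpy to SHORTLINK(ip) */
		return "written as file data";	/* ulfs_bufio(UIO_WRITE, ...) */
	}

	int
	main(void)
	{
		printf("short target: %s\n", symlink_storage("../lib"));
		printf("long target:  %s\n", symlink_storage(
		    "/a/deeply/nested/path/that/very/easily/exceeds/"
		    "the/inline/storage/limit"));
		return 0;
	}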
701 701
702int 702int
703lfs_mknod(void *v) 703lfs_mknod(void *v)
704{ 704{
705 struct vop_mknod_v3_args /* { 705 struct vop_mknod_v3_args /* {
706 struct vnode *a_dvp; 706 struct vnode *a_dvp;
707 struct vnode **a_vpp; 707 struct vnode **a_vpp;
708 struct componentname *a_cnp; 708 struct componentname *a_cnp;
709 struct vattr *a_vap; 709 struct vattr *a_vap;
710 } */ *ap = v; 710 } */ *ap = v;
711 struct lfs *fs; 711 struct lfs *fs;
712 struct vnode *dvp, **vpp; 712 struct vnode *dvp, **vpp;
713 struct vattr *vap; 713 struct vattr *vap;
714 struct inode *ip; 714 struct inode *ip;
715 int error; 715 int error;
716 ino_t ino; 716 ino_t ino;
717 struct ulfs_lookup_results *ulr; 717 struct ulfs_lookup_results *ulr;
718 718
719 dvp = ap->a_dvp; 719 dvp = ap->a_dvp;
720 vpp = ap->a_vpp; 720 vpp = ap->a_vpp;
721 vap = ap->a_vap; 721 vap = ap->a_vap;
722 722
723 KASSERT(vpp != NULL); 723 KASSERT(vpp != NULL);
724 KASSERT(*vpp == NULL); 724 KASSERT(*vpp == NULL);
725  725
726 /* XXX should handle this material another way */ 726 /* XXX should handle this material another way */
727 ulr = &VTOI(dvp)->i_crap; 727 ulr = &VTOI(dvp)->i_crap;
728 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 728 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
729 729
730 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 730 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
731 ASSERT_NO_SEGLOCK(fs); 731 ASSERT_NO_SEGLOCK(fs);
732 if (fs->lfs_ronly) { 732 if (fs->lfs_ronly) {
733 return EROFS; 733 return EROFS;
734 } 734 }
735 735
736 error = lfs_set_dirop(dvp, NULL); 736 error = lfs_set_dirop(dvp, NULL);
737 if (error) 737 if (error)
738 return error; 738 return error;
739 739
740 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); 740 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
741 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp); 741 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
742 742
743 /* Either way we're done with the dirop at this point */ 743 /* Either way we're done with the dirop at this point */
744 UNMARK_VNODE(dvp); 744 UNMARK_VNODE(dvp);
745 UNMARK_VNODE(*vpp); 745 UNMARK_VNODE(*vpp);
746 if (!((*vpp)->v_uflag & VU_DIROP)) { 746 if (!((*vpp)->v_uflag & VU_DIROP)) {
747 KASSERT(error != 0); 747 KASSERT(error != 0);
748 *vpp = NULL; 748 *vpp = NULL;
749 } 749 }
750 else { 750 else {
751 KASSERT(error == 0); 751 KASSERT(error == 0);
752 } 752 }
753 lfs_unset_dirop(fs, dvp, "mknod"); 753 lfs_unset_dirop(fs, dvp, "mknod");
754 /* 754 /*
755 * XXX this is where this used to be (though inside some evil 755 * XXX this is where this used to be (though inside some evil
756 * macros) but it clearly should be moved further down. 756 * macros) but it clearly should be moved further down.
757 * - dholland 20140515 757 * - dholland 20140515
758 */ 758 */
759 vrele(dvp); 759 vrele(dvp);
760 760
761 if (error) { 761 if (error) {
762 fstrans_done(ap->a_dvp->v_mount); 762 fstrans_done(ap->a_dvp->v_mount);
763 *vpp = NULL; 763 *vpp = NULL;
764 return (error); 764 return (error);
765 } 765 }
766 766
767 VN_KNOTE(dvp, NOTE_WRITE); 767 VN_KNOTE(dvp, NOTE_WRITE);
768 ip = VTOI(*vpp); 768 ip = VTOI(*vpp);
769 ino = ip->i_number; 769 ino = ip->i_number;
770 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 770 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
771 771
772 /* 772 /*
773 * Call fsync to write the vnode so that we don't have to deal with 773 * Call fsync to write the vnode so that we don't have to deal with
774 * flushing it when it's marked VU_DIROP or reclaiming. 774 * flushing it when it's marked VU_DIROP or reclaiming.
775 * 775 *
776 * XXX KS - If we can't flush we also can't call vgone(), so must 776 * XXX KS - If we can't flush we also can't call vgone(), so must
777 * return. But, that leaves this vnode in limbo, also not good. 777 * return. But, that leaves this vnode in limbo, also not good.
778 * Can this ever happen (barring hardware failure)? 778 * Can this ever happen (barring hardware failure)?
779 */ 779 */
780 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) { 780 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
781 panic("lfs_mknod: couldn't fsync (ino %llu)", 781 panic("lfs_mknod: couldn't fsync (ino %llu)",
782 (unsigned long long)ino); 782 (unsigned long long)ino);
783 /* return (error); */ 783 /* return (error); */
784 } 784 }
785 785
786 fstrans_done(ap->a_dvp->v_mount); 786 fstrans_done(ap->a_dvp->v_mount);
787 KASSERT(error == 0); 787 KASSERT(error == 0);
788 VOP_UNLOCK(*vpp); 788 VOP_UNLOCK(*vpp);
789 return (0); 789 return (0);
790} 790}
791 791
792/* 792/*
793 * Create a regular file 793 * Create a regular file
794 */ 794 */
795int 795int
796lfs_create(void *v) 796lfs_create(void *v)
797{ 797{
798 struct vop_create_v3_args /* { 798 struct vop_create_v3_args /* {
799 struct vnode *a_dvp; 799 struct vnode *a_dvp;
800 struct vnode **a_vpp; 800 struct vnode **a_vpp;
801 struct componentname *a_cnp; 801 struct componentname *a_cnp;
802 struct vattr *a_vap; 802 struct vattr *a_vap;
803 } */ *ap = v; 803 } */ *ap = v;
804 struct lfs *fs; 804 struct lfs *fs;
805 struct vnode *dvp, **vpp; 805 struct vnode *dvp, **vpp;
806 struct vattr *vap; 806 struct vattr *vap;
807 struct ulfs_lookup_results *ulr; 807 struct ulfs_lookup_results *ulr;
808 int error; 808 int error;
809 809
810 dvp = ap->a_dvp; 810 dvp = ap->a_dvp;
811 vpp = ap->a_vpp; 811 vpp = ap->a_vpp;
812 vap = ap->a_vap; 812 vap = ap->a_vap;
813 813
814 KASSERT(vpp != NULL); 814 KASSERT(vpp != NULL);
815 KASSERT(*vpp == NULL); 815 KASSERT(*vpp == NULL);
816 816
817 /* XXX should handle this material another way */ 817 /* XXX should handle this material another way */
818 ulr = &VTOI(dvp)->i_crap; 818 ulr = &VTOI(dvp)->i_crap;
819 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 819 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
820 820
821 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 821 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
822 ASSERT_NO_SEGLOCK(fs); 822 ASSERT_NO_SEGLOCK(fs);
823 if (fs->lfs_ronly) { 823 if (fs->lfs_ronly) {
824 return EROFS; 824 return EROFS;
825 } 825 }
826 826
827 error = lfs_set_dirop(dvp, NULL); 827 error = lfs_set_dirop(dvp, NULL);
828 if (error) 828 if (error)
829 return error; 829 return error;
830 830
831 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 831 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
832 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp); 832 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
833 if (error) { 833 if (error) {
834 fstrans_done(dvp->v_mount); 834 fstrans_done(dvp->v_mount);
835 goto out; 835 goto out;
836 } 836 }
837 fstrans_done(dvp->v_mount); 837 fstrans_done(dvp->v_mount);
838 VN_KNOTE(dvp, NOTE_WRITE); 838 VN_KNOTE(dvp, NOTE_WRITE);
839 VOP_UNLOCK(*vpp); 839 VOP_UNLOCK(*vpp);
840 840
841out: 841out:
842 842
843 UNMARK_VNODE(dvp); 843 UNMARK_VNODE(dvp);
844 UNMARK_VNODE(*vpp); 844 UNMARK_VNODE(*vpp);
845 if (!((*vpp)->v_uflag & VU_DIROP)) { 845 if (!((*vpp)->v_uflag & VU_DIROP)) {
846 KASSERT(error != 0); 846 KASSERT(error != 0);
847 *vpp = NULL; 847 *vpp = NULL;
848 } 848 }
849 else { 849 else {
850 KASSERT(error == 0); 850 KASSERT(error == 0);
851 } 851 }
852 lfs_unset_dirop(fs, dvp, "create"); 852 lfs_unset_dirop(fs, dvp, "create");
853 853
854 vrele(dvp); 854 vrele(dvp);
855 return (error); 855 return (error);
856} 856}
857 857
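The dirop bracket in lfs_create() above -- lfs_set_dirop() before the work, then UNMARK_VNODE()/lfs_unset_dirop()/vrele() on every exit path -- is the pattern all the name-space operations in this file follow. Below is a minimal userland sketch of just that ordering; every toy_* name is a hypothetical stand-in, not LFS API:

#include <stdio.h>

static int
toy_set_dirop(void)
{
        puts("set_dirop: reserve segment space, hold off the cleaner");
        return 0;
}

static void
toy_unmark(const char *v)
{
        printf("UNMARK_VNODE(%s)\n", v);
}

static void
toy_unset_dirop(const char *op)
{
        printf("unset_dirop(\"%s\"): release the reservation\n", op);
}

static int
toy_create(int fail)
{
        int error;

        if ((error = toy_set_dirop()) != 0)
                return error;           /* nothing to undo yet */

        error = fail ? 5 : 0;           /* stands in for ulfs_makeinode() */

        /* Success and failure run the same epilogue, as in lfs_create(). */
        toy_unmark("dvp");
        toy_unmark("*vpp");
        toy_unset_dirop("create");
        return error;
}

int
main(void)
{
        printf("ok path -> %d\n", toy_create(0));
        printf("error path -> %d\n", toy_create(1));
        return 0;
}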
858int 858int
859lfs_mkdir(void *v) 859lfs_mkdir(void *v)
860{ 860{
861 struct vop_mkdir_v3_args /* { 861 struct vop_mkdir_v3_args /* {
862 struct vnode *a_dvp; 862 struct vnode *a_dvp;
863 struct vnode **a_vpp; 863 struct vnode **a_vpp;
864 struct componentname *a_cnp; 864 struct componentname *a_cnp;
865 struct vattr *a_vap; 865 struct vattr *a_vap;
866 } */ *ap = v; 866 } */ *ap = v;
867 struct lfs *fs; 867 struct lfs *fs;
868 struct vnode *dvp, *tvp, **vpp; 868 struct vnode *dvp, *tvp, **vpp;
869 struct inode *dp, *ip; 869 struct inode *dp, *ip;
870 struct componentname *cnp; 870 struct componentname *cnp;
871 struct vattr *vap; 871 struct vattr *vap;
872 struct ulfs_lookup_results *ulr; 872 struct ulfs_lookup_results *ulr;
873 struct buf *bp; 873 struct buf *bp;
874 struct lfs_dirtemplate dirtemplate; 874 struct lfs_dirtemplate dirtemplate;
875 struct lfs_direct *newdir; 875 struct lfs_direct *newdir;
876 int dirblksiz; 876 int dirblksiz;
877 int error; 877 int error;
878 878
879 dvp = ap->a_dvp; 879 dvp = ap->a_dvp;
880 tvp = NULL; 880 tvp = NULL;
881 vpp = ap->a_vpp; 881 vpp = ap->a_vpp;
882 cnp = ap->a_cnp; 882 cnp = ap->a_cnp;
883 vap = ap->a_vap; 883 vap = ap->a_vap;
884 884
885 dp = VTOI(dvp); 885 dp = VTOI(dvp);
886 ip = NULL; 886 ip = NULL;
887 887
888 KASSERT(vap->va_type == VDIR); 888 KASSERT(vap->va_type == VDIR);
889 KASSERT(vpp != NULL); 889 KASSERT(vpp != NULL);
890 KASSERT(*vpp == NULL); 890 KASSERT(*vpp == NULL);
891 891
892 /* XXX should handle this material another way */ 892 /* XXX should handle this material another way */
893 ulr = &dp->i_crap; 893 ulr = &dp->i_crap;
894 ULFS_CHECK_CRAPCOUNTER(dp); 894 ULFS_CHECK_CRAPCOUNTER(dp);
895 895
896 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 896 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
897 ASSERT_NO_SEGLOCK(fs); 897 ASSERT_NO_SEGLOCK(fs);
898 if (fs->lfs_ronly) { 898 if (fs->lfs_ronly) {
899 return EROFS; 899 return EROFS;
900 } 900 }
901 dirblksiz = fs->um_dirblksiz; 901 dirblksiz = fs->um_dirblksiz;
902 902
903 error = lfs_set_dirop(dvp, NULL); 903 error = lfs_set_dirop(dvp, NULL);
904 if (error) 904 if (error)
905 return error; 905 return error;
906 906
907 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 907 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
908 908
909 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 909 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
910 error = EMLINK; 910 error = EMLINK;
911 goto out; 911 goto out;
912 } 912 }
913 913
914 /* 914 /*
915 * Must simulate part of ulfs_makeinode here to acquire the inode, 915 * Must simulate part of ulfs_makeinode here to acquire the inode,
916 * but not have it entered in the parent directory. The entry is 916 * but not have it entered in the parent directory. The entry is
917 * made later after writing "." and ".." entries. 917 * made later after writing "." and ".." entries.
918 */ 918 */
919 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp); 919 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
920 if (error) 920 if (error)
921 goto out; 921 goto out;
922 922
923 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE); 923 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
924 if (error) { 924 if (error) {
925 vrele(*ap->a_vpp); 925 vrele(*ap->a_vpp);
926 *ap->a_vpp = NULL; 926 *ap->a_vpp = NULL;
927 goto out; 927 goto out;
928 } 928 }
929 929
930 tvp = *ap->a_vpp; 930 tvp = *ap->a_vpp;
931 lfs_mark_vnode(tvp); 931 lfs_mark_vnode(tvp);
932 ip = VTOI(tvp); 932 ip = VTOI(tvp);
933 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 933 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
934 ip->i_nlink = 2; 934 ip->i_nlink = 2;
935 DIP_ASSIGN(ip, nlink, 2); 935 DIP_ASSIGN(ip, nlink, 2);
936 if (cnp->cn_flags & ISWHITEOUT) { 936 if (cnp->cn_flags & ISWHITEOUT) {
937 ip->i_flags |= UF_OPAQUE; 937 ip->i_flags |= UF_OPAQUE;
938 DIP_ASSIGN(ip, flags, ip->i_flags); 938 DIP_ASSIGN(ip, flags, ip->i_flags);
939 } 939 }
940 940
941 /* 941 /*
942 * Bump link count in parent directory to reflect work done below. 942 * Bump link count in parent directory to reflect work done below.
943 */ 943 */
944 dp->i_nlink++; 944 dp->i_nlink++;
945 DIP_ASSIGN(dp, nlink, dp->i_nlink); 945 DIP_ASSIGN(dp, nlink, dp->i_nlink);
946 dp->i_flag |= IN_CHANGE; 946 dp->i_flag |= IN_CHANGE;
947 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0) 947 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
948 goto bad; 948 goto bad;
949 949
950 /* 950 /*
951 * Initialize directory with "." and ".." from static template. 951 * Initialize directory with "." and ".." from static template.
952 */ 952 */
953 dirtemplate = mastertemplate; 953 dirtemplate = mastertemplate;
954 dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen; 954 dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
955 dirtemplate.dot_ino = ulfs_rw32(ip->i_number, ULFS_MPNEEDSWAP(fs)); 955 dirtemplate.dot_ino = ulfs_rw32(ip->i_number, ULFS_MPNEEDSWAP(fs));
956 dirtemplate.dotdot_ino = ulfs_rw32(dp->i_number, ULFS_MPNEEDSWAP(fs)); 956 dirtemplate.dotdot_ino = ulfs_rw32(dp->i_number, ULFS_MPNEEDSWAP(fs));
957 dirtemplate.dot_reclen = ulfs_rw16(dirtemplate.dot_reclen, 957 dirtemplate.dot_reclen = ulfs_rw16(dirtemplate.dot_reclen,
958 ULFS_MPNEEDSWAP(fs)); 958 ULFS_MPNEEDSWAP(fs));
959 dirtemplate.dotdot_reclen = ulfs_rw16(dirtemplate.dotdot_reclen, 959 dirtemplate.dotdot_reclen = ulfs_rw16(dirtemplate.dotdot_reclen,
960 ULFS_MPNEEDSWAP(fs)); 960 ULFS_MPNEEDSWAP(fs));
961 if (fs->um_maxsymlinklen <= 0) { 961 if (fs->um_maxsymlinklen <= 0) {
962#if BYTE_ORDER == LITTLE_ENDIAN 962#if BYTE_ORDER == LITTLE_ENDIAN
963 if (ULFS_MPNEEDSWAP(fs) == 0) 963 if (ULFS_MPNEEDSWAP(fs) == 0)
964#else 964#else
965 if (ULFS_MPNEEDSWAP(fs) != 0) 965 if (ULFS_MPNEEDSWAP(fs) != 0)
966#endif 966#endif
967 { 967 {
968 dirtemplate.dot_type = dirtemplate.dot_namlen; 968 dirtemplate.dot_type = dirtemplate.dot_namlen;
969 dirtemplate.dotdot_type = dirtemplate.dotdot_namlen; 969 dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
970 dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0; 970 dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
971 } else 971 } else
972 dirtemplate.dot_type = dirtemplate.dotdot_type = 0; 972 dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
973 } 973 }
974 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred, 974 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
975 B_CLRBUF, &bp)) != 0) 975 B_CLRBUF, &bp)) != 0)
976 goto bad; 976 goto bad;
977 ip->i_size = dirblksiz; 977 ip->i_size = dirblksiz;
978 DIP_ASSIGN(ip, size, dirblksiz); 978 DIP_ASSIGN(ip, size, dirblksiz);
979 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 979 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
980 uvm_vnp_setsize(tvp, ip->i_size); 980 uvm_vnp_setsize(tvp, ip->i_size);
981 memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate); 981 memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
982 982
983 /* 983 /*
984 * Directory set up; now install its entry in the parent directory. 984 * Directory set up; now install its entry in the parent directory.
985 */ 985 */
986 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) 986 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
987 goto bad; 987 goto bad;
988 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) { 988 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
989 goto bad; 989 goto bad;
990 } 990 }
991 newdir = pool_cache_get(ulfs_direct_cache, PR_WAITOK); 991 newdir = pool_cache_get(ulfs_direct_cache, PR_WAITOK);
992 ulfs_makedirentry(ip, cnp, newdir); 992 ulfs_makedirentry(ip, cnp, newdir);
993 error = ulfs_direnter(dvp, ulr, tvp, newdir, cnp, bp); 993 error = ulfs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
994 pool_cache_put(ulfs_direct_cache, newdir); 994 pool_cache_put(ulfs_direct_cache, newdir);
995 bad: 995 bad:
996 if (error == 0) { 996 if (error == 0) {
997 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 997 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
998 VOP_UNLOCK(tvp); 998 VOP_UNLOCK(tvp);
999 } else { 999 } else {
1000 dp->i_nlink--; 1000 dp->i_nlink--;
1001 DIP_ASSIGN(dp, nlink, dp->i_nlink); 1001 DIP_ASSIGN(dp, nlink, dp->i_nlink);
1002 dp->i_flag |= IN_CHANGE; 1002 dp->i_flag |= IN_CHANGE;
1003 /* 1003 /*
1004 * No need to do an explicit lfs_truncate here; vrele will 1004 * No need to do an explicit lfs_truncate here; vrele will
1005 * do this for us because we set the link count to 0. 1005 * do this for us because we set the link count to 0.
1006 */ 1006 */
1007 ip->i_nlink = 0; 1007 ip->i_nlink = 0;
1008 DIP_ASSIGN(ip, nlink, 0); 1008 DIP_ASSIGN(ip, nlink, 0);
1009 ip->i_flag |= IN_CHANGE; 1009 ip->i_flag |= IN_CHANGE;
1010 /* If IN_ADIROP, account for it */ 1010 /* If IN_ADIROP, account for it */
1011 lfs_unmark_vnode(tvp); 1011 lfs_unmark_vnode(tvp);
1012 vput(tvp); 1012 vput(tvp);
1013 } 1013 }
1014 1014
1015out: 1015out:
1016 fstrans_done(dvp->v_mount); 1016 fstrans_done(dvp->v_mount);
1017 1017
1018 UNMARK_VNODE(dvp); 1018 UNMARK_VNODE(dvp);
1019 UNMARK_VNODE(*vpp); 1019 UNMARK_VNODE(*vpp);
1020 if (!((*vpp)->v_uflag & VU_DIROP)) { 1020 if (!((*vpp)->v_uflag & VU_DIROP)) {
1021 KASSERT(error != 0); 1021 KASSERT(error != 0);
1022 *vpp = NULL; 1022 *vpp = NULL;
1023 } 1023 }
1024 else { 1024 else {
1025 KASSERT(error == 0); 1025 KASSERT(error == 0);
1026 } 1026 }
1027 lfs_unset_dirop(fs, dvp, "mkdir"); 1027 lfs_unset_dirop(fs, dvp, "mkdir");
1028 1028
1029 vrele(dvp); 1029 vrele(dvp);
1030 return (error); 1030 return (error);
1031} 1031}
1032 1032
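The reclen arithmetic in lfs_mkdir() above packs "." and ".." so they exactly fill one directory block: dotdot_reclen is defined as whatever dirblksiz leaves after dot_reclen. The same arithmetic in isolation; the 512/12 values are representative only, the real ones come from fs->um_dirblksiz and the template:

#include <stdio.h>

int
main(void)
{
        unsigned dirblksiz = 512;       /* stand-in for fs->um_dirblksiz */
        unsigned dot_reclen = 12;       /* entry header plus "." padded */
        unsigned dotdot_reclen = dirblksiz - dot_reclen;

        /* ".." absorbs the whole remainder: no free gap in the block. */
        printf("%u + %u = %u\n", dot_reclen, dotdot_reclen,
            dot_reclen + dotdot_reclen);
        return 0;
}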
1033int 1033int
1034lfs_remove(void *v) 1034lfs_remove(void *v)
1035{ 1035{
1036 struct vop_remove_args /* { 1036 struct vop_remove_args /* {
1037 struct vnode *a_dvp; 1037 struct vnode *a_dvp;
1038 struct vnode *a_vp; 1038 struct vnode *a_vp;
1039 struct componentname *a_cnp; 1039 struct componentname *a_cnp;
1040 } */ *ap = v; 1040 } */ *ap = v;
1041 struct vnode *dvp, *vp; 1041 struct vnode *dvp, *vp;
1042 struct inode *ip; 1042 struct inode *ip;
1043 int error; 1043 int error;
1044 1044
1045 dvp = ap->a_dvp; 1045 dvp = ap->a_dvp;
1046 vp = ap->a_vp; 1046 vp = ap->a_vp;
1047 ip = VTOI(vp); 1047 ip = VTOI(vp);
1048 if ((error = lfs_set_dirop(dvp, vp)) != 0) { 1048 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1049 if (dvp == vp) 1049 if (dvp == vp)
1050 vrele(vp); 1050 vrele(vp);
1051 else 1051 else
1052 vput(vp); 1052 vput(vp);
1053 vput(dvp); 1053 vput(dvp);
1054 return error; 1054 return error;
1055 } 1055 }
1056 error = ulfs_remove(ap); 1056 error = ulfs_remove(ap);
1057 if (ip->i_nlink == 0) 1057 if (ip->i_nlink == 0)
1058 lfs_orphan(ip->i_lfs, ip->i_number); 1058 lfs_orphan(ip->i_lfs, ip->i_number);
1059 1059
1060 UNMARK_VNODE(dvp); 1060 UNMARK_VNODE(dvp);
1061 if (ap->a_vp) { 1061 if (ap->a_vp) {
1062 UNMARK_VNODE(ap->a_vp); 1062 UNMARK_VNODE(ap->a_vp);
1063 } 1063 }
1064 lfs_unset_dirop(ip->i_lfs, dvp, "remove"); 1064 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1065 vrele(dvp); 1065 vrele(dvp);
1066 if (ap->a_vp) { 1066 if (ap->a_vp) {
1067 vrele(ap->a_vp); 1067 vrele(ap->a_vp);
1068 } 1068 }
1069 1069
1070 return (error); 1070 return (error);
1071} 1071}
1072 1072
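The dvp == vp test on the error path of lfs_remove() above is about locks versus references: when the directory and the target are the same vnode there is one lock but two references, so one of the two drops must not unlock. A toy model of that invariant (toy_* names are hypothetical):

#include <assert.h>
#include <stdio.h>

struct toy_vnode {
        int usecount;
        int locked;
};

static void
toy_vput(struct toy_vnode *v)
{
        assert(v->locked);      /* vput() unlocks as well as releases */
        v->locked = 0;
        v->usecount--;
}

static void
toy_vrele(struct toy_vnode *v)
{
        v->usecount--;          /* vrele() only drops the reference */
}

int
main(void)
{
        /* dvp == vp: one vnode, locked once, referenced twice. */
        struct toy_vnode n = { 2, 1 };

        toy_vrele(&n);          /* the "vp" drop leaves the lock alone */
        toy_vput(&n);           /* the "dvp" drop takes the single lock */
        printf("usecount=%d locked=%d\n", n.usecount, n.locked);
        return 0;
}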
1073int 1073int
1074lfs_rmdir(void *v) 1074lfs_rmdir(void *v)
1075{ 1075{
1076 struct vop_rmdir_args /* { 1076 struct vop_rmdir_args /* {
1077 struct vnodeop_desc *a_desc; 1077 struct vnodeop_desc *a_desc;
1078 struct vnode *a_dvp; 1078 struct vnode *a_dvp;
1079 struct vnode *a_vp; 1079 struct vnode *a_vp;
1080 struct componentname *a_cnp; 1080 struct componentname *a_cnp;
1081 } */ *ap = v; 1081 } */ *ap = v;
1082 struct vnode *vp; 1082 struct vnode *vp;
1083 struct inode *ip; 1083 struct inode *ip;
1084 int error; 1084 int error;
1085 1085
1086 vp = ap->a_vp; 1086 vp = ap->a_vp;
1087 ip = VTOI(vp); 1087 ip = VTOI(vp);
1088 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) { 1088 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1089 if (ap->a_dvp == vp) 1089 if (ap->a_dvp == vp)
1090 vrele(ap->a_dvp); 1090 vrele(ap->a_dvp);
1091 else 1091 else
1092 vput(ap->a_dvp); 1092 vput(ap->a_dvp);
1093 vput(vp); 1093 vput(vp);
1094 return error; 1094 return error;
1095 } 1095 }
1096 error = ulfs_rmdir(ap); 1096 error = ulfs_rmdir(ap);
1097 if (ip->i_nlink == 0) 1097 if (ip->i_nlink == 0)
1098 lfs_orphan(ip->i_lfs, ip->i_number); 1098 lfs_orphan(ip->i_lfs, ip->i_number);
1099 1099
1100 UNMARK_VNODE(ap->a_dvp); 1100 UNMARK_VNODE(ap->a_dvp);
1101 if (ap->a_vp) { 1101 if (ap->a_vp) {
1102 UNMARK_VNODE(ap->a_vp); 1102 UNMARK_VNODE(ap->a_vp);
1103 } 1103 }
1104 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir"); 1104 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1105 vrele(ap->a_dvp); 1105 vrele(ap->a_dvp);
1106 if (ap->a_vp) { 1106 if (ap->a_vp) {
1107 vrele(ap->a_vp); 1107 vrele(ap->a_vp);
1108 } 1108 }
1109 1109
1110 return (error); 1110 return (error);
1111} 1111}
1112 1112
1113int 1113int
1114lfs_link(void *v) 1114lfs_link(void *v)
1115{ 1115{
1116 struct vop_link_v2_args /* { 1116 struct vop_link_v2_args /* {
1117 struct vnode *a_dvp; 1117 struct vnode *a_dvp;
1118 struct vnode *a_vp; 1118 struct vnode *a_vp;
1119 struct componentname *a_cnp; 1119 struct componentname *a_cnp;
1120 } */ *ap = v; 1120 } */ *ap = v;
1121 struct lfs *fs; 1121 struct lfs *fs;
1122 struct vnode *dvp; 1122 struct vnode *dvp;
1123 int error; 1123 int error;
1124 1124
1125 dvp = ap->a_dvp; 1125 dvp = ap->a_dvp;
1126 1126
1127 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 1127 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1128 ASSERT_NO_SEGLOCK(fs); 1128 ASSERT_NO_SEGLOCK(fs);
1129 if (fs->lfs_ronly) { 1129 if (fs->lfs_ronly) {
1130 return EROFS; 1130 return EROFS;
1131 } 1131 }
1132 1132
1133 error = lfs_set_dirop(dvp, NULL); 1133 error = lfs_set_dirop(dvp, NULL);
1134 if (error) { 1134 if (error) {
1135 return error; 1135 return error;
1136 } 1136 }
1137 1137
1138 error = ulfs_link(ap); 1138 error = ulfs_link(ap);
1139 1139
1140 UNMARK_VNODE(dvp); 1140 UNMARK_VNODE(dvp);
1141 lfs_unset_dirop(fs, dvp, "link"); 1141 lfs_unset_dirop(fs, dvp, "link");
1142 vrele(dvp); 1142 vrele(dvp);
1143 1143
1144 return (error); 1144 return (error);
1145} 1145}
1146 1146
1147/* XXX hack to avoid calling ITIMES in getattr */ 1147/* XXX hack to avoid calling ITIMES in getattr */
1148int 1148int
1149lfs_getattr(void *v) 1149lfs_getattr(void *v)
1150{ 1150{
1151 struct vop_getattr_args /* { 1151 struct vop_getattr_args /* {
1152 struct vnode *a_vp; 1152 struct vnode *a_vp;
1153 struct vattr *a_vap; 1153 struct vattr *a_vap;
1154 kauth_cred_t a_cred; 1154 kauth_cred_t a_cred;
1155 } */ *ap = v; 1155 } */ *ap = v;
1156 struct vnode *vp = ap->a_vp; 1156 struct vnode *vp = ap->a_vp;
1157 struct inode *ip = VTOI(vp); 1157 struct inode *ip = VTOI(vp);
1158 struct vattr *vap = ap->a_vap; 1158 struct vattr *vap = ap->a_vap;
1159 struct lfs *fs = ip->i_lfs; 1159 struct lfs *fs = ip->i_lfs;
1160 1160
1161 fstrans_start(vp->v_mount, FSTRANS_SHARED); 1161 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1162 /* 1162 /*
1163 * Copy from inode table 1163 * Copy from inode table
1164 */ 1164 */
1165 vap->va_fsid = ip->i_dev; 1165 vap->va_fsid = ip->i_dev;
1166 vap->va_fileid = ip->i_number; 1166 vap->va_fileid = ip->i_number;
1167 vap->va_mode = ip->i_mode & ~LFS_IFMT; 1167 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1168 vap->va_nlink = ip->i_nlink; 1168 vap->va_nlink = ip->i_nlink;
1169 vap->va_uid = ip->i_uid; 1169 vap->va_uid = ip->i_uid;
1170 vap->va_gid = ip->i_gid; 1170 vap->va_gid = ip->i_gid;
1171 vap->va_rdev = (dev_t)ip->i_ffs1_rdev; 1171 vap->va_rdev = (dev_t)ip->i_ffs1_rdev;
1172 vap->va_size = vp->v_size; 1172 vap->va_size = vp->v_size;
1173 vap->va_atime.tv_sec = ip->i_ffs1_atime; 1173 vap->va_atime.tv_sec = ip->i_ffs1_atime;
1174 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 1174 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
1175 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 1175 vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
1176 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 1176 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
1177 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 1177 vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
1178 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 1178 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
1179 vap->va_flags = ip->i_flags; 1179 vap->va_flags = ip->i_flags;
1180 vap->va_gen = ip->i_gen; 1180 vap->va_gen = ip->i_gen;
1181 /* this doesn't belong here */ 1181 /* this doesn't belong here */
1182 if (vp->v_type == VBLK) 1182 if (vp->v_type == VBLK)
1183 vap->va_blocksize = BLKDEV_IOSIZE; 1183 vap->va_blocksize = BLKDEV_IOSIZE;
1184 else if (vp->v_type == VCHR) 1184 else if (vp->v_type == VCHR)
1185 vap->va_blocksize = MAXBSIZE; 1185 vap->va_blocksize = MAXBSIZE;
1186 else 1186 else
1187 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 1187 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1188 vap->va_bytes = lfs_fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks); 1188 vap->va_bytes = lfs_fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks);
1189 vap->va_type = vp->v_type; 1189 vap->va_type = vp->v_type;
1190 vap->va_filerev = ip->i_modrev; 1190 vap->va_filerev = ip->i_modrev;
1191 fstrans_done(vp->v_mount); 1191 fstrans_done(vp->v_mount);
1192 return (0); 1192 return (0);
1193} 1193}
1194 1194
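Note that va_bytes in lfs_getattr() above is derived from i_lfs_effnblks rather than the on-disk block count, so dirty, not-yet-written blocks are included; lfs_fsbtob() is a shift from frag addresses to bytes. A toy version, with the shift width an assumed example rather than a value read from any superblock:

#include <stdio.h>
#include <stdint.h>

/* Stand-in for lfs_fsbtob(); the real shift comes from the superblock,
 * 13 (8 KB frags) is only a plausible example. */
static uint64_t
toy_fsbtob(unsigned bshift, uint64_t fsb)
{
        return fsb << bshift;
}

int
main(void)
{
        printf("10 fsb -> %llu bytes\n",
            (unsigned long long)toy_fsbtob(13, 10));
        return 0;
}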
1195/* 1195/*
1196 * Check to make sure the inode blocks won't choke the buffer 1196 * Check to make sure the inode blocks won't choke the buffer
1197 * cache, then call ulfs_setattr as usual. 1197 * cache, then call ulfs_setattr as usual.
1198 */ 1198 */
1199int 1199int
1200lfs_setattr(void *v) 1200lfs_setattr(void *v)
1201{ 1201{
1202 struct vop_setattr_args /* { 1202 struct vop_setattr_args /* {
1203 struct vnode *a_vp; 1203 struct vnode *a_vp;
1204 struct vattr *a_vap; 1204 struct vattr *a_vap;
1205 kauth_cred_t a_cred; 1205 kauth_cred_t a_cred;
1206 } */ *ap = v; 1206 } */ *ap = v;
1207 struct vnode *vp = ap->a_vp; 1207 struct vnode *vp = ap->a_vp;
1208 1208
1209 lfs_check(vp, LFS_UNUSED_LBN, 0); 1209 lfs_check(vp, LFS_UNUSED_LBN, 0);
1210 return ulfs_setattr(v); 1210 return ulfs_setattr(v);
1211} 1211}
1212 1212
1213/* 1213/*
1214 * Release the block we hold on lfs_newseg wrapping. Called on file close, 1214 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1215 * or explicitly from LFCNWRAPGO. Called with the interlock held. 1215 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1216 */ 1216 */
1217static int 1217static int
1218lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) 1218lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
1219{ 1219{
1220 if (fs->lfs_stoplwp != curlwp) 1220 if (fs->lfs_stoplwp != curlwp)
1221 return EBUSY; 1221 return EBUSY;
1222 1222
1223 fs->lfs_stoplwp = NULL; 1223 fs->lfs_stoplwp = NULL;
1224 cv_signal(&fs->lfs_stopcv); 1224 cv_signal(&fs->lfs_stopcv);
1225 1225
1226 KASSERT(fs->lfs_nowrap > 0); 1226 KASSERT(fs->lfs_nowrap > 0);
1227 if (fs->lfs_nowrap <= 0) { 1227 if (fs->lfs_nowrap <= 0) {
1228 return 0; 1228 return 0;
1229 } 1229 }
1230 1230
1231 if (--fs->lfs_nowrap == 0) { 1231 if (--fs->lfs_nowrap == 0) {
1232 log(LOG_NOTICE, "%s: re-enabled log wrap\n", 1232 log(LOG_NOTICE, "%s: re-enabled log wrap\n",
1233 lfs_sb_getfsmnt(fs)); 1233 lfs_sb_getfsmnt(fs));
1234 wakeup(&fs->lfs_wrappass); 1234 wakeup(&fs->lfs_wrappass);
1235 lfs_wakeup_cleaner(fs); 1235 lfs_wakeup_cleaner(fs);
1236 } 1236 }
1237 if (waitfor) { 1237 if (waitfor) {
1238 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment", 1238 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
1239 0, &lfs_lock); 1239 0, &lfs_lock);
1240 } 1240 }
1241 1241
1242 return 0; 1242 return 0;
1243} 1243}
1244 1244
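lfs_wrapgo() above is the release half of a counted hold: lfs_nowrap is a count, and only the final decrement wakes the wrappass sleepers and the cleaner. The counting behavior, reduced to a sketch (whether several holders can coexist depends on paths outside this excerpt, so treat the two-holder case as illustrative):

#include <stdio.h>

static int nowrap;

static void
toy_wrapstop(void)
{
        if (nowrap++ == 0)
                puts("disabled log wrap");
}

static void
toy_wrapgo(void)
{
        if (--nowrap == 0)
                puts("re-enabled log wrap; wake wrappass + cleaner");
}

int
main(void)
{
        toy_wrapstop();
        toy_wrapstop();         /* a second hold on the log */
        toy_wrapgo();           /* still held by the first */
        toy_wrapgo();           /* last release lets the log wrap again */
        return 0;
}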
1245/* 1245/*
1246 * Close called. 1246 * Close called.
1247 * 1247 *
1248 * Update the times on the inode. 1248 * Update the times on the inode.
1249 */ 1249 */
1250/* ARGSUSED */ 1250/* ARGSUSED */
1251int 1251int
1252lfs_close(void *v) 1252lfs_close(void *v)
1253{ 1253{
1254 struct vop_close_args /* { 1254 struct vop_close_args /* {
1255 struct vnode *a_vp; 1255 struct vnode *a_vp;
1256 int a_fflag; 1256 int a_fflag;
1257 kauth_cred_t a_cred; 1257 kauth_cred_t a_cred;
1258 } */ *ap = v; 1258 } */ *ap = v;
1259 struct vnode *vp = ap->a_vp; 1259 struct vnode *vp = ap->a_vp;
1260 struct inode *ip = VTOI(vp); 1260 struct inode *ip = VTOI(vp);
1261 struct lfs *fs = ip->i_lfs; 1261 struct lfs *fs = ip->i_lfs;
1262 1262
1263 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) && 1263 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
1264 fs->lfs_stoplwp == curlwp) { 1264 fs->lfs_stoplwp == curlwp) {
1265 mutex_enter(&lfs_lock); 1265 mutex_enter(&lfs_lock);
1266 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); 1266 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
1267 lfs_wrapgo(fs, ip, 0); 1267 lfs_wrapgo(fs, ip, 0);
1268 mutex_exit(&lfs_lock); 1268 mutex_exit(&lfs_lock);
1269 } 1269 }
1270 1270
1271 if (vp == ip->i_lfs->lfs_ivnode && 1271 if (vp == ip->i_lfs->lfs_ivnode &&
1272 vp->v_mount->mnt_iflag & IMNT_UNMOUNT) 1272 vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
1273 return 0; 1273 return 0;
1274 1274
1275 fstrans_start(vp->v_mount, FSTRANS_SHARED); 1275 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1276 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) { 1276 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
1277 LFS_ITIMES(ip, NULL, NULL, NULL); 1277 LFS_ITIMES(ip, NULL, NULL, NULL);
1278 } 1278 }
1279 fstrans_done(vp->v_mount); 1279 fstrans_done(vp->v_mount);
1280 return (0); 1280 return (0);
1281} 1281}
1282 1282
1283/* 1283/*
1284 * Close wrapper for special devices. 1284 * Close wrapper for special devices.
1285 * 1285 *
1286 * Update the times on the inode then do device close. 1286 * Update the times on the inode then do device close.
1287 */ 1287 */
1288int 1288int
1289lfsspec_close(void *v) 1289lfsspec_close(void *v)
1290{ 1290{
1291 struct vop_close_args /* { 1291 struct vop_close_args /* {
1292 struct vnode *a_vp; 1292 struct vnode *a_vp;
1293 int a_fflag; 1293 int a_fflag;
1294 kauth_cred_t a_cred; 1294 kauth_cred_t a_cred;
1295 } */ *ap = v; 1295 } */ *ap = v;
1296 struct vnode *vp; 1296 struct vnode *vp;
1297 struct inode *ip; 1297 struct inode *ip;
1298 1298
1299 vp = ap->a_vp; 1299 vp = ap->a_vp;
1300 ip = VTOI(vp); 1300 ip = VTOI(vp);
1301 if (vp->v_usecount > 1) { 1301 if (vp->v_usecount > 1) {
1302 LFS_ITIMES(ip, NULL, NULL, NULL); 1302 LFS_ITIMES(ip, NULL, NULL, NULL);
1303 } 1303 }
1304 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 1304 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1305} 1305}
1306 1306
1307/* 1307/*
1308 * Close wrapper for fifos. 1308 * Close wrapper for fifos.
1309 * 1309 *
1310 * Update the times on the inode then do device close. 1310 * Update the times on the inode then do device close.
1311 */ 1311 */
1312int 1312int
1313lfsfifo_close(void *v) 1313lfsfifo_close(void *v)
1314{ 1314{
1315 struct vop_close_args /* { 1315 struct vop_close_args /* {
1316 struct vnode *a_vp; 1316 struct vnode *a_vp;
1317 int a_fflag; 1317 int a_fflag;
1318 kauth_cred_t a_cred; 1318 kauth_cred_t a_cred;
1319 } */ *ap = v; 1319 } */ *ap = v;
1320 struct vnode *vp; 1320 struct vnode *vp;
1321 struct inode *ip; 1321 struct inode *ip;
1322 1322
1323 vp = ap->a_vp; 1323 vp = ap->a_vp;
1324 ip = VTOI(vp); 1324 ip = VTOI(vp);
1325 if (ap->a_vp->v_usecount > 1) { 1325 if (ap->a_vp->v_usecount > 1) {
1326 LFS_ITIMES(ip, NULL, NULL, NULL); 1326 LFS_ITIMES(ip, NULL, NULL, NULL);
1327 } 1327 }
1328 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 1328 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1329} 1329}
1330 1330
1331/* 1331/*
1332 * Reclaim an inode so that it can be used for other purposes. 1332 * Reclaim an inode so that it can be used for other purposes.
1333 */ 1333 */
1334 1334
1335int 1335int
1336lfs_reclaim(void *v) 1336lfs_reclaim(void *v)
1337{ 1337{
1338 struct vop_reclaim_args /* { 1338 struct vop_reclaim_args /* {
1339 struct vnode *a_vp; 1339 struct vnode *a_vp;
1340 } */ *ap = v; 1340 } */ *ap = v;
1341 struct vnode *vp = ap->a_vp; 1341 struct vnode *vp = ap->a_vp;
1342 struct inode *ip = VTOI(vp); 1342 struct inode *ip = VTOI(vp);
1343 struct lfs *fs = ip->i_lfs; 1343 struct lfs *fs = ip->i_lfs;
1344 int error; 1344 int error;
1345 1345
1346 /* 1346 /*
1347 * The inode must be freed and updated before being removed 1347 * The inode must be freed and updated before being removed
1348 * from its hash chain. Other threads trying to gain a hold 1348 * from its hash chain. Other threads trying to gain a hold
1349 * or lock on the inode will be stalled. 1349 * or lock on the inode will be stalled.
1350 */ 1350 */
1351 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1351 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1352 lfs_vfree(vp, ip->i_number, ip->i_omode); 1352 lfs_vfree(vp, ip->i_number, ip->i_omode);
1353 1353
1354 mutex_enter(&lfs_lock); 1354 mutex_enter(&lfs_lock);
1355 LFS_CLR_UINO(ip, IN_ALLMOD); 1355 LFS_CLR_UINO(ip, IN_ALLMOD);
1356 mutex_exit(&lfs_lock); 1356 mutex_exit(&lfs_lock);
1357 if ((error = ulfs_reclaim(vp))) 1357 if ((error = ulfs_reclaim(vp)))
1358 return (error); 1358 return (error);
1359 1359
1360 /* 1360 /*
1361 * Take us off the paging and/or dirop queues if we were on them. 1361 * Take us off the paging and/or dirop queues if we were on them.
1362 * We shouldn't be on them. 1362 * We shouldn't be on them.
1363 */ 1363 */
1364 mutex_enter(&lfs_lock); 1364 mutex_enter(&lfs_lock);
1365 if (ip->i_flags & IN_PAGING) { 1365 if (ip->i_flags & IN_PAGING) {
1366 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1366 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
1367 lfs_sb_getfsmnt(fs)); 1367 lfs_sb_getfsmnt(fs));
1368 ip->i_flags &= ~IN_PAGING; 1368 ip->i_flags &= ~IN_PAGING;
1369 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1369 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
1370 } 1370 }
1371 if (vp->v_uflag & VU_DIROP) { 1371 if (vp->v_uflag & VU_DIROP) {
1372 panic("reclaimed vnode is VU_DIROP"); 1372 panic("reclaimed vnode is VU_DIROP");
1373 vp->v_uflag &= ~VU_DIROP; 1373 vp->v_uflag &= ~VU_DIROP;
1374 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1374 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
1375 } 1375 }
1376 mutex_exit(&lfs_lock); 1376 mutex_exit(&lfs_lock);
1377 1377
1378 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); 1378 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din);
1379 lfs_deregister_all(vp); 1379 lfs_deregister_all(vp);
1380 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1380 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
1381 ip->inode_ext.lfs = NULL; 1381 ip->inode_ext.lfs = NULL;
1382 genfs_node_destroy(vp); 1382 genfs_node_destroy(vp);
1383 pool_put(&lfs_inode_pool, vp->v_data); 1383 pool_put(&lfs_inode_pool, vp->v_data);
1384 vp->v_data = NULL; 1384 vp->v_data = NULL;
1385 return (0); 1385 return (0);
1386} 1386}
1387 1387
1388/* 1388/*
1389 * Read a block from a storage device. 1389 * Read a block from a storage device.
1390 * 1390 *
1391 * Calculate the logical to physical mapping if not done already, 1391 * Calculate the logical to physical mapping if not done already,
1392 * then call the device strategy routine. 1392 * then call the device strategy routine.
1393 * 1393 *
1394 * In order to avoid reading blocks that are in the process of being 1394 * In order to avoid reading blocks that are in the process of being
1395 * written by the cleaner---and hence are not mutexed by the normal 1395 * written by the cleaner---and hence are not mutexed by the normal
1396 * buffer cache / page cache mechanisms---check for collisions before 1396 * buffer cache / page cache mechanisms---check for collisions before
1397 * reading. 1397 * reading.
1398 * 1398 *
1399 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before* 1399 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
1400 * the active cleaner test. 1400 * the active cleaner test.
1401 * 1401 *
1402 * XXX This code assumes that lfs_markv makes synchronous checkpoints. 1402 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
1403 */ 1403 */
1404int 1404int
1405lfs_strategy(void *v) 1405lfs_strategy(void *v)
1406{ 1406{
1407 struct vop_strategy_args /* { 1407 struct vop_strategy_args /* {
1408 struct vnode *a_vp; 1408 struct vnode *a_vp;
1409 struct buf *a_bp; 1409 struct buf *a_bp;
1410 } */ *ap = v; 1410 } */ *ap = v;
1411 struct buf *bp; 1411 struct buf *bp;
1412 struct lfs *fs; 1412 struct lfs *fs;
1413 struct vnode *vp; 1413 struct vnode *vp;
1414 struct inode *ip; 1414 struct inode *ip;
1415 daddr_t tbn; 1415 daddr_t tbn;
1416#define MAXLOOP 25 1416#define MAXLOOP 25
1417 int i, sn, error, slept, loopcount; 1417 int i, sn, error, slept, loopcount;
1418 1418
1419 bp = ap->a_bp; 1419 bp = ap->a_bp;
1420 vp = ap->a_vp; 1420 vp = ap->a_vp;
1421 ip = VTOI(vp); 1421 ip = VTOI(vp);
1422 fs = ip->i_lfs; 1422 fs = ip->i_lfs;
1423 1423
1424 /* lfs uses its strategy routine only for read */ 1424 /* lfs uses its strategy routine only for read */
1425 KASSERT(bp->b_flags & B_READ); 1425 KASSERT(bp->b_flags & B_READ);
1426 1426
1427 if (vp->v_type == VBLK || vp->v_type == VCHR) 1427 if (vp->v_type == VBLK || vp->v_type == VCHR)
1428 panic("lfs_strategy: spec"); 1428 panic("lfs_strategy: spec");
1429 KASSERT(bp->b_bcount != 0); 1429 KASSERT(bp->b_bcount != 0);
1430 if (bp->b_blkno == bp->b_lblkno) { 1430 if (bp->b_blkno == bp->b_lblkno) {
1431 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1431 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
1432 NULL); 1432 NULL);
1433 if (error) { 1433 if (error) {
1434 bp->b_error = error; 1434 bp->b_error = error;
1435 bp->b_resid = bp->b_bcount; 1435 bp->b_resid = bp->b_bcount;
1436 biodone(bp); 1436 biodone(bp);
1437 return (error); 1437 return (error);
1438 } 1438 }
1439 if ((long)bp->b_blkno == -1) /* no valid data */ 1439 if ((long)bp->b_blkno == -1) /* no valid data */
1440 clrbuf(bp); 1440 clrbuf(bp);
1441 } 1441 }
1442 if ((long)bp->b_blkno < 0) { /* block is not on disk */ 1442 if ((long)bp->b_blkno < 0) { /* block is not on disk */
1443 bp->b_resid = bp->b_bcount; 1443 bp->b_resid = bp->b_bcount;
1444 biodone(bp); 1444 biodone(bp);
1445 return (0); 1445 return (0);
1446 } 1446 }
1447 1447
1448 slept = 1; 1448 slept = 1;
1449 loopcount = 0; 1449 loopcount = 0;
1450 mutex_enter(&lfs_lock); 1450 mutex_enter(&lfs_lock);
1451 while (slept && fs->lfs_seglock) { 1451 while (slept && fs->lfs_seglock) {
1452 mutex_exit(&lfs_lock); 1452 mutex_exit(&lfs_lock);
1453 /* 1453 /*
1454 * Look through list of intervals. 1454 * Look through list of intervals.
1455 * There will only be intervals to look through 1455 * There will only be intervals to look through
1456 * if the cleaner holds the seglock. 1456 * if the cleaner holds the seglock.
1457 * Since the cleaner is synchronous, we can trust 1457 * Since the cleaner is synchronous, we can trust
1458 * the list of intervals to be current. 1458 * the list of intervals to be current.
1459 */ 1459 */
1460 tbn = LFS_DBTOFSB(fs, bp->b_blkno); 1460 tbn = LFS_DBTOFSB(fs, bp->b_blkno);
1461 sn = lfs_dtosn(fs, tbn); 1461 sn = lfs_dtosn(fs, tbn);
1462 slept = 0; 1462 slept = 0;
1463 for (i = 0; i < fs->lfs_cleanind; i++) { 1463 for (i = 0; i < fs->lfs_cleanind; i++) {
1464 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) && 1464 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
1465 tbn >= fs->lfs_cleanint[i]) { 1465 tbn >= fs->lfs_cleanint[i]) {
1466 DLOG((DLOG_CLEAN, 1466 DLOG((DLOG_CLEAN,
1467 "lfs_strategy: ino %d lbn %" PRId64 1467 "lfs_strategy: ino %d lbn %" PRId64
1468 " ind %d sn %d fsb %" PRIx32 1468 " ind %d sn %d fsb %" PRIx32
1469 " given sn %d fsb %" PRIx64 "\n", 1469 " given sn %d fsb %" PRIx64 "\n",
1470 ip->i_number, bp->b_lblkno, i, 1470 ip->i_number, bp->b_lblkno, i,
1471 lfs_dtosn(fs, fs->lfs_cleanint[i]), 1471 lfs_dtosn(fs, fs->lfs_cleanint[i]),
1472 fs->lfs_cleanint[i], sn, tbn)); 1472 fs->lfs_cleanint[i], sn, tbn));
1473 DLOG((DLOG_CLEAN, 1473 DLOG((DLOG_CLEAN,
1474 "lfs_strategy: sleeping on ino %d lbn %" 1474 "lfs_strategy: sleeping on ino %d lbn %"
1475 PRId64 "\n", ip->i_number, bp->b_lblkno)); 1475 PRId64 "\n", ip->i_number, bp->b_lblkno));
1476 mutex_enter(&lfs_lock); 1476 mutex_enter(&lfs_lock);
1477 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { 1477 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
1478 /* 1478 /*
1479 * Cleaner can't wait for itself. 1479 * Cleaner can't wait for itself.
1480 * Instead, wait for the blocks 1480 * Instead, wait for the blocks
1481 * to be written to disk. 1481 * to be written to disk.
1482 * XXX we need pribio in the test 1482 * XXX we need pribio in the test
1483 * XXX here. 1483 * XXX here.
1484 */ 1484 */
1485 mtsleep(&fs->lfs_iocount, 1485 mtsleep(&fs->lfs_iocount,
1486 (PRIBIO + 1) | PNORELOCK, 1486 (PRIBIO + 1) | PNORELOCK,
1487 "clean2", hz/10 + 1, 1487 "clean2", hz/10 + 1,
1488 &lfs_lock); 1488 &lfs_lock);
1489 slept = 1; 1489 slept = 1;
1490 ++loopcount; 1490 ++loopcount;
1491 break; 1491 break;
1492 } else if (fs->lfs_seglock) { 1492 } else if (fs->lfs_seglock) {
1493 mtsleep(&fs->lfs_seglock, 1493 mtsleep(&fs->lfs_seglock,
1494 (PRIBIO + 1) | PNORELOCK, 1494 (PRIBIO + 1) | PNORELOCK,
1495 "clean1", 0, 1495 "clean1", 0,
1496 &lfs_lock); 1496 &lfs_lock);
1497 slept = 1; 1497 slept = 1;
1498 break; 1498 break;
1499 } 1499 }
1500 mutex_exit(&lfs_lock); 1500 mutex_exit(&lfs_lock);
1501 } 1501 }
1502 } 1502 }
1503 mutex_enter(&lfs_lock); 1503 mutex_enter(&lfs_lock);
1504 if (loopcount > MAXLOOP) { 1504 if (loopcount > MAXLOOP) {
1505 printf("lfs_strategy: breaking out of clean2 loop\n"); 1505 printf("lfs_strategy: breaking out of clean2 loop\n");
1506 break; 1506 break;
1507 } 1507 }
1508 } 1508 }
1509 mutex_exit(&lfs_lock); 1509 mutex_exit(&lfs_lock);
1510 1510
1511 vp = ip->i_devvp; 1511 vp = ip->i_devvp;
1512 return VOP_STRATEGY(vp, bp); 1512 return VOP_STRATEGY(vp, bp);
1513} 1513}
1514 1514
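The wait loop in lfs_strategy() above sleeps when the target block falls inside a segment interval the cleaner is currently rewriting: same segment number, and at or past the interval's start. The test itself, reduced to plain arithmetic; TOY_SEGSZ and the toy_* names are assumptions, the real conversions are LFS_DBTOFSB() and lfs_dtosn():

#include <stdio.h>
#include <stdint.h>

#define TOY_SEGSZ       1024    /* frag addresses per segment (assumed) */

static int
toy_dtosn(uint64_t fsb)
{
        return (int)(fsb / TOY_SEGSZ);
}

static int
toy_collides(uint64_t tbn, const uint64_t *cleanint, int cleanind)
{
        int i;

        for (i = 0; i < cleanind; i++)
                if (toy_dtosn(tbn) == toy_dtosn(cleanint[i]) &&
                    tbn >= cleanint[i])
                        return 1;       /* being rewritten: caller waits */
        return 0;
}

int
main(void)
{
        uint64_t intervals[1] = { 2048 }; /* cleaner working fsb 2048 up */

        printf("fsb 2100: %d\n", toy_collides(2100, intervals, 1));
        printf("fsb 1500: %d\n", toy_collides(1500, intervals, 1));
        return 0;
}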
1515/* 1515/*
1516 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops. 1516 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
1517 * Technically this is a checkpoint (the on-disk state is valid) 1517 * Technically this is a checkpoint (the on-disk state is valid)
1518 * even though we are leaving out all the file data. 1518 * even though we are leaving out all the file data.
1519 */ 1519 */
1520int 1520int
1521lfs_flush_dirops(struct lfs *fs) 1521lfs_flush_dirops(struct lfs *fs)
1522{ 1522{
1523 struct inode *ip, *nip; 1523 struct inode *ip, *nip;
1524 struct vnode *vp; 1524 struct vnode *vp;
1525 extern int lfs_dostats; 1525 extern int lfs_dostats;
1526 struct segment *sp; 1526 struct segment *sp;
1527 int flags = 0; 1527 int flags = 0;
1528 int error = 0; 1528 int error = 0;
1529 1529
1530 ASSERT_MAYBE_SEGLOCK(fs); 1530 ASSERT_MAYBE_SEGLOCK(fs);
1531 KASSERT(fs->lfs_nadirop == 0); 1531 KASSERT(fs->lfs_nadirop == 0);
1532 1532
1533 if (fs->lfs_ronly) 1533 if (fs->lfs_ronly)
1534 return EROFS; 1534 return EROFS;
1535 1535
1536 mutex_enter(&lfs_lock); 1536 mutex_enter(&lfs_lock);
1537 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { 1537 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
1538 mutex_exit(&lfs_lock); 1538 mutex_exit(&lfs_lock);
1539 return 0; 1539 return 0;
1540 } else 1540 } else
1541 mutex_exit(&lfs_lock); 1541 mutex_exit(&lfs_lock);
1542 1542
1543 if (lfs_dostats) 1543 if (lfs_dostats)
1544 ++lfs_stats.flush_invoked; 1544 ++lfs_stats.flush_invoked;
1545 1545
1546 lfs_imtime(fs); 1546 lfs_imtime(fs);
1547 lfs_seglock(fs, flags); 1547 lfs_seglock(fs, flags);
1548 sp = fs->lfs_sp; 1548 sp = fs->lfs_sp;
1549 1549
1550 /* 1550 /*
1551 * lfs_writevnodes, optimized to get dirops out of the way. 1551 * lfs_writevnodes, optimized to get dirops out of the way.
1552 * Only write dirops, and don't flush files' pages, only 1552 * Only write dirops, and don't flush files' pages, only
1553 * blocks from the directories. 1553 * blocks from the directories.
1554 * 1554 *
1555 * We don't need to vref these files because they are 1555 * We don't need to vref these files because they are
1556 * dirops and so hold an extra reference until the 1556 * dirops and so hold an extra reference until the
1557 * segunlock clears them of that status. 1557 * segunlock clears them of that status.
1558 * 1558 *
1559 * We don't need to check for IN_ADIROP because we know that 1559 * We don't need to check for IN_ADIROP because we know that
1560 * no dirops are active. 1560 * no dirops are active.
1561 * 1561 *
1562 */ 1562 */
1563 mutex_enter(&lfs_lock); 1563 mutex_enter(&lfs_lock);
1564 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 1564 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
1565 nip = TAILQ_NEXT(ip, i_lfs_dchain); 1565 nip = TAILQ_NEXT(ip, i_lfs_dchain);
1566 mutex_exit(&lfs_lock); 1566 mutex_exit(&lfs_lock);
1567 vp = ITOV(ip); 1567 vp = ITOV(ip);
1568 mutex_enter(vp->v_interlock); 1568 mutex_enter(vp->v_interlock);
1569 1569
1570 KASSERT((ip->i_flag & IN_ADIROP) == 0); 1570 KASSERT((ip->i_flag & IN_ADIROP) == 0);
1571 KASSERT(vp->v_uflag & VU_DIROP); 1571 KASSERT(vp->v_uflag & VU_DIROP);
1572 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0); 1572 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);
1573 1573
1574 /* 1574 /*
1575 * All writes to directories come from dirops; all 1575 * All writes to directories come from dirops; all
1576 * writes to files' direct blocks go through the page 1576 * writes to files' direct blocks go through the page
1577 * cache, which we're not touching. Reads to files 1577 * cache, which we're not touching. Reads to files
1578 * and/or directories will not be affected by writing 1578 * and/or directories will not be affected by writing
1579 * directory blocks, inodes and file inodes. So we don't 1579 * directory blocks, inodes and file inodes. So we don't
1580 * really need to lock. 1580 * really need to lock.
1581 */ 1581 */
1582 if (vdead_check(vp, VDEAD_NOWAIT) != 0) { 1582 if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
1583 mutex_exit(vp->v_interlock); 1583 mutex_exit(vp->v_interlock);
1584 mutex_enter(&lfs_lock); 1584 mutex_enter(&lfs_lock);
1585 continue; 1585 continue;
1586 } 1586 }
1587 mutex_exit(vp->v_interlock); 1587 mutex_exit(vp->v_interlock);
1588 /* XXX see below 1588 /* XXX see below
1589 * waslocked = VOP_ISLOCKED(vp); 1589 * waslocked = VOP_ISLOCKED(vp);
1590 */ 1590 */
1591 if (vp->v_type != VREG && 1591 if (vp->v_type != VREG &&
1592 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) { 1592 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
1593 error = lfs_writefile(fs, sp, vp); 1593 error = lfs_writefile(fs, sp, vp);
1594 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1594 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1595 !(ip->i_flag & IN_ALLMOD)) { 1595 !(ip->i_flag & IN_ALLMOD)) {
1596 mutex_enter(&lfs_lock); 1596 mutex_enter(&lfs_lock);
1597 LFS_SET_UINO(ip, IN_MODIFIED); 1597 LFS_SET_UINO(ip, IN_MODIFIED);
1598 mutex_exit(&lfs_lock); 1598 mutex_exit(&lfs_lock);
1599 } 1599 }
1600 if (error && (sp->seg_flags & SEGM_SINGLE)) { 1600 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1601 mutex_enter(&lfs_lock); 1601 mutex_enter(&lfs_lock);
1602 error = EAGAIN; 1602 error = EAGAIN;
1603 break; 1603 break;
1604 } 1604 }
1605 } 1605 }
1606 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1606 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1607 error = lfs_writeinode(fs, sp, ip); 1607 error = lfs_writeinode(fs, sp, ip);
1608 mutex_enter(&lfs_lock); 1608 mutex_enter(&lfs_lock);
1609 if (error && (sp->seg_flags & SEGM_SINGLE)) { 1609 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1610 error = EAGAIN; 1610 error = EAGAIN;
1611 break; 1611 break;
1612 } 1612 }
1613 1613
1614 /* 1614 /*
1615 * We might need to update these inodes again, 1615 * We might need to update these inodes again,
1616 * for example, if they have data blocks to write. 1616 * for example, if they have data blocks to write.
1617 * Make sure that after this flush, they are still 1617 * Make sure that after this flush, they are still
1618 * marked IN_MODIFIED so that we don't forget to 1618 * marked IN_MODIFIED so that we don't forget to
1619 * write them. 1619 * write them.
1620 */ 1620 */
1621 /* XXX only for non-directories? --KS */ 1621 /* XXX only for non-directories? --KS */
1622 LFS_SET_UINO(ip, IN_MODIFIED); 1622 LFS_SET_UINO(ip, IN_MODIFIED);
1623 } 1623 }
1624 mutex_exit(&lfs_lock); 1624 mutex_exit(&lfs_lock);
1625 /* We've written all the dirops there are */ 1625 /* We've written all the dirops there are */
1626 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); 1626 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
1627 lfs_finalize_fs_seguse(fs); 1627 lfs_finalize_fs_seguse(fs);
1628 (void) lfs_writeseg(fs, sp); 1628 (void) lfs_writeseg(fs, sp);
1629 lfs_segunlock(fs); 1629 lfs_segunlock(fs);
1630 1630
1631 return error; 1631 return error;
1632} 1632}
1633 1633
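The dchain walk in lfs_flush_dirops() above uses a common NetBSD pattern: capture the successor while lfs_lock is held, drop the lock for the actual write, retake it before advancing. That is safe here only because dirop vnodes hold an extra reference until segunlock, as the comment notes. The shape of the loop on a plain list, toy_* names hypothetical:

#include <stdio.h>

struct toy_node {
        int id;
        struct toy_node *next;
};

static void
toy_lock(void)
{
        /* mutex_enter(&lfs_lock) stand-in */
}

static void
toy_unlock(void)
{
        /* mutex_exit(&lfs_lock) stand-in */
}

int
main(void)
{
        struct toy_node c = { 3, NULL };
        struct toy_node b = { 2, &c };
        struct toy_node a = { 1, &b };
        struct toy_node *ip, *nip;

        toy_lock();
        for (ip = &a; ip != NULL; ip = nip) {
                nip = ip->next; /* successor captured under the lock */
                toy_unlock();
                printf("write node %d\n", ip->id); /* unlocked work */
                toy_lock();
        }
        toy_unlock();
        return 0;
}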
1634/* 1634/*
1635 * Flush all vnodes for which the pagedaemon has requested pageouts. 1635 * Flush all vnodes for which the pagedaemon has requested pageouts.
1636 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop() 1636 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
1637 * has just run, this would be an error). If we have to skip a vnode 1637 * has just run, this would be an error). If we have to skip a vnode
1638 * for any reason, just skip it; if we have to wait for the cleaner, 1638 * for any reason, just skip it; if we have to wait for the cleaner,
1639 * abort. The writer daemon will call us again later. 1639 * abort. The writer daemon will call us again later.
1640 */ 1640 */
1641int 1641int
1642lfs_flush_pchain(struct lfs *fs) 1642lfs_flush_pchain(struct lfs *fs)
1643{ 1643{
1644 struct inode *ip, *nip; 1644 struct inode *ip, *nip;
1645 struct vnode *vp; 1645 struct vnode *vp;
1646 extern int lfs_dostats; 1646 extern int lfs_dostats;
1647 struct segment *sp; 1647 struct segment *sp;
1648 int error, error2; 1648 int error, error2;
1649 1649
1650 ASSERT_NO_SEGLOCK(fs); 1650 ASSERT_NO_SEGLOCK(fs);
1651 1651
1652 if (fs->lfs_ronly) 1652 if (fs->lfs_ronly)
1653 return EROFS; 1653 return EROFS;
1654 1654
1655 mutex_enter(&lfs_lock); 1655 mutex_enter(&lfs_lock);
1656 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { 1656 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
1657 mutex_exit(&lfs_lock); 1657 mutex_exit(&lfs_lock);
1658 return 0; 1658 return 0;
1659 } else 1659 } else
1660 mutex_exit(&lfs_lock); 1660 mutex_exit(&lfs_lock);
1661 1661
1662 /* Get dirops out of the way */ 1662 /* Get dirops out of the way */
1663 if ((error = lfs_flush_dirops(fs)) != 0) 1663 if ((error = lfs_flush_dirops(fs)) != 0)
1664 return error; 1664 return error;
1665 1665
1666 if (lfs_dostats) 1666 if (lfs_dostats)
1667 ++lfs_stats.flush_invoked; 1667 ++lfs_stats.flush_invoked;
1668 1668
1669 /* 1669 /*
1670 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts. 1670 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
1671 */ 1671 */
1672 lfs_imtime(fs); 1672 lfs_imtime(fs);
1673 lfs_seglock(fs, 0); 1673 lfs_seglock(fs, 0);
1674 sp = fs->lfs_sp; 1674 sp = fs->lfs_sp;
1675 1675
1676 /* 1676 /*
1677 * lfs_writevnodes, optimized to clear pageout requests. 1677 * lfs_writevnodes, optimized to clear pageout requests.
1678 * Only write non-dirop files that are in the pageout queue. 1678 * Only write non-dirop files that are in the pageout queue.
1679 * We're very conservative about what we write; we want to be 1679 * We're very conservative about what we write; we want to be
1680 * fast and async. 1680 * fast and async.
1681 */ 1681 */
1682 mutex_enter(&lfs_lock); 1682 mutex_enter(&lfs_lock);
1683 top: 1683 top:
1684 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { 1684 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
1685 nip = TAILQ_NEXT(ip, i_lfs_pchain); 1685 nip = TAILQ_NEXT(ip, i_lfs_pchain);
1686 vp = ITOV(ip); 1686 vp = ITOV(ip);
1687 1687
1688 if (!(ip->i_flags & IN_PAGING)) 1688 if (!(ip->i_flags & IN_PAGING))
1689 goto top; 1689 goto top;
1690 1690
1691 mutex_enter(vp->v_interlock); 1691 mutex_enter(vp->v_interlock);
1692 if (vdead_check(vp, VDEAD_NOWAIT) != 0 || 1692 if (vdead_check(vp, VDEAD_NOWAIT) != 0 ||
1693 (vp->v_uflag & VU_DIROP) != 0) { 1693 (vp->v_uflag & VU_DIROP) != 0) {
1694 mutex_exit(vp->v_interlock); 1694 mutex_exit(vp->v_interlock);
1695 continue; 1695 continue;
1696 } 1696 }
1697 if (vp->v_type != VREG) { 1697 if (vp->v_type != VREG) {
1698 mutex_exit(vp->v_interlock); 1698 mutex_exit(vp->v_interlock);
1699 continue; 1699 continue;
1700 } 1700 }
1701 if (vget(vp, LK_NOWAIT, false /* !wait */)) 1701 if (vget(vp, LK_NOWAIT, false /* !wait */))
1702 continue; 1702 continue;
1703 mutex_exit(&lfs_lock); 1703 mutex_exit(&lfs_lock);
1704 1704
1705 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_RETRY) != 0) { 1705 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_RETRY) != 0) {
1706 vrele(vp); 1706 vrele(vp);
1707 mutex_enter(&lfs_lock); 1707 mutex_enter(&lfs_lock);
1708 continue; 1708 continue;
1709 } 1709 }
1710 1710
1711 error = lfs_writefile(fs, sp, vp); 1711 error = lfs_writefile(fs, sp, vp);
1712 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1712 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1713 !(ip->i_flag & IN_ALLMOD)) { 1713 !(ip->i_flag & IN_ALLMOD)) {
1714 mutex_enter(&lfs_lock); 1714 mutex_enter(&lfs_lock);
1715 LFS_SET_UINO(ip, IN_MODIFIED); 1715 LFS_SET_UINO(ip, IN_MODIFIED);
1716 mutex_exit(&lfs_lock); 1716 mutex_exit(&lfs_lock);
1717 } 1717 }
1718 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1718 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1719 error2 = lfs_writeinode(fs, sp, ip); 1719 error2 = lfs_writeinode(fs, sp, ip);
1720 1720
1721 VOP_UNLOCK(vp); 1721 VOP_UNLOCK(vp);
1722 vrele(vp); 1722 vrele(vp);
1723 1723
1724 if (error == EAGAIN || error2 == EAGAIN) { 1724 if (error == EAGAIN || error2 == EAGAIN) {
1725 lfs_writeseg(fs, sp); 1725 lfs_writeseg(fs, sp);
1726 mutex_enter(&lfs_lock); 1726 mutex_enter(&lfs_lock);
1727 break; 1727 break;
1728 } 1728 }
1729 mutex_enter(&lfs_lock); 1729 mutex_enter(&lfs_lock);
1730 } 1730 }
1731 mutex_exit(&lfs_lock); 1731 mutex_exit(&lfs_lock);
1732 (void) lfs_writeseg(fs, sp); 1732 (void) lfs_writeseg(fs, sp);
1733 lfs_segunlock(fs); 1733 lfs_segunlock(fs);
1734 1734
1735 return 0; 1735 return 0;
1736} 1736}
1737 1737
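Unlike the dirop flush, lfs_flush_pchain() above never sleeps for a vnode: vget() and vn_lock() both get LK_NOWAIT, and contention simply means skip it, the writer daemon will come back. The same fast-and-async stance sketched with a plain pthread trylock (illustrative only, not the kernel locking API):

#include <stdio.h>
#include <pthread.h>

int
main(void)
{
        pthread_mutex_t vlock = PTHREAD_MUTEX_INITIALIZER;

        if (pthread_mutex_trylock(&vlock) != 0) {
                puts("busy: skip this vnode, retry on the next flush");
        } else {
                puts("got it: write the file, write the inode");
                pthread_mutex_unlock(&vlock);
        }
        return 0;
}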
1738/* 1738/*
1739 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}. 1739 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1740 */ 1740 */
1741int 1741int
1742lfs_fcntl(void *v) 1742lfs_fcntl(void *v)
1743{ 1743{
1744 struct vop_fcntl_args /* { 1744 struct vop_fcntl_args /* {
1745 struct vnode *a_vp; 1745 struct vnode *a_vp;
1746 u_int a_command; 1746 u_int a_command;
1747 void * a_data; 1747 void * a_data;
1748 int a_fflag; 1748 int a_fflag;
1749 kauth_cred_t a_cred; 1749 kauth_cred_t a_cred;
1750 } */ *ap = v; 1750 } */ *ap = v;
1751 struct timeval tv; 1751 struct timeval tv;
1752 struct timeval *tvp; 1752 struct timeval *tvp;
1753 BLOCK_INFO *blkiov; 1753 BLOCK_INFO *blkiov;
1754 CLEANERINFO *cip; 1754 CLEANERINFO *cip;
1755 SEGUSE *sup; 1755 SEGUSE *sup;
1756 int blkcnt, error; 1756 int blkcnt, error;
1757 size_t fh_size; 1757 size_t fh_size;
1758 struct lfs_fcntl_markv blkvp; 1758 struct lfs_fcntl_markv blkvp;
1759 struct lwp *l; 1759 struct lwp *l;
1760 fsid_t *fsidp; 1760 fsid_t *fsidp;
1761 struct lfs *fs; 1761 struct lfs *fs;
1762 struct buf *bp; 1762 struct buf *bp;
1763 fhandle_t *fhp; 1763 fhandle_t *fhp;
1764 daddr_t off; 1764 daddr_t off;
1765 int oclean; 1765 int oclean;
1766 1766
1767 /* Only respect LFS fcntls on fs root or Ifile */ 1767 /* Only respect LFS fcntls on fs root or Ifile */
1768 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO && 1768 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1769 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) { 1769 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1770 return ulfs_fcntl(v); 1770 return ulfs_fcntl(v);
1771 } 1771 }
1772 1772
1773 /* Avoid locking a draining lock */ 1773 /* Avoid locking a draining lock */
1774 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) { 1774 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1775 return ESHUTDOWN; 1775 return ESHUTDOWN;
1776 } 1776 }
1777 1777
1778 /* LFS control and monitoring fcntls are available only to root */ 1778 /* LFS control and monitoring fcntls are available only to root */
1779 l = curlwp; 1779 l = curlwp;
1780 if (((ap->a_command & 0xff00) >> 8) == 'L' && 1780 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1781 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, 1781 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1782 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0) 1782 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1783 return (error); 1783 return (error);
1784 1784
1785 fs = VTOI(ap->a_vp)->i_lfs; 1785 fs = VTOI(ap->a_vp)->i_lfs;
1786 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx; 1786 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1787 1787
1788 error = 0; 1788 error = 0;
1789 switch ((int)ap->a_command) { 1789 switch ((int)ap->a_command) {
1790 case LFCNSEGWAITALL_COMPAT_50: 1790 case LFCNSEGWAITALL_COMPAT_50:
1791 case LFCNSEGWAITALL_COMPAT: 1791 case LFCNSEGWAITALL_COMPAT:
1792 fsidp = NULL; 1792 fsidp = NULL;
1793 /* FALLTHROUGH */ 1793 /* FALLTHROUGH */
1794 case LFCNSEGWAIT_COMPAT_50: 1794 case LFCNSEGWAIT_COMPAT_50:
1795 case LFCNSEGWAIT_COMPAT: 1795 case LFCNSEGWAIT_COMPAT:
1796 { 1796 {
1797 struct timeval50 *tvp50 1797 struct timeval50 *tvp50
1798 = (struct timeval50 *)ap->a_data; 1798 = (struct timeval50 *)ap->a_data;
1799 timeval50_to_timeval(tvp50, &tv); 1799 timeval50_to_timeval(tvp50, &tv);
1800 tvp = &tv; 1800 tvp = &tv;
1801 } 1801 }
1802 goto segwait_common; 1802 goto segwait_common;
1803 case LFCNSEGWAITALL: 1803 case LFCNSEGWAITALL:
1804 fsidp = NULL; 1804 fsidp = NULL;
1805 /* FALLTHROUGH */ 1805 /* FALLTHROUGH */
1806 case LFCNSEGWAIT: 1806 case LFCNSEGWAIT:
1807 tvp = (struct timeval *)ap->a_data; 1807 tvp = (struct timeval *)ap->a_data;
1808segwait_common: 1808segwait_common:
1809 mutex_enter(&lfs_lock); 1809 mutex_enter(&lfs_lock);
1810 ++fs->lfs_sleepers; 1810 ++fs->lfs_sleepers;
1811 mutex_exit(&lfs_lock); 1811 mutex_exit(&lfs_lock);
1812 1812
1813 error = lfs_segwait(fsidp, tvp); 1813 error = lfs_segwait(fsidp, tvp);
1814 1814
1815 mutex_enter(&lfs_lock); 1815 mutex_enter(&lfs_lock);
1816 if (--fs->lfs_sleepers == 0) 1816 if (--fs->lfs_sleepers == 0)
1817 wakeup(&fs->lfs_sleepers); 1817 wakeup(&fs->lfs_sleepers);
1818 mutex_exit(&lfs_lock); 1818 mutex_exit(&lfs_lock);
1819 return error; 1819 return error;
1820 1820
1821 case LFCNBMAPV: 1821 case LFCNBMAPV:
1822 case LFCNMARKV: 1822 case LFCNMARKV:
1823 blkvp = *(struct lfs_fcntl_markv *)ap->a_data; 1823 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1824 1824
1825 blkcnt = blkvp.blkcnt; 1825 blkcnt = blkvp.blkcnt;
1826 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1826 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1827 return (EINVAL); 1827 return (EINVAL);
1828 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1828 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1829 if ((error = copyin(blkvp.blkiov, blkiov, 1829 if ((error = copyin(blkvp.blkiov, blkiov,
1830 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1830 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1831 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1831 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1832 return error; 1832 return error;
1833 } 1833 }
1834 1834
1835 mutex_enter(&lfs_lock); 1835 mutex_enter(&lfs_lock);
1836 ++fs->lfs_sleepers; 1836 ++fs->lfs_sleepers;
1837 mutex_exit(&lfs_lock); 1837 mutex_exit(&lfs_lock);
1838 if (ap->a_command == LFCNBMAPV) 1838 if (ap->a_command == LFCNBMAPV)
1839 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); 1839 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt);
1840 else /* LFCNMARKV */ 1840 else /* LFCNMARKV */
1841 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); 1841 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt);
1842 if (error == 0) 1842 if (error == 0)
1843 error = copyout(blkiov, blkvp.blkiov, 1843 error = copyout(blkiov, blkvp.blkiov,
1844 blkcnt * sizeof(BLOCK_INFO)); 1844 blkcnt * sizeof(BLOCK_INFO));
1845 mutex_enter(&lfs_lock); 1845 mutex_enter(&lfs_lock);
1846 if (--fs->lfs_sleepers == 0) 1846 if (--fs->lfs_sleepers == 0)
1847 wakeup(&fs->lfs_sleepers); 1847 wakeup(&fs->lfs_sleepers);
1848 mutex_exit(&lfs_lock); 1848 mutex_exit(&lfs_lock);
1849 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1849 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1850 return error; 1850 return error;
1851 1851
1852 case LFCNRECLAIM: 1852 case LFCNRECLAIM:
1853 /* 1853 /*
1854 * Flush dirops and write Ifile, allowing empty segments 1854 * Flush dirops and write Ifile, allowing empty segments
1855 * to be immediately reclaimed. 1855 * to be immediately reclaimed.
1856 */ 1856 */
1857 lfs_writer_enter(fs, "pndirop"); 1857 lfs_writer_enter(fs, "pndirop");
1858 off = lfs_sb_getoffset(fs); 1858 off = lfs_sb_getoffset(fs);
1859 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 1859 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1860 lfs_flush_dirops(fs); 1860 lfs_flush_dirops(fs);
1861 LFS_CLEANERINFO(cip, fs, bp); 1861 LFS_CLEANERINFO(cip, fs, bp);
1862 oclean = cip->clean; 1862 oclean = cip->clean;
1863 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1863 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1864 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 1864 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1865 fs->lfs_sp->seg_flags |= SEGM_PROT; 1865 fs->lfs_sp->seg_flags |= SEGM_PROT;
1866 lfs_segunlock(fs); 1866 lfs_segunlock(fs);
1867 lfs_writer_leave(fs); 1867 lfs_writer_leave(fs);
1868 1868
1869#ifdef DEBUG 1869#ifdef DEBUG
1870 LFS_CLEANERINFO(cip, fs, bp); 1870 LFS_CLEANERINFO(cip, fs, bp);
1871 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 1871 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
1872 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 1872 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
1873 fs->lfs_offset - off, cip->clean - oclean, 1873 lfs_sb_getoffset(fs) - off, cip->clean - oclean,
1874 fs->lfs_activesb)); 1874 fs->lfs_activesb));
1875 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 1875 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
1876#else 1876#else
1877 __USE(oclean); 1877 __USE(oclean);
1878 __USE(off); 1878 __USE(off);
1879#endif 1879#endif
1880 1880
1881 return 0; 1881 return 0;
1882 1882
1883 case LFCNIFILEFH_COMPAT: 1883 case LFCNIFILEFH_COMPAT:
1884 /* Return the filehandle of the Ifile */ 1884 /* Return the filehandle of the Ifile */
1885 if ((error = kauth_authorize_system(l->l_cred, 1885 if ((error = kauth_authorize_system(l->l_cred,
1886 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0) 1886 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
1887 return (error); 1887 return (error);
1888 fhp = (struct fhandle *)ap->a_data; 1888 fhp = (struct fhandle *)ap->a_data;
1889 fhp->fh_fsid = *fsidp; 1889 fhp->fh_fsid = *fsidp;
1890 fh_size = 16; /* former VFS_MAXFIDSIZ */ 1890 fh_size = 16; /* former VFS_MAXFIDSIZ */
1891 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1891 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1892 1892
1893 case LFCNIFILEFH_COMPAT2: 1893 case LFCNIFILEFH_COMPAT2:
1894 case LFCNIFILEFH: 1894 case LFCNIFILEFH:
1895 /* Return the filehandle of the Ifile */ 1895 /* Return the filehandle of the Ifile */
1896 fhp = (struct fhandle *)ap->a_data; 1896 fhp = (struct fhandle *)ap->a_data;
1897 fhp->fh_fsid = *fsidp; 1897 fhp->fh_fsid = *fsidp;
1898 fh_size = sizeof(struct lfs_fhandle) - 1898 fh_size = sizeof(struct lfs_fhandle) -
1899 offsetof(fhandle_t, fh_fid); 1899 offsetof(fhandle_t, fh_fid);
1900 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1900 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1901 1901
1902 case LFCNREWIND: 1902 case LFCNREWIND:
1903 /* Move lfs_offset to the lowest-numbered segment */ 1903 /* Move lfs_offset to the lowest-numbered segment */
1904 return lfs_rewind(fs, *(int *)ap->a_data); 1904 return lfs_rewind(fs, *(int *)ap->a_data);
1905 1905
1906 case LFCNINVAL: 1906 case LFCNINVAL:
1907 /* Mark a segment SEGUSE_INVAL */ 1907 /* Mark a segment SEGUSE_INVAL */
1908 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); 1908 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
1909 if (sup->su_nbytes > 0) { 1909 if (sup->su_nbytes > 0) {
1910 brelse(bp, 0); 1910 brelse(bp, 0);
1911 lfs_unset_inval_all(fs); 1911 lfs_unset_inval_all(fs);
1912 return EBUSY; 1912 return EBUSY;
1913 } 1913 }
1914 sup->su_flags |= SEGUSE_INVAL; 1914 sup->su_flags |= SEGUSE_INVAL;
1915 VOP_BWRITE(bp->b_vp, bp); 1915 VOP_BWRITE(bp->b_vp, bp);
1916 return 0; 1916 return 0;
1917 1917
1918 case LFCNRESIZE: 1918 case LFCNRESIZE:
1919 /* Resize the filesystem */ 1919 /* Resize the filesystem */
1920 return lfs_resize_fs(fs, *(int *)ap->a_data); 1920 return lfs_resize_fs(fs, *(int *)ap->a_data);
1921 1921
1922 case LFCNWRAPSTOP: 1922 case LFCNWRAPSTOP:
1923 case LFCNWRAPSTOP_COMPAT: 1923 case LFCNWRAPSTOP_COMPAT:
1924 /* 1924 /*
1925 * Hold lfs_newseg at segment 0; if requested, sleep until 1925 * Hold lfs_newseg at segment 0; if requested, sleep until
1926 	 * the filesystem wraps around. This supports external agents 1926 	 * the filesystem wraps around. This supports external agents
1927 	 * (dump, fsck-based regression tests) that need to inspect 1927 	 * (dump, fsck-based regression tests) that need to inspect
1928 	 * a snapshot of the filesystem without requiring that all 1928 	 * a snapshot of the filesystem without requiring that all
1929 	 * fs activity stop (see the sketch below LFCNWRAPSTATUS). 1929 	 * fs activity stop (see the sketch below LFCNWRAPSTATUS).
1930 */ 1930 */
1931 if (fs->lfs_stoplwp == curlwp) 1931 if (fs->lfs_stoplwp == curlwp)
1932 return EALREADY; 1932 return EALREADY;
1933 1933
1934 mutex_enter(&lfs_lock); 1934 mutex_enter(&lfs_lock);
1935 while (fs->lfs_stoplwp != NULL) 1935 while (fs->lfs_stoplwp != NULL)
1936 cv_wait(&fs->lfs_stopcv, &lfs_lock); 1936 cv_wait(&fs->lfs_stopcv, &lfs_lock);
1937 fs->lfs_stoplwp = curlwp; 1937 fs->lfs_stoplwp = curlwp;
1938 if (fs->lfs_nowrap == 0) 1938 if (fs->lfs_nowrap == 0)
1939 log(LOG_NOTICE, "%s: disabled log wrap\n", 1939 log(LOG_NOTICE, "%s: disabled log wrap\n",
1940 lfs_sb_getfsmnt(fs)); 1940 lfs_sb_getfsmnt(fs));
1941 ++fs->lfs_nowrap; 1941 ++fs->lfs_nowrap;
1942 if (*(int *)ap->a_data == 1 1942 if (*(int *)ap->a_data == 1
1943 || ap->a_command == LFCNWRAPSTOP_COMPAT) { 1943 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
1944 			log(LOG_NOTICE, "LFCNWRAPSTOP waiting for log wrap\n"); 1944 			log(LOG_NOTICE, "LFCNWRAPSTOP waiting for log wrap\n");
1945 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1945 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
1946 "segwrap", 0, &lfs_lock); 1946 "segwrap", 0, &lfs_lock);
1947 			log(LOG_NOTICE, "LFCNWRAPSTOP done waiting\n"); 1947 			log(LOG_NOTICE, "LFCNWRAPSTOP done waiting\n");
1948 if (error) { 1948 if (error) {
1949 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 1949 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
1950 } 1950 }
1951 } 1951 }
1952 mutex_exit(&lfs_lock); 1952 mutex_exit(&lfs_lock);
1953 return 0; 1953 return 0;
1954 1954
1955 case LFCNWRAPGO: 1955 case LFCNWRAPGO:
1956 case LFCNWRAPGO_COMPAT: 1956 case LFCNWRAPGO_COMPAT:
1957 /* 1957 /*
1958 * Having done its work, the agent wakes up the writer. 1958 * Having done its work, the agent wakes up the writer.
1959 * If the argument is 1, it sleeps until a new segment 1959 * If the argument is 1, it sleeps until a new segment
1960 * is selected. 1960 * is selected.
1961 */ 1961 */
1962 mutex_enter(&lfs_lock); 1962 mutex_enter(&lfs_lock);
1963 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 1963 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
1964 ap->a_command == LFCNWRAPGO_COMPAT ? 1 : 1964 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
1965 *((int *)ap->a_data)); 1965 *((int *)ap->a_data));
1966 mutex_exit(&lfs_lock); 1966 mutex_exit(&lfs_lock);
1967 return error; 1967 return error;
1968 1968
1969 case LFCNWRAPPASS: 1969 case LFCNWRAPPASS:
1970 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 1970 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
1971 return EALREADY; 1971 return EALREADY;
1972 mutex_enter(&lfs_lock); 1972 mutex_enter(&lfs_lock);
1973 if (fs->lfs_stoplwp != curlwp) { 1973 if (fs->lfs_stoplwp != curlwp) {
1974 mutex_exit(&lfs_lock); 1974 mutex_exit(&lfs_lock);
1975 return EALREADY; 1975 return EALREADY;
1976 } 1976 }
1977 if (fs->lfs_nowrap == 0) { 1977 if (fs->lfs_nowrap == 0) {
1978 mutex_exit(&lfs_lock); 1978 mutex_exit(&lfs_lock);
1979 return EBUSY; 1979 return EBUSY;
1980 } 1980 }
1981 fs->lfs_wrappass = 1; 1981 fs->lfs_wrappass = 1;
1982 wakeup(&fs->lfs_wrappass); 1982 wakeup(&fs->lfs_wrappass);
1983 /* Wait for the log to wrap, if asked */ 1983 /* Wait for the log to wrap, if asked */
1984 if (*(int *)ap->a_data) { 1984 if (*(int *)ap->a_data) {
1985 vref(ap->a_vp); 1985 vref(ap->a_vp);
1986 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 1986 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
1987 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 1987 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
1988 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1988 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
1989 "segwrap", 0, &lfs_lock); 1989 "segwrap", 0, &lfs_lock);
1990 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 1990 log(LOG_NOTICE, "LFCNPASS done waiting\n");
1991 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 1991 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
1992 vrele(ap->a_vp); 1992 vrele(ap->a_vp);
1993 } 1993 }
1994 mutex_exit(&lfs_lock); 1994 mutex_exit(&lfs_lock);
1995 return error; 1995 return error;
1996 1996
1997 case LFCNWRAPSTATUS: 1997 case LFCNWRAPSTATUS:
1998 mutex_enter(&lfs_lock); 1998 mutex_enter(&lfs_lock);
1999 *(int *)ap->a_data = fs->lfs_wrapstatus; 1999 *(int *)ap->a_data = fs->lfs_wrapstatus;
2000 mutex_exit(&lfs_lock); 2000 mutex_exit(&lfs_lock);
2001 return 0; 2001 return 0;
2002 2002
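Taken together, the LFCNWRAP* cases implement a simple snapshot protocol: an agent stops log wrap, inspects the frozen log, then releases the writer. A minimal sketch of that protocol from userland (the discarded return values and the choice of waitfor are illustrative assumptions):

#include <fcntl.h>
#include <ufs/lfs/lfs.h>	/* LFCNWRAPSTOP, LFCNWRAPGO */

/* Open a snapshot window on the LFS mount behind fd: hold
 * lfs_newseg at segment 0, let the caller examine a stable view
 * of the log, then let the writer proceed again. */
static void
snapshot_window(int fd)
{
	int waitfor = 0;	/* 1 would sleep until the log wraps */

	(void)fcntl(fd, LFCNWRAPSTOP, &waitfor);
	/* ... dump or fsck-style inspection happens here ... */
	(void)fcntl(fd, LFCNWRAPGO, &waitfor);
}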
2003 default: 2003 default:
2004 return ulfs_fcntl(v); 2004 return ulfs_fcntl(v);
2005 } 2005 }
2006 return 0; 2006 return 0;
2007} 2007}
2008 2008
2009/* 2009/*
2010 * Return the last logical file offset that should be written for this file 2010 * Return the last logical file offset that should be written for this file
2011 * if we're doing a write that ends at "size". If writing, we need to know 2011 * if we're doing a write that ends at "size". If writing, we need to know
2012 * about sizes on disk, i.e. fragments if there are any; if reading, we need 2012 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2013 * to know about entire blocks. 2013 * to know about entire blocks.
2014 */ 2014 */
2015void 2015void
2016lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) 2016lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2017{ 2017{
2018 struct inode *ip = VTOI(vp); 2018 struct inode *ip = VTOI(vp);
2019 struct lfs *fs = ip->i_lfs; 2019 struct lfs *fs = ip->i_lfs;
2020 daddr_t olbn, nlbn; 2020 daddr_t olbn, nlbn;
2021 2021
2022 olbn = lfs_lblkno(fs, ip->i_size); 2022 olbn = lfs_lblkno(fs, ip->i_size);
2023 nlbn = lfs_lblkno(fs, size); 2023 nlbn = lfs_lblkno(fs, size);
2024 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) { 2024 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2025 *eobp = lfs_fragroundup(fs, size); 2025 *eobp = lfs_fragroundup(fs, size);
2026 } else { 2026 } else {
2027 *eobp = lfs_blkroundup(fs, size); 2027 *eobp = lfs_blkroundup(fs, size);
2028 } 2028 }
2029} 2029}
2030 2030
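A worked example makes the fragment/block distinction above concrete. With a hypothetical geometry of 8192-byte blocks and 1024-byte fragments, a write ending at offset 5000 inside the direct-block range rounds up to the next fragment boundary (5120), while the same offset past the ULFS_NDADDR direct blocks, or with GOP_SIZE_MEM set, rounds up to a whole block (8192):

#include <stdio.h>

/* Round x up to a multiple of the power-of-two size s; this is
 * the arithmetic behind lfs_fragroundup()/lfs_blkroundup(), here
 * with made-up geometry instead of the superblock fields. */
static long long
roundup2(long long x, long long s)
{
	return (x + s - 1) & ~(s - 1);
}

int
main(void)
{
	long long bsize = 8192, fsize = 1024, size = 5000;

	printf("fragment-rounded eob: %lld\n", roundup2(size, fsize)); /* 5120 */
	printf("block-rounded eob:    %lld\n", roundup2(size, bsize)); /* 8192 */
	return 0;
}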
2031#ifdef DEBUG 2031#ifdef DEBUG
2032void lfs_dump_vop(void *); 2032void lfs_dump_vop(void *);
2033 2033
2034void 2034void
2035lfs_dump_vop(void *v) 2035lfs_dump_vop(void *v)
2036{ 2036{
2037 struct vop_putpages_args /* { 2037 struct vop_putpages_args /* {
2038 struct vnode *a_vp; 2038 struct vnode *a_vp;
2039 voff_t a_offlo; 2039 voff_t a_offlo;
2040 voff_t a_offhi; 2040 voff_t a_offhi;
2041 int a_flags; 2041 int a_flags;
2042 } */ *ap = v; 2042 } */ *ap = v;
2043 2043
2044#ifdef DDB 2044#ifdef DDB
2045 vfs_vnode_print(ap->a_vp, 0, printf); 2045 vfs_vnode_print(ap->a_vp, 0, printf);
2046#endif 2046#endif
2047 lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din); 2047 lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din);
2048} 2048}
2049#endif 2049#endif
2050 2050
2051int 2051int
2052lfs_mmap(void *v) 2052lfs_mmap(void *v)
2053{ 2053{
2054 struct vop_mmap_args /* { 2054 struct vop_mmap_args /* {
2055 const struct vnodeop_desc *a_desc; 2055 const struct vnodeop_desc *a_desc;
2056 struct vnode *a_vp; 2056 struct vnode *a_vp;
2057 vm_prot_t a_prot; 2057 vm_prot_t a_prot;
2058 kauth_cred_t a_cred; 2058 kauth_cred_t a_cred;
2059 } */ *ap = v; 2059 } */ *ap = v;
2060 2060
2061 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) 2061 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2062 return EOPNOTSUPP; 2062 return EOPNOTSUPP;
2063 return ulfs_mmap(v); 2063 return ulfs_mmap(v);
2064} 2064}
2065 2065
2066static int 2066static int
2067lfs_openextattr(void *v) 2067lfs_openextattr(void *v)
2068{ 2068{
2069 struct vop_openextattr_args /* { 2069 struct vop_openextattr_args /* {
2070 struct vnode *a_vp; 2070 struct vnode *a_vp;
2071 kauth_cred_t a_cred; 2071 kauth_cred_t a_cred;
2072 struct proc *a_p; 2072 struct proc *a_p;
2073 } */ *ap = v; 2073 } */ *ap = v;
2074 struct inode *ip = VTOI(ap->a_vp); 2074 struct inode *ip = VTOI(ap->a_vp);
2075 struct ulfsmount *ump = ip->i_ump; 2075 struct ulfsmount *ump = ip->i_ump;
2076 //struct lfs *fs = ip->i_lfs; 2076 //struct lfs *fs = ip->i_lfs;
2077 2077
2078 /* Not supported for ULFS1 file systems. */ 2078 /* Not supported for ULFS1 file systems. */
2079 if (ump->um_fstype == ULFS1) 2079 if (ump->um_fstype == ULFS1)
2080 return (EOPNOTSUPP); 2080 return (EOPNOTSUPP);
2081 2081
2082 /* XXX Not implemented for ULFS2 file systems. */ 2082 /* XXX Not implemented for ULFS2 file systems. */
2083 return (EOPNOTSUPP); 2083 return (EOPNOTSUPP);
2084} 2084}
2085 2085
2086static int 2086static int
2087lfs_closeextattr(void *v) 2087lfs_closeextattr(void *v)
2088{ 2088{
2089 struct vop_closeextattr_args /* { 2089 struct vop_closeextattr_args /* {
2090 struct vnode *a_vp; 2090 struct vnode *a_vp;
2091 int a_commit; 2091 int a_commit;
2092 kauth_cred_t a_cred; 2092 kauth_cred_t a_cred;
2093 struct proc *a_p; 2093 struct proc *a_p;
2094 } */ *ap = v; 2094 } */ *ap = v;
2095 struct inode *ip = VTOI(ap->a_vp); 2095 struct inode *ip = VTOI(ap->a_vp);
2096 struct ulfsmount *ump = ip->i_ump; 2096 struct ulfsmount *ump = ip->i_ump;
2097 //struct lfs *fs = ip->i_lfs; 2097 //struct lfs *fs = ip->i_lfs;
2098 2098
2099 /* Not supported for ULFS1 file systems. */ 2099 /* Not supported for ULFS1 file systems. */
2100 if (ump->um_fstype == ULFS1) 2100 if (ump->um_fstype == ULFS1)
2101 return (EOPNOTSUPP); 2101 return (EOPNOTSUPP);
2102 2102
2103 /* XXX Not implemented for ULFS2 file systems. */ 2103 /* XXX Not implemented for ULFS2 file systems. */
2104 return (EOPNOTSUPP); 2104 return (EOPNOTSUPP);
2105} 2105}
2106 2106
2107static int 2107static int
2108lfs_getextattr(void *v) 2108lfs_getextattr(void *v)
2109{ 2109{
2110 struct vop_getextattr_args /* { 2110 struct vop_getextattr_args /* {
2111 struct vnode *a_vp; 2111 struct vnode *a_vp;
2112 int a_attrnamespace; 2112 int a_attrnamespace;
2113 const char *a_name; 2113 const char *a_name;
2114 struct uio *a_uio; 2114 struct uio *a_uio;
2115 size_t *a_size; 2115 size_t *a_size;
2116 kauth_cred_t a_cred; 2116 kauth_cred_t a_cred;
2117 struct proc *a_p; 2117 struct proc *a_p;
2118 } */ *ap = v; 2118 } */ *ap = v;
2119 struct vnode *vp = ap->a_vp; 2119 struct vnode *vp = ap->a_vp;
2120 struct inode *ip = VTOI(vp); 2120 struct inode *ip = VTOI(vp);
2121 struct ulfsmount *ump = ip->i_ump; 2121 struct ulfsmount *ump = ip->i_ump;
2122 //struct lfs *fs = ip->i_lfs; 2122 //struct lfs *fs = ip->i_lfs;
2123 int error; 2123 int error;
2124 2124
2125 if (ump->um_fstype == ULFS1) { 2125 if (ump->um_fstype == ULFS1) {
2126#ifdef LFS_EXTATTR 2126#ifdef LFS_EXTATTR
2127 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2127 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2128 error = ulfs_getextattr(ap); 2128 error = ulfs_getextattr(ap);
2129 fstrans_done(vp->v_mount); 2129 fstrans_done(vp->v_mount);
2130#else 2130#else
2131 error = EOPNOTSUPP; 2131 error = EOPNOTSUPP;
2132#endif 2132#endif
2133 return error; 2133 return error;
2134 } 2134 }
2135 2135
2136 /* XXX Not implemented for ULFS2 file systems. */ 2136 /* XXX Not implemented for ULFS2 file systems. */
2137 return (EOPNOTSUPP); 2137 return (EOPNOTSUPP);
2138} 2138}
2139 2139
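All of the extattr entry points follow this same pattern: ULFS1 hands off to the ulfs layer when the kernel is built with LFS_EXTATTR, and everything else fails with EOPNOTSUPP. From userland the observable behavior is a plain error; a small probe (the path and attribute name here are made up for illustration):

#include <sys/extattr.h>
#include <errno.h>
#include <stdio.h>

/* Probe whether extended attributes work on an LFS file; on a
 * ULFS2 mount, or a ULFS1 mount built without LFS_EXTATTR, this
 * is expected to fail with EOPNOTSUPP. */
int
main(void)
{
	char buf[64];

	if (extattr_get_file("/lfs/somefile", EXTATTR_NAMESPACE_USER,
	    "myattr", buf, sizeof(buf)) == -1 && errno == EOPNOTSUPP)
		printf("extattrs not supported on this mount\n");
	return 0;
}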
2140static int 2140static int
2141lfs_setextattr(void *v) 2141lfs_setextattr(void *v)
2142{ 2142{
2143 struct vop_setextattr_args /* { 2143 struct vop_setextattr_args /* {
2144 struct vnode *a_vp; 2144 struct vnode *a_vp;
2145 int a_attrnamespace; 2145 int a_attrnamespace;
2146 const char *a_name; 2146 const char *a_name;
2147 struct uio *a_uio; 2147 struct uio *a_uio;
2148 kauth_cred_t a_cred; 2148 kauth_cred_t a_cred;
2149 struct proc *a_p; 2149 struct proc *a_p;
2150 } */ *ap = v; 2150 } */ *ap = v;
2151 struct vnode *vp = ap->a_vp; 2151 struct vnode *vp = ap->a_vp;
2152 struct inode *ip = VTOI(vp); 2152 struct inode *ip = VTOI(vp);
2153 struct ulfsmount *ump = ip->i_ump; 2153 struct ulfsmount *ump = ip->i_ump;
2154 //struct lfs *fs = ip->i_lfs; 2154 //struct lfs *fs = ip->i_lfs;
2155 int error; 2155 int error;
2156 2156
2157 if (ump->um_fstype == ULFS1) { 2157 if (ump->um_fstype == ULFS1) {
2158#ifdef LFS_EXTATTR 2158#ifdef LFS_EXTATTR
2159 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2159 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2160 error = ulfs_setextattr(ap); 2160 error = ulfs_setextattr(ap);
2161 fstrans_done(vp->v_mount); 2161 fstrans_done(vp->v_mount);
2162#else 2162#else
2163 error = EOPNOTSUPP; 2163 error = EOPNOTSUPP;
2164#endif 2164#endif
2165 return error; 2165 return error;
2166 } 2166 }
2167 2167
2168 /* XXX Not implemented for ULFS2 file systems. */ 2168 /* XXX Not implemented for ULFS2 file systems. */
2169 return (EOPNOTSUPP); 2169 return (EOPNOTSUPP);
2170} 2170}
2171 2171
2172static int 2172static int
2173lfs_listextattr(void *v) 2173lfs_listextattr(void *v)
2174{ 2174{
2175 struct vop_listextattr_args /* { 2175 struct vop_listextattr_args /* {
2176 struct vnode *a_vp; 2176 struct vnode *a_vp;
2177 int a_attrnamespace; 2177 int a_attrnamespace;
2178 struct uio *a_uio; 2178 struct uio *a_uio;
2179 size_t *a_size; 2179 size_t *a_size;
2180 kauth_cred_t a_cred; 2180 kauth_cred_t a_cred;
2181 struct proc *a_p; 2181 struct proc *a_p;
2182 } */ *ap = v; 2182 } */ *ap = v;
2183 struct vnode *vp = ap->a_vp; 2183 struct vnode *vp = ap->a_vp;
2184 struct inode *ip = VTOI(vp); 2184 struct inode *ip = VTOI(vp);
2185 struct ulfsmount *ump = ip->i_ump; 2185 struct ulfsmount *ump = ip->i_ump;
2186 //struct lfs *fs = ip->i_lfs; 2186 //struct lfs *fs = ip->i_lfs;
2187 int error; 2187 int error;
2188 2188
2189 if (ump->um_fstype == ULFS1) { 2189 if (ump->um_fstype == ULFS1) {
2190#ifdef LFS_EXTATTR 2190#ifdef LFS_EXTATTR
2191 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2191 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2192 error = ulfs_listextattr(ap); 2192 error = ulfs_listextattr(ap);
2193 fstrans_done(vp->v_mount); 2193 fstrans_done(vp->v_mount);
2194#else 2194#else
2195 error = EOPNOTSUPP; 2195 error = EOPNOTSUPP;
2196#endif 2196#endif
2197 return error; 2197 return error;
2198 } 2198 }
2199 2199
2200 /* XXX Not implemented for ULFS2 file systems. */ 2200 /* XXX Not implemented for ULFS2 file systems. */
2201 return (EOPNOTSUPP); 2201 return (EOPNOTSUPP);
2202} 2202}
2203 2203
2204static int 2204static int
2205lfs_deleteextattr(void *v) 2205lfs_deleteextattr(void *v)
2206{ 2206{
2207 struct vop_deleteextattr_args /* { 2207 struct vop_deleteextattr_args /* {
2208 struct vnode *a_vp; 2208 struct vnode *a_vp;
2209 int a_attrnamespace; 2209 int a_attrnamespace;
2210 kauth_cred_t a_cred; 2210 kauth_cred_t a_cred;
2211 struct proc *a_p; 2211 struct proc *a_p;
2212 } */ *ap = v; 2212 } */ *ap = v;
2213 struct vnode *vp = ap->a_vp; 2213 struct vnode *vp = ap->a_vp;
2214 struct inode *ip = VTOI(vp); 2214 struct inode *ip = VTOI(vp);
2215 struct ulfsmount *ump = ip->i_ump; 2215 struct ulfsmount *ump = ip->i_ump;
2216 	//struct lfs *fs = ip->i_lfs; 2216 	//struct lfs *fs = ip->i_lfs;
2217 int error; 2217 int error;
2218 2218
2219 if (ump->um_fstype == ULFS1) { 2219 if (ump->um_fstype == ULFS1) {
2220#ifdef LFS_EXTATTR 2220#ifdef LFS_EXTATTR
2221 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2221 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2222 error = ulfs_deleteextattr(ap); 2222 error = ulfs_deleteextattr(ap);
2223 fstrans_done(vp->v_mount); 2223 fstrans_done(vp->v_mount);
2224#else 2224#else
2225 error = EOPNOTSUPP; 2225 error = EOPNOTSUPP;
2226#endif 2226#endif
2227 return error; 2227 return error;
2228 } 2228 }
2229 2229
2230 /* XXX Not implemented for ULFS2 file systems. */ 2230 /* XXX Not implemented for ULFS2 file systems. */
2231 return (EOPNOTSUPP); 2231 return (EOPNOTSUPP);
2232} 2232}