Thu Aug 6 00:23:08 2009 UTC ()
add D to getopt so that previous actually works


(pooka)
diff -r1.19 -r1.20 src/libexec/lfs_cleanerd/lfs_cleanerd.c

cvs diff -r1.19 -r1.20 src/libexec/lfs_cleanerd/lfs_cleanerd.c (switch to unified diff)

--- src/libexec/lfs_cleanerd/lfs_cleanerd.c 2009/08/06 00:20:45 1.19
+++ src/libexec/lfs_cleanerd/lfs_cleanerd.c 2009/08/06 00:23:08 1.20
@@ -1,1548 +1,1548 @@ @@ -1,1548 +1,1548 @@
1/* $NetBSD: lfs_cleanerd.c,v 1.19 2009/08/06 00:20:45 pooka Exp $ */ 1/* $NetBSD: lfs_cleanerd.c,v 1.20 2009/08/06 00:23:08 pooka Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * The cleaner daemon for the NetBSD Log-structured File System. 33 * The cleaner daemon for the NetBSD Log-structured File System.
34 * Only tested for use with version 2 LFSs. 34 * Only tested for use with version 2 LFSs.
35 */ 35 */
36 36
37#include <sys/syslog.h> 37#include <sys/syslog.h>
38#include <sys/param.h> 38#include <sys/param.h>
39#include <sys/mount.h> 39#include <sys/mount.h>
40#include <sys/stat.h> 40#include <sys/stat.h>
41#include <ufs/ufs/inode.h> 41#include <ufs/ufs/inode.h>
42#include <ufs/lfs/lfs.h> 42#include <ufs/lfs/lfs.h>
43 43
44#include <assert.h> 44#include <assert.h>
45#include <err.h> 45#include <err.h>
46#include <errno.h> 46#include <errno.h>
47#include <fcntl.h> 47#include <fcntl.h>
48#include <stdio.h> 48#include <stdio.h>
49#include <stdlib.h> 49#include <stdlib.h>
50#include <string.h> 50#include <string.h>
51#include <unistd.h> 51#include <unistd.h>
52#include <time.h> 52#include <time.h>
53#include <util.h> 53#include <util.h>
54 54
55#include "bufcache.h" 55#include "bufcache.h"
56#include "vnode.h" 56#include "vnode.h"
57#include "lfs_user.h" 57#include "lfs_user.h"
58#include "fdfs.h" 58#include "fdfs.h"
59#include "cleaner.h" 59#include "cleaner.h"
60 60
61/* 61/*
62 * Global variables. 62 * Global variables.
63 */ 63 */
64/* XXX these top few should really be fs-specific */ 64/* XXX these top few should really be fs-specific */
int use_fs_idle;	/* Base idleness on fs idle time, not CPU idle time */
int use_bytes;		/* Account in bytes written, not segments cleaned */
int load_threshold;	/* How idle is idle (CPU idle) */
int atatime;		/* Segments (or bytes) to clean per pass */

int nfss;		/* Count of filesystems this cleanerd monitors */
struct clfs **fsp;	/* Array of extended filesystem structures */
int segwait_timeout;	/* Time to wait in lfs_segwait() */
int do_quit;		/* Quit after a single cleaning loop */
int do_coalesce;	/* Coalesce the filesystem */
int do_small;		/* Use small writes through markv */
char *copylog_filename;	/* File used for fs debugging analysis */
int inval_segment;	/* Segment to invalidate */
int stat_report;	/* Report statistics every this many cycles */
int debug;		/* Debug logging switch; dlog() is silent when 0 */
/*
 * Counters kept by the cleaner, held in the single global instance
 * declared with the type.  The per-field notes are inferred from the
 * member names only -- TODO confirm against the code that updates them.
 */
struct cleaner_stats {
	double	util_tot;	/* presumably a running utilization total */
	double	util_sos;	/* presumably a utilization sum of squares */
	off_t	bytes_read;
	off_t	bytes_written;
	off_t	segs_cleaned;
	off_t	segs_empty;
	off_t	segs_error;
} cleaner_stats;
89 89
90extern u_int32_t cksum(void *, size_t); 90extern u_int32_t cksum(void *, size_t);
91extern u_int32_t lfs_sb_cksum(struct dlfs *); 91extern u_int32_t lfs_sb_cksum(struct dlfs *);
92extern u_int32_t lfs_cksum_part(void *, size_t, u_int32_t); 92extern u_int32_t lfs_cksum_part(void *, size_t, u_int32_t);
93extern int ufs_getlbns(struct lfs *, struct uvnode *, daddr_t, struct indir *, int *); 93extern int ufs_getlbns(struct lfs *, struct uvnode *, daddr_t, struct indir *, int *);
94 94
/*
 * Compat: no-op stand-in for pwarn().  The format string and every
 * variadic argument are ignored.
 */
void
pwarn(const char *unused, ...)
{
	/* Does nothing */
}
97 97
98/* 98/*
99 * Log a message if debugging is turned on. 99 * Log a message if debugging is turned on.
100 */ 100 */
101void 101void
102dlog(const char *fmt, ...) 102dlog(const char *fmt, ...)
103{ 103{
104 va_list ap; 104 va_list ap;
105 105
106 if (debug == 0) 106 if (debug == 0)
107 return; 107 return;
108 108
109 va_start(ap, fmt); 109 va_start(ap, fmt);
110 vsyslog(LOG_DEBUG, fmt, ap); 110 vsyslog(LOG_DEBUG, fmt, ap);
111 va_end(ap); 111 va_end(ap);
112} 112}
113 113
114/* 114/*
115 * Remove the specified filesystem from the list, due to its having 115 * Remove the specified filesystem from the list, due to its having
116 * become unmounted or other error condition. 116 * become unmounted or other error condition.
117 */ 117 */
118void 118void
119handle_error(struct clfs **cfsp, int n) 119handle_error(struct clfs **cfsp, int n)
120{ 120{
121 syslog(LOG_NOTICE, "%s: detaching cleaner", cfsp[n]->lfs_fsmnt); 121 syslog(LOG_NOTICE, "%s: detaching cleaner", cfsp[n]->lfs_fsmnt);
122 free(cfsp[n]); 122 free(cfsp[n]);
123 if (n != nfss - 1) 123 if (n != nfss - 1)
124 cfsp[n] = cfsp[nfss - 1]; 124 cfsp[n] = cfsp[nfss - 1];
125 --nfss; 125 --nfss;
126} 126}
127 127
128/* 128/*
129 * Reinitialize a filesystem if, e.g., its size changed. 129 * Reinitialize a filesystem if, e.g., its size changed.
130 */ 130 */
131int 131int
132reinit_fs(struct clfs *fs) 132reinit_fs(struct clfs *fs)
133{ 133{
134 char fsname[MNAMELEN]; 134 char fsname[MNAMELEN];
135 135
136 strncpy(fsname, (char *)fs->lfs_fsmnt, MNAMELEN); 136 strncpy(fsname, (char *)fs->lfs_fsmnt, MNAMELEN);
137 close(fs->clfs_ifilefd); 137 close(fs->clfs_ifilefd);
138 close(fs->clfs_devfd); 138 close(fs->clfs_devfd);
139 fd_reclaim(fs->clfs_devvp); 139 fd_reclaim(fs->clfs_devvp);
140 fd_reclaim(fs->lfs_ivnode); 140 fd_reclaim(fs->lfs_ivnode);
141 free(fs->clfs_dev); 141 free(fs->clfs_dev);
142 free(fs->clfs_segtab); 142 free(fs->clfs_segtab);
143 free(fs->clfs_segtabp); 143 free(fs->clfs_segtabp);
144 144
145 return init_fs(fs, fsname); 145 return init_fs(fs, fsname);
146} 146}
147 147
148#ifdef REPAIR_ZERO_FINFO 148#ifdef REPAIR_ZERO_FINFO
149/* 149/*
150 * Use fsck's lfs routines to load the Ifile from an unmounted fs. 150 * Use fsck's lfs routines to load the Ifile from an unmounted fs.
151 * We interpret "fsname" as the name of the raw disk device. 151 * We interpret "fsname" as the name of the raw disk device.
152 */ 152 */
153int 153int
154init_unmounted_fs(struct clfs *fs, char *fsname) 154init_unmounted_fs(struct clfs *fs, char *fsname)
155{ 155{
156 struct lfs *disc_fs; 156 struct lfs *disc_fs;
157 int i; 157 int i;
158  158
159 fs->clfs_dev = fsname; 159 fs->clfs_dev = fsname;
160 if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDWR)) < 0) { 160 if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDWR)) < 0) {
161 syslog(LOG_ERR, "couldn't open device %s read/write", 161 syslog(LOG_ERR, "couldn't open device %s read/write",
162 fs->clfs_dev); 162 fs->clfs_dev);
163 return -1; 163 return -1;
164 } 164 }
165 165
166 disc_fs = lfs_init(fs->clfs_devfd, 0, 0, 0, 0); 166 disc_fs = lfs_init(fs->clfs_devfd, 0, 0, 0, 0);
167 167
168 fs->lfs_dlfs = disc_fs->lfs_dlfs; /* Structure copy */ 168 fs->lfs_dlfs = disc_fs->lfs_dlfs; /* Structure copy */
169 strncpy(fs->lfs_fsmnt, fsname, MNAMELEN); 169 strncpy(fs->lfs_fsmnt, fsname, MNAMELEN);
170 fs->lfs_ivnode = (struct uvnode *)disc_fs->lfs_ivnode; 170 fs->lfs_ivnode = (struct uvnode *)disc_fs->lfs_ivnode;
171 fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize, 171 fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize,
172 atatime); 172 atatime);
173 173
174 /* Allocate and clear segtab */ 174 /* Allocate and clear segtab */
175 fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg * 175 fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg *
176 sizeof(*fs->clfs_segtab)); 176 sizeof(*fs->clfs_segtab));
177 fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg * 177 fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg *
178 sizeof(*fs->clfs_segtabp)); 178 sizeof(*fs->clfs_segtabp));
179 for (i = 0; i < fs->lfs_nseg; i++) { 179 for (i = 0; i < fs->lfs_nseg; i++) {
180 fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); 180 fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]);
181 fs->clfs_segtab[i].flags = 0x0; 181 fs->clfs_segtab[i].flags = 0x0;
182 } 182 }
183 syslog(LOG_NOTICE, "%s: unmounted cleaner starting", fsname); 183 syslog(LOG_NOTICE, "%s: unmounted cleaner starting", fsname);
184 184
185 return 0; 185 return 0;
186} 186}
187#endif 187#endif
188 188
189/* 189/*
190 * Set up the file descriptors, including the Ifile descriptor. 190 * Set up the file descriptors, including the Ifile descriptor.
191 * If we can't get the Ifile, this is not an LFS (or the kernel is 191 * If we can't get the Ifile, this is not an LFS (or the kernel is
192 * too old to support the fcntl). 192 * too old to support the fcntl).
193 * XXX Merge this and init_unmounted_fs, switching on whether 193 * XXX Merge this and init_unmounted_fs, switching on whether
194 * XXX "fsname" is a dir or a char special device. Should 194 * XXX "fsname" is a dir or a char special device. Should
195 * XXX also be able to read unmounted devices out of fstab, the way 195 * XXX also be able to read unmounted devices out of fstab, the way
196 * XXX fsck does. 196 * XXX fsck does.
197 */ 197 */
198int 198int
199init_fs(struct clfs *fs, char *fsname) 199init_fs(struct clfs *fs, char *fsname)
200{ 200{
201 struct statvfs sf; 201 struct statvfs sf;
202 int rootfd; 202 int rootfd;
203 int i; 203 int i;
204 204
205 /* 205 /*
206 * Get the raw device from the block device. 206 * Get the raw device from the block device.
207 * XXX this is ugly. Is there a way to discover the raw device 207 * XXX this is ugly. Is there a way to discover the raw device
208 * XXX for a given mount point? 208 * XXX for a given mount point?
209 */ 209 */
210 if (statvfs(fsname, &sf) < 0) 210 if (statvfs(fsname, &sf) < 0)
211 return -1; 211 return -1;
212 fs->clfs_dev = malloc(strlen(sf.f_mntfromname) + 2); 212 fs->clfs_dev = malloc(strlen(sf.f_mntfromname) + 2);
213 if (fs->clfs_dev == NULL) { 213 if (fs->clfs_dev == NULL) {
214 syslog(LOG_ERR, "couldn't malloc device name string: %m"); 214 syslog(LOG_ERR, "couldn't malloc device name string: %m");
215 return -1; 215 return -1;
216 } 216 }
217 sprintf(fs->clfs_dev, "/dev/r%s", sf.f_mntfromname + 5); 217 sprintf(fs->clfs_dev, "/dev/r%s", sf.f_mntfromname + 5);
218 if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDONLY)) < 0) { 218 if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDONLY)) < 0) {
219 syslog(LOG_ERR, "couldn't open device %s for reading", 219 syslog(LOG_ERR, "couldn't open device %s for reading",
220 fs->clfs_dev); 220 fs->clfs_dev);
221 return -1; 221 return -1;
222 } 222 }
223 223
224 /* Find the Ifile and open it */ 224 /* Find the Ifile and open it */
225 if ((rootfd = open(fsname, O_RDONLY)) < 0) 225 if ((rootfd = open(fsname, O_RDONLY)) < 0)
226 return -2; 226 return -2;
227 if (fcntl(rootfd, LFCNIFILEFH, &fs->clfs_ifilefh) < 0) 227 if (fcntl(rootfd, LFCNIFILEFH, &fs->clfs_ifilefh) < 0)
228 return -3; 228 return -3;
229 if ((fs->clfs_ifilefd = fhopen(&fs->clfs_ifilefh, 229 if ((fs->clfs_ifilefd = fhopen(&fs->clfs_ifilefh,
230 sizeof(fs->clfs_ifilefh), O_RDONLY)) < 0) 230 sizeof(fs->clfs_ifilefh), O_RDONLY)) < 0)
231 return -4; 231 return -4;
232 close(rootfd); 232 close(rootfd);
233 233
234 /* Load in the superblock */ 234 /* Load in the superblock */
235 if (pread(fs->clfs_devfd, &(fs->lfs_dlfs), sizeof(struct dlfs), 235 if (pread(fs->clfs_devfd, &(fs->lfs_dlfs), sizeof(struct dlfs),
236 LFS_LABELPAD) < 0) 236 LFS_LABELPAD) < 0)
237 return -1; 237 return -1;
238 238
239 /* If this is not a version 2 filesystem, complain and exit */ 239 /* If this is not a version 2 filesystem, complain and exit */
240 if (fs->lfs_version != 2) { 240 if (fs->lfs_version != 2) {
241 syslog(LOG_ERR, "%s: not a version 2 LFS", fsname); 241 syslog(LOG_ERR, "%s: not a version 2 LFS", fsname);
242 return -1; 242 return -1;
243 } 243 }
244 244
245 /* Assume fsname is the mounted name */ 245 /* Assume fsname is the mounted name */
246 strncpy((char *)fs->lfs_fsmnt, fsname, MNAMELEN); 246 strncpy((char *)fs->lfs_fsmnt, fsname, MNAMELEN);
247 247
248 /* Set up vnodes for Ifile and raw device */ 248 /* Set up vnodes for Ifile and raw device */
249 fs->lfs_ivnode = fd_vget(fs->clfs_ifilefd, fs->lfs_bsize, 0, 0); 249 fs->lfs_ivnode = fd_vget(fs->clfs_ifilefd, fs->lfs_bsize, 0, 0);
250 fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize, 250 fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize,
251 atatime); 251 atatime);
252 252
253 /* Allocate and clear segtab */ 253 /* Allocate and clear segtab */
254 fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg * 254 fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg *
255 sizeof(*fs->clfs_segtab)); 255 sizeof(*fs->clfs_segtab));
256 fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg * 256 fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg *
257 sizeof(*fs->clfs_segtabp)); 257 sizeof(*fs->clfs_segtabp));
258 if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) { 258 if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) {
259 syslog(LOG_ERR, "%s: couldn't malloc segment table: %m", 259 syslog(LOG_ERR, "%s: couldn't malloc segment table: %m",
260 fs->clfs_dev); 260 fs->clfs_dev);
261 return -1; 261 return -1;
262 } 262 }
263 263
264 for (i = 0; i < fs->lfs_nseg; i++) { 264 for (i = 0; i < fs->lfs_nseg; i++) {
265 fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); 265 fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]);
266 fs->clfs_segtab[i].flags = 0x0; 266 fs->clfs_segtab[i].flags = 0x0;
267 } 267 }
268 268
269 syslog(LOG_NOTICE, "%s: attaching cleaner", fsname); 269 syslog(LOG_NOTICE, "%s: attaching cleaner", fsname);
270 return 0; 270 return 0;
271} 271}
272 272
273/* 273/*
274 * Invalidate all the currently held Ifile blocks so they will be 274 * Invalidate all the currently held Ifile blocks so they will be
275 * reread when we clean. Check the size while we're at it, and 275 * reread when we clean. Check the size while we're at it, and
276 * resize the buffer cache if necessary. 276 * resize the buffer cache if necessary.
277 */ 277 */
278void 278void
279reload_ifile(struct clfs *fs) 279reload_ifile(struct clfs *fs)
280{ 280{
281 struct ubuf *bp; 281 struct ubuf *bp;
282 struct stat st; 282 struct stat st;
283 int ohashmax; 283 int ohashmax;
284 extern int hashmax; 284 extern int hashmax;
285 285
286 while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) != NULL) { 286 while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) != NULL) {
287 bremfree(bp); 287 bremfree(bp);
288 buf_destroy(bp); 288 buf_destroy(bp);
289 } 289 }
290 while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_cleanblkhd)) != NULL) { 290 while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_cleanblkhd)) != NULL) {
291 bremfree(bp); 291 bremfree(bp);
292 buf_destroy(bp); 292 buf_destroy(bp);
293 } 293 }
294 294
295 /* If Ifile is larger than buffer cache, rehash */ 295 /* If Ifile is larger than buffer cache, rehash */
296 fstat(fs->clfs_ifilefd, &st); 296 fstat(fs->clfs_ifilefd, &st);
297 if (st.st_size / fs->lfs_bsize > hashmax) { 297 if (st.st_size / fs->lfs_bsize > hashmax) {
298 ohashmax = hashmax; 298 ohashmax = hashmax;
299 bufrehash(st.st_size / fs->lfs_bsize); 299 bufrehash(st.st_size / fs->lfs_bsize);
300 dlog("%s: resized buffer hash from %d to %d", 300 dlog("%s: resized buffer hash from %d to %d",
301 fs->lfs_fsmnt, ohashmax, hashmax); 301 fs->lfs_fsmnt, ohashmax, hashmax);
302 } 302 }
303} 303}
304 304
305/* 305/*
306 * Get IFILE entry for the given inode, store in ifpp. The buffer 306 * Get IFILE entry for the given inode, store in ifpp. The buffer
307 * which contains that data is returned in bpp, and must be brelse()d 307 * which contains that data is returned in bpp, and must be brelse()d
308 * by the caller. 308 * by the caller.
309 */ 309 */
310void 310void
311lfs_ientry(IFILE **ifpp, struct clfs *fs, ino_t ino, struct ubuf **bpp) 311lfs_ientry(IFILE **ifpp, struct clfs *fs, ino_t ino, struct ubuf **bpp)
312{ 312{
313 int error; 313 int error;
314 314
315 error = bread(fs->lfs_ivnode, ino / fs->lfs_ifpb + fs->lfs_cleansz + 315 error = bread(fs->lfs_ivnode, ino / fs->lfs_ifpb + fs->lfs_cleansz +
316 fs->lfs_segtabsz, fs->lfs_bsize, NOCRED, 0, bpp); 316 fs->lfs_segtabsz, fs->lfs_bsize, NOCRED, 0, bpp);
317 if (error) 317 if (error)
318 syslog(LOG_ERR, "%s: ientry failed for ino %d", 318 syslog(LOG_ERR, "%s: ientry failed for ino %d",
319 fs->lfs_fsmnt, (int)ino); 319 fs->lfs_fsmnt, (int)ino);
320 *ifpp = (IFILE *)(*bpp)->b_data + ino % fs->lfs_ifpb; 320 *ifpp = (IFILE *)(*bpp)->b_data + ino % fs->lfs_ifpb;
321 return; 321 return;
322} 322}
323 323
324#ifdef TEST_PATTERN 324#ifdef TEST_PATTERN
325/* 325/*
326 * Check ROOTINO for file data. The assumption is that we are running 326 * Check ROOTINO for file data. The assumption is that we are running
327 * the "twofiles" test with the rest of the filesystem empty. Files 327 * the "twofiles" test with the rest of the filesystem empty. Files
328 * created by "twofiles" match the test pattern, but ROOTINO and the 328 * created by "twofiles" match the test pattern, but ROOTINO and the
329 * executable itself (assumed to be inode 3) should not match. 329 * executable itself (assumed to be inode 3) should not match.
330 */ 330 */
331static void 331static void
332check_test_pattern(BLOCK_INFO *bip) 332check_test_pattern(BLOCK_INFO *bip)
333{ 333{
334 int j; 334 int j;
335 unsigned char *cp = bip->bi_bp; 335 unsigned char *cp = bip->bi_bp;
336 336
337 /* Check inode sanity */ 337 /* Check inode sanity */
338 if (bip->bi_lbn == LFS_UNUSED_LBN) { 338 if (bip->bi_lbn == LFS_UNUSED_LBN) {
339 assert(((struct ufs1_dinode *)bip->bi_bp)->di_inumber == 339 assert(((struct ufs1_dinode *)bip->bi_bp)->di_inumber ==
340 bip->bi_inode); 340 bip->bi_inode);
341 } 341 }
342 342
343 /* These can have the test pattern and it's all good */ 343 /* These can have the test pattern and it's all good */
344 if (bip->bi_inode > 3) 344 if (bip->bi_inode > 3)
345 return; 345 return;
346 346
347 for (j = 0; j < bip->bi_size; j++) { 347 for (j = 0; j < bip->bi_size; j++) {
348 if (cp[j] != (j & 0xff)) 348 if (cp[j] != (j & 0xff))
349 break; 349 break;
350 } 350 }
351 assert(j < bip->bi_size); 351 assert(j < bip->bi_size);
352} 352}
353#endif /* TEST_PATTERN */ 353#endif /* TEST_PATTERN */
354 354
355/* 355/*
356 * Parse the partial segment at daddr, adding its information to 356 * Parse the partial segment at daddr, adding its information to
357 * bip. Return the address of the next partial segment to read. 357 * bip. Return the address of the next partial segment to read.
358 */ 358 */
359int32_t 359int32_t
360parse_pseg(struct clfs *fs, daddr_t daddr, BLOCK_INFO **bipp, int *bic) 360parse_pseg(struct clfs *fs, daddr_t daddr, BLOCK_INFO **bipp, int *bic)
361{ 361{
362 SEGSUM *ssp; 362 SEGSUM *ssp;
363 IFILE *ifp; 363 IFILE *ifp;
364 BLOCK_INFO *bip, *nbip; 364 BLOCK_INFO *bip, *nbip;
365 int32_t *iaddrp, idaddr, odaddr; 365 int32_t *iaddrp, idaddr, odaddr;
366 FINFO *fip; 366 FINFO *fip;
367 struct ubuf *ifbp; 367 struct ubuf *ifbp;
368 struct ufs1_dinode *dip; 368 struct ufs1_dinode *dip;
369 u_int32_t ck, vers; 369 u_int32_t ck, vers;
370 int fic, inoc, obic; 370 int fic, inoc, obic;
371 int i; 371 int i;
372 char *cp; 372 char *cp;
373 373
374 odaddr = daddr; 374 odaddr = daddr;
375 obic = *bic; 375 obic = *bic;
376 bip = *bipp; 376 bip = *bipp;
377 377
378 /* 378 /*
379 * Retrieve the segment header, set up the SEGSUM pointer 379 * Retrieve the segment header, set up the SEGSUM pointer
380 * as well as the first FINFO and inode address pointer. 380 * as well as the first FINFO and inode address pointer.
381 */ 381 */
382 cp = fd_ptrget(fs->clfs_devvp, daddr); 382 cp = fd_ptrget(fs->clfs_devvp, daddr);
383 ssp = (SEGSUM *)cp; 383 ssp = (SEGSUM *)cp;
384 iaddrp = ((int32_t *)(cp + fs->lfs_ibsize)) - 1; 384 iaddrp = ((int32_t *)(cp + fs->lfs_ibsize)) - 1;
385 fip = (FINFO *)(cp + sizeof(SEGSUM)); 385 fip = (FINFO *)(cp + sizeof(SEGSUM));
386 386
387 /* 387 /*
388 * Check segment header magic and checksum 388 * Check segment header magic and checksum
389 */ 389 */
390 if (ssp->ss_magic != SS_MAGIC) { 390 if (ssp->ss_magic != SS_MAGIC) {
391 syslog(LOG_WARNING, "%s: sumsum magic number bad at 0x%x:" 391 syslog(LOG_WARNING, "%s: sumsum magic number bad at 0x%x:"
392 " read 0x%x, expected 0x%x", fs->lfs_fsmnt, 392 " read 0x%x, expected 0x%x", fs->lfs_fsmnt,
393 (int32_t)daddr, ssp->ss_magic, SS_MAGIC); 393 (int32_t)daddr, ssp->ss_magic, SS_MAGIC);
394 return 0x0; 394 return 0x0;
395 } 395 }
396 ck = cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); 396 ck = cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
397 if (ck != ssp->ss_sumsum) { 397 if (ck != ssp->ss_sumsum) {
398 syslog(LOG_WARNING, "%s: sumsum checksum mismatch at 0x%x:" 398 syslog(LOG_WARNING, "%s: sumsum checksum mismatch at 0x%x:"
399 " read 0x%x, computed 0x%x", fs->lfs_fsmnt, 399 " read 0x%x, computed 0x%x", fs->lfs_fsmnt,
400 (int32_t)daddr, ssp->ss_sumsum, ck); 400 (int32_t)daddr, ssp->ss_sumsum, ck);
401 return 0x0; 401 return 0x0;
402 } 402 }
403 403
404 /* Initialize data sum */ 404 /* Initialize data sum */
405 ck = 0; 405 ck = 0;
406 406
407 /* Point daddr at next block after segment summary */ 407 /* Point daddr at next block after segment summary */
408 ++daddr; 408 ++daddr;
409 409
410 /* 410 /*
411 * Loop over file info and inode pointers. We always move daddr 411 * Loop over file info and inode pointers. We always move daddr
412 * forward here because we are also computing the data checksum 412 * forward here because we are also computing the data checksum
413 * as we go. 413 * as we go.
414 */ 414 */
415 fic = inoc = 0; 415 fic = inoc = 0;
416 while (fic < ssp->ss_nfinfo || inoc < ssp->ss_ninos) { 416 while (fic < ssp->ss_nfinfo || inoc < ssp->ss_ninos) {
417 /* 417 /*
418 * We must have either a file block or an inode block. 418 * We must have either a file block or an inode block.
419 * If we don't have either one, it's an error. 419 * If we don't have either one, it's an error.
420 */ 420 */
421 if (fic >= ssp->ss_nfinfo && *iaddrp != daddr) { 421 if (fic >= ssp->ss_nfinfo && *iaddrp != daddr) {
422 syslog(LOG_WARNING, "%s: bad pseg at %x (seg %d)", 422 syslog(LOG_WARNING, "%s: bad pseg at %x (seg %d)",
423 fs->lfs_fsmnt, odaddr, dtosn(fs, odaddr)); 423 fs->lfs_fsmnt, odaddr, dtosn(fs, odaddr));
424 *bipp = bip; 424 *bipp = bip;
425 return 0x0; 425 return 0x0;
426 } 426 }
427 427
428 /* 428 /*
429 * Note each inode from the inode blocks 429 * Note each inode from the inode blocks
430 */ 430 */
431 if (inoc < ssp->ss_ninos && *iaddrp == daddr) { 431 if (inoc < ssp->ss_ninos && *iaddrp == daddr) {
432 cp = fd_ptrget(fs->clfs_devvp, daddr); 432 cp = fd_ptrget(fs->clfs_devvp, daddr);
433 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 433 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck);
434 dip = (struct ufs1_dinode *)cp; 434 dip = (struct ufs1_dinode *)cp;
435 for (i = 0; i < fs->lfs_inopb; i++) { 435 for (i = 0; i < fs->lfs_inopb; i++) {
436 if (dip[i].di_inumber == 0) 436 if (dip[i].di_inumber == 0)
437 break; 437 break;
438 438
439 /* 439 /*
440 * Check currency before adding it 440 * Check currency before adding it
441 */ 441 */
442#ifndef REPAIR_ZERO_FINFO 442#ifndef REPAIR_ZERO_FINFO
443 lfs_ientry(&ifp, fs, dip[i].di_inumber, &ifbp); 443 lfs_ientry(&ifp, fs, dip[i].di_inumber, &ifbp);
444 idaddr = ifp->if_daddr; 444 idaddr = ifp->if_daddr;
445 brelse(ifbp, 0); 445 brelse(ifbp, 0);
446 if (idaddr != daddr) 446 if (idaddr != daddr)
447#endif 447#endif
448 continue; 448 continue;
449 449
450 /* 450 /*
451 * A current inode. Add it. 451 * A current inode. Add it.
452 */ 452 */
453 ++*bic; 453 ++*bic;
454 nbip = (BLOCK_INFO *)realloc(bip, *bic * 454 nbip = (BLOCK_INFO *)realloc(bip, *bic *
455 sizeof(*bip)); 455 sizeof(*bip));
456 if (nbip) 456 if (nbip)
457 bip = nbip; 457 bip = nbip;
458 else { 458 else {
459 --*bic; 459 --*bic;
460 *bipp = bip; 460 *bipp = bip;
461 return 0x0; 461 return 0x0;
462 } 462 }
463 bip[*bic - 1].bi_inode = dip[i].di_inumber; 463 bip[*bic - 1].bi_inode = dip[i].di_inumber;
464 bip[*bic - 1].bi_lbn = LFS_UNUSED_LBN; 464 bip[*bic - 1].bi_lbn = LFS_UNUSED_LBN;
465 bip[*bic - 1].bi_daddr = daddr; 465 bip[*bic - 1].bi_daddr = daddr;
466 bip[*bic - 1].bi_segcreate = ssp->ss_create; 466 bip[*bic - 1].bi_segcreate = ssp->ss_create;
467 bip[*bic - 1].bi_version = dip[i].di_gen; 467 bip[*bic - 1].bi_version = dip[i].di_gen;
468 bip[*bic - 1].bi_bp = &(dip[i]); 468 bip[*bic - 1].bi_bp = &(dip[i]);
469 bip[*bic - 1].bi_size = DINODE1_SIZE; 469 bip[*bic - 1].bi_size = DINODE1_SIZE;
470 } 470 }
471 inoc += i; 471 inoc += i;
472 daddr += btofsb(fs, fs->lfs_ibsize); 472 daddr += btofsb(fs, fs->lfs_ibsize);
473 --iaddrp; 473 --iaddrp;
474 continue; 474 continue;
475 } 475 }
476 476
477 /* 477 /*
478 * Note each file block from the finfo blocks 478 * Note each file block from the finfo blocks
479 */ 479 */
480 if (fic >= ssp->ss_nfinfo) 480 if (fic >= ssp->ss_nfinfo)
481 continue; 481 continue;
482 482
483 /* Count this finfo, whether or not we use it */ 483 /* Count this finfo, whether or not we use it */
484 ++fic; 484 ++fic;
485 485
486 /* 486 /*
487 * If this finfo has nblocks==0, it was written wrong. 487 * If this finfo has nblocks==0, it was written wrong.
488 * Kernels with this problem always wrote this zero-sized 488 * Kernels with this problem always wrote this zero-sized
489 * finfo last, so just ignore it. 489 * finfo last, so just ignore it.
490 */ 490 */
491 if (fip->fi_nblocks == 0) { 491 if (fip->fi_nblocks == 0) {
492#ifdef REPAIR_ZERO_FINFO 492#ifdef REPAIR_ZERO_FINFO
493 struct ubuf *nbp; 493 struct ubuf *nbp;
494 SEGSUM *nssp; 494 SEGSUM *nssp;
495 495
496 syslog(LOG_WARNING, "fixing short FINFO at %x (seg %d)", 496 syslog(LOG_WARNING, "fixing short FINFO at %x (seg %d)",
497 odaddr, dtosn(fs, odaddr)); 497 odaddr, dtosn(fs, odaddr));
498 bread(fs->clfs_devvp, odaddr, fs->lfs_fsize, 498 bread(fs->clfs_devvp, odaddr, fs->lfs_fsize,
499 NOCRED, 0, &nbp); 499 NOCRED, 0, &nbp);
500 nssp = (SEGSUM *)nbp->b_data; 500 nssp = (SEGSUM *)nbp->b_data;
501 --nssp->ss_nfinfo; 501 --nssp->ss_nfinfo;
502 nssp->ss_sumsum = cksum(&nssp->ss_datasum, 502 nssp->ss_sumsum = cksum(&nssp->ss_datasum,
503 fs->lfs_sumsize - sizeof(nssp->ss_sumsum)); 503 fs->lfs_sumsize - sizeof(nssp->ss_sumsum));
504 bwrite(nbp); 504 bwrite(nbp);
505#endif 505#endif
506 syslog(LOG_WARNING, "zero-length FINFO at %x (seg %d)", 506 syslog(LOG_WARNING, "zero-length FINFO at %x (seg %d)",
507 odaddr, dtosn(fs, odaddr)); 507 odaddr, dtosn(fs, odaddr));
508 continue; 508 continue;
509 } 509 }
510 510
511 /* 511 /*
512 * Check currency before adding blocks 512 * Check currency before adding blocks
513 */ 513 */
514#ifdef REPAIR_ZERO_FINFO 514#ifdef REPAIR_ZERO_FINFO
515 vers = -1; 515 vers = -1;
516#else 516#else
517 lfs_ientry(&ifp, fs, fip->fi_ino, &ifbp); 517 lfs_ientry(&ifp, fs, fip->fi_ino, &ifbp);
518 vers = ifp->if_version; 518 vers = ifp->if_version;
519 brelse(ifbp, 0); 519 brelse(ifbp, 0);
520#endif 520#endif
521 if (vers != fip->fi_version) { 521 if (vers != fip->fi_version) {
522 size_t size; 522 size_t size;
523 523
524 /* Read all the blocks from the data summary */ 524 /* Read all the blocks from the data summary */
525 for (i = 0; i < fip->fi_nblocks; i++) { 525 for (i = 0; i < fip->fi_nblocks; i++) {
526 size = (i == fip->fi_nblocks - 1) ? 526 size = (i == fip->fi_nblocks - 1) ?
527 fip->fi_lastlength : fs->lfs_bsize; 527 fip->fi_lastlength : fs->lfs_bsize;
528 cp = fd_ptrget(fs->clfs_devvp, daddr); 528 cp = fd_ptrget(fs->clfs_devvp, daddr);
529 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 529 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck);
530 daddr += btofsb(fs, size); 530 daddr += btofsb(fs, size);
531 } 531 }
532 fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); 532 fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks);
533 continue; 533 continue;
534 } 534 }
535 535
536 /* Add all the blocks from the finfos (current or not) */ 536 /* Add all the blocks from the finfos (current or not) */
537 nbip = (BLOCK_INFO *)realloc(bip, (*bic + fip->fi_nblocks) * 537 nbip = (BLOCK_INFO *)realloc(bip, (*bic + fip->fi_nblocks) *
538 sizeof(*bip)); 538 sizeof(*bip));
539 if (nbip) 539 if (nbip)
540 bip = nbip; 540 bip = nbip;
541 else { 541 else {
542 *bipp = bip; 542 *bipp = bip;
543 return 0x0; 543 return 0x0;
544 } 544 }
545 545
546 for (i = 0; i < fip->fi_nblocks; i++) { 546 for (i = 0; i < fip->fi_nblocks; i++) {
547 bip[*bic + i].bi_inode = fip->fi_ino; 547 bip[*bic + i].bi_inode = fip->fi_ino;
548 bip[*bic + i].bi_lbn = fip->fi_blocks[i]; 548 bip[*bic + i].bi_lbn = fip->fi_blocks[i];
549 bip[*bic + i].bi_daddr = daddr; 549 bip[*bic + i].bi_daddr = daddr;
550 bip[*bic + i].bi_segcreate = ssp->ss_create; 550 bip[*bic + i].bi_segcreate = ssp->ss_create;
551 bip[*bic + i].bi_version = fip->fi_version; 551 bip[*bic + i].bi_version = fip->fi_version;
552 bip[*bic + i].bi_size = (i == fip->fi_nblocks - 1) ? 552 bip[*bic + i].bi_size = (i == fip->fi_nblocks - 1) ?
553 fip->fi_lastlength : fs->lfs_bsize; 553 fip->fi_lastlength : fs->lfs_bsize;
554 cp = fd_ptrget(fs->clfs_devvp, daddr); 554 cp = fd_ptrget(fs->clfs_devvp, daddr);
555 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 555 ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck);
556 bip[*bic + i].bi_bp = cp; 556 bip[*bic + i].bi_bp = cp;
557 daddr += btofsb(fs, bip[*bic + i].bi_size); 557 daddr += btofsb(fs, bip[*bic + i].bi_size);
558 558
559#ifdef TEST_PATTERN 559#ifdef TEST_PATTERN
560 check_test_pattern(bip + *bic + i); /* XXXDEBUG */ 560 check_test_pattern(bip + *bic + i); /* XXXDEBUG */
561#endif 561#endif
562 } 562 }
563 *bic += fip->fi_nblocks; 563 *bic += fip->fi_nblocks;
564 fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); 564 fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks);
565 } 565 }
566 566
567#ifndef REPAIR_ZERO_FINFO 567#ifndef REPAIR_ZERO_FINFO
568 if (ssp->ss_datasum != ck) { 568 if (ssp->ss_datasum != ck) {
569 syslog(LOG_WARNING, "%s: data checksum bad at 0x%x:" 569 syslog(LOG_WARNING, "%s: data checksum bad at 0x%x:"
570 " read 0x%x, computed 0x%x", fs->lfs_fsmnt, odaddr, 570 " read 0x%x, computed 0x%x", fs->lfs_fsmnt, odaddr,
571 ssp->ss_datasum, ck); 571 ssp->ss_datasum, ck);
572 *bic = obic; 572 *bic = obic;
573 return 0x0; 573 return 0x0;
574 } 574 }
575#endif 575#endif
576 576
577 *bipp = bip; 577 *bipp = bip;
578 return daddr; 578 return daddr;
579} 579}
580 580
581static void 581static void
582log_segment_read(struct clfs *fs, int sn) 582log_segment_read(struct clfs *fs, int sn)
583{ 583{
584 FILE *fp; 584 FILE *fp;
585 char *cp; 585 char *cp;
586  586
587 /* 587 /*
588 * Write the segment read, and its contents, into a log file in 588 * Write the segment read, and its contents, into a log file in
589 * the current directory. We don't need to log the location of 589 * the current directory. We don't need to log the location of
590 * the segment, since that can be inferred from the segments up 590 * the segment, since that can be inferred from the segments up
591 * to this point (ss_nextseg field of the previously written segment). 591 * to this point (ss_nextseg field of the previously written segment).
592 * 592 *
593 * We can use this info later to reconstruct the filesystem at any 593 * We can use this info later to reconstruct the filesystem at any
594 * given point in time for analysis, by replaying the log forward 594 * given point in time for analysis, by replaying the log forward
595 * indexed by the segment serial numbers; but it is not suitable 595 * indexed by the segment serial numbers; but it is not suitable
596 * for everyday use since the copylog will be simply enormous. 596 * for everyday use since the copylog will be simply enormous.
597 */ 597 */
598 cp = fd_ptrget(fs->clfs_devvp, sntod(fs, sn)); 598 cp = fd_ptrget(fs->clfs_devvp, sntod(fs, sn));
599 599
600 fp = fopen(copylog_filename, "ab"); 600 fp = fopen(copylog_filename, "ab");
601 if (fp != NULL) { 601 if (fp != NULL) {
602 if (fwrite(cp, (size_t)fs->lfs_ssize, 1, fp) != 1) { 602 if (fwrite(cp, (size_t)fs->lfs_ssize, 1, fp) != 1) {
603 perror("writing segment to copy log"); 603 perror("writing segment to copy log");
604 } 604 }
605 } 605 }
606 fclose(fp); 606 fclose(fp);
607} 607}
608 608
609/* 609/*
610 * Read a segment to populate the BLOCK_INFO structures. 610 * Read a segment to populate the BLOCK_INFO structures.
611 * Return the number of partial segments read and parsed. 611 * Return the number of partial segments read and parsed.
612 */ 612 */
613int 613int
614load_segment(struct clfs *fs, int sn, BLOCK_INFO **bipp, int *bic) 614load_segment(struct clfs *fs, int sn, BLOCK_INFO **bipp, int *bic)
615{ 615{
616 int32_t daddr; 616 int32_t daddr;
617 int i, npseg; 617 int i, npseg;
618 618
619 daddr = sntod(fs, sn); 619 daddr = sntod(fs, sn);
620 if (daddr < btofsb(fs, LFS_LABELPAD)) 620 if (daddr < btofsb(fs, LFS_LABELPAD))
621 daddr = btofsb(fs, LFS_LABELPAD); 621 daddr = btofsb(fs, LFS_LABELPAD);
622 for (i = 0; i < LFS_MAXNUMSB; i++) { 622 for (i = 0; i < LFS_MAXNUMSB; i++) {
623 if (fs->lfs_sboffs[i] == daddr) { 623 if (fs->lfs_sboffs[i] == daddr) {
624 daddr += btofsb(fs, LFS_SBPAD); 624 daddr += btofsb(fs, LFS_SBPAD);
625 break; 625 break;
626 } 626 }
627 } 627 }
628 628
629 /* Preload the segment buffer */ 629 /* Preload the segment buffer */
630 if (fd_preload(fs->clfs_devvp, sntod(fs, sn)) < 0) 630 if (fd_preload(fs->clfs_devvp, sntod(fs, sn)) < 0)
631 return -1; 631 return -1;
632 632
633 if (copylog_filename) 633 if (copylog_filename)
634 log_segment_read(fs, sn); 634 log_segment_read(fs, sn);
635 635
636 /* Note bytes read for stats */ 636 /* Note bytes read for stats */
637 cleaner_stats.segs_cleaned++; 637 cleaner_stats.segs_cleaned++;
638 cleaner_stats.bytes_read += fs->lfs_ssize; 638 cleaner_stats.bytes_read += fs->lfs_ssize;
639 ++fs->clfs_nactive; 639 ++fs->clfs_nactive;
640 640
641 npseg = 0; 641 npseg = 0;
642 while(dtosn(fs, daddr) == sn && 642 while(dtosn(fs, daddr) == sn &&
643 dtosn(fs, daddr + btofsb(fs, fs->lfs_bsize)) == sn) { 643 dtosn(fs, daddr + btofsb(fs, fs->lfs_bsize)) == sn) {
644 daddr = parse_pseg(fs, daddr, bipp, bic); 644 daddr = parse_pseg(fs, daddr, bipp, bic);
645 if (daddr == 0x0) { 645 if (daddr == 0x0) {
646 ++cleaner_stats.segs_error; 646 ++cleaner_stats.segs_error;
647 break; 647 break;
648 } 648 }
649 ++npseg; 649 ++npseg;
650 } 650 }
651 651
652 return npseg; 652 return npseg;
653} 653}
654 654
655void 655void
656calc_cb(struct clfs *fs, int sn, struct clfs_seguse *t) 656calc_cb(struct clfs *fs, int sn, struct clfs_seguse *t)
657{ 657{
658 time_t now; 658 time_t now;
659 int64_t age, benefit, cost; 659 int64_t age, benefit, cost;
660 660
661 time(&now); 661 time(&now);
662 age = (now < t->lastmod ? 0 : now - t->lastmod); 662 age = (now < t->lastmod ? 0 : now - t->lastmod);
663 663
664 /* Under no circumstances clean active or already-clean segments */ 664 /* Under no circumstances clean active or already-clean segments */
665 if ((t->flags & SEGUSE_ACTIVE) || !(t->flags & SEGUSE_DIRTY)) { 665 if ((t->flags & SEGUSE_ACTIVE) || !(t->flags & SEGUSE_DIRTY)) {
666 t->priority = 0; 666 t->priority = 0;
667 return; 667 return;
668 } 668 }
669 669
670 /* 670 /*
671 * If the segment is empty, there is no reason to clean it. 671 * If the segment is empty, there is no reason to clean it.
672 * Clear its error condition, if any, since we are never going to 672 * Clear its error condition, if any, since we are never going to
673 * try to parse this one. 673 * try to parse this one.
674 */ 674 */
675 if (t->nbytes == 0) { 675 if (t->nbytes == 0) {
676 t->flags &= ~SEGUSE_ERROR; /* Strip error once empty */ 676 t->flags &= ~SEGUSE_ERROR; /* Strip error once empty */
677 t->priority = 0; 677 t->priority = 0;
678 return; 678 return;
679 } 679 }
680 680
681 if (t->flags & SEGUSE_ERROR) { /* No good if not already empty */ 681 if (t->flags & SEGUSE_ERROR) { /* No good if not already empty */
682 /* No benefit */ 682 /* No benefit */
683 t->priority = 0; 683 t->priority = 0;
684 return; 684 return;
685 } 685 }
686 686
687 if (t->nbytes > fs->lfs_ssize) { 687 if (t->nbytes > fs->lfs_ssize) {
688 /* Another type of error */ 688 /* Another type of error */
689 syslog(LOG_WARNING, "segment %d: bad seguse count %d", 689 syslog(LOG_WARNING, "segment %d: bad seguse count %d",
690 sn, t->nbytes); 690 sn, t->nbytes);
691 t->flags |= SEGUSE_ERROR; 691 t->flags |= SEGUSE_ERROR;
692 t->priority = 0; 692 t->priority = 0;
693 return; 693 return;
694 } 694 }
695 695
696 /* 696 /*
697 * The non-degenerate case. Use Rosenblum's cost-benefit algorithm. 697 * The non-degenerate case. Use Rosenblum's cost-benefit algorithm.
698 * Calculate the benefit from cleaning this segment (one segment, 698 * Calculate the benefit from cleaning this segment (one segment,
699 * minus fragmentation, dirty blocks and a segment summary block) 699 * minus fragmentation, dirty blocks and a segment summary block)
700 * and weigh that against the cost (bytes read plus bytes written). 700 * and weigh that against the cost (bytes read plus bytes written).
701 * We count the summary headers as "dirty" to avoid cleaning very 701 * We count the summary headers as "dirty" to avoid cleaning very
702 * old and very full segments. 702 * old and very full segments.
703 */ 703 */
704 benefit = (int64_t)fs->lfs_ssize - t->nbytes - 704 benefit = (int64_t)fs->lfs_ssize - t->nbytes -
705 (t->nsums + 1) * fs->lfs_fsize; 705 (t->nsums + 1) * fs->lfs_fsize;
706 if (fs->lfs_bsize > fs->lfs_fsize) /* fragmentation */ 706 if (fs->lfs_bsize > fs->lfs_fsize) /* fragmentation */
707 benefit -= (fs->lfs_bsize / 2); 707 benefit -= (fs->lfs_bsize / 2);
708 if (benefit <= 0) { 708 if (benefit <= 0) {
709 t->priority = 0; 709 t->priority = 0;
710 return; 710 return;
711 } 711 }
712 712
713 cost = fs->lfs_ssize + t->nbytes; 713 cost = fs->lfs_ssize + t->nbytes;
714 t->priority = (256 * benefit * age) / cost; 714 t->priority = (256 * benefit * age) / cost;
715 715
716 return; 716 return;
717} 717}
718 718
719/* 719/*
720 * Comparator for BLOCK_INFO structures. Anything not in one of the segments 720 * Comparator for BLOCK_INFO structures. Anything not in one of the segments
721 * we're looking at sorts higher; after that we sort first by inode number 721 * we're looking at sorts higher; after that we sort first by inode number
722 * and then by block number (unsigned, i.e., negative sorts higher) *but* 722 * and then by block number (unsigned, i.e., negative sorts higher) *but*
723 * sort inodes before data blocks. 723 * sort inodes before data blocks.
724 */ 724 */
725static int 725static int
726bi_comparator(const void *va, const void *vb) 726bi_comparator(const void *va, const void *vb)
727{ 727{
728 const BLOCK_INFO *a, *b; 728 const BLOCK_INFO *a, *b;
729 729
730 a = (const BLOCK_INFO *)va; 730 a = (const BLOCK_INFO *)va;
731 b = (const BLOCK_INFO *)vb; 731 b = (const BLOCK_INFO *)vb;
732 732
733 /* Check for out-of-place block */ 733 /* Check for out-of-place block */
734 if (a->bi_segcreate == a->bi_daddr && 734 if (a->bi_segcreate == a->bi_daddr &&
735 b->bi_segcreate != b->bi_daddr) 735 b->bi_segcreate != b->bi_daddr)
736 return -1; 736 return -1;
737 if (a->bi_segcreate != a->bi_daddr && 737 if (a->bi_segcreate != a->bi_daddr &&
738 b->bi_segcreate == b->bi_daddr) 738 b->bi_segcreate == b->bi_daddr)
739 return 1; 739 return 1;
740 if (a->bi_size <= 0 && b->bi_size > 0) 740 if (a->bi_size <= 0 && b->bi_size > 0)
741 return 1; 741 return 1;
742 if (b->bi_size <= 0 && a->bi_size > 0) 742 if (b->bi_size <= 0 && a->bi_size > 0)
743 return -1; 743 return -1;
744 744
745 /* Check inode number */ 745 /* Check inode number */
746 if (a->bi_inode != b->bi_inode) 746 if (a->bi_inode != b->bi_inode)
747 return a->bi_inode - b->bi_inode; 747 return a->bi_inode - b->bi_inode;
748 748
749 /* Check lbn */ 749 /* Check lbn */
750 if (a->bi_lbn == LFS_UNUSED_LBN) /* Inodes sort lower than blocks */ 750 if (a->bi_lbn == LFS_UNUSED_LBN) /* Inodes sort lower than blocks */
751 return -1; 751 return -1;
752 if (b->bi_lbn == LFS_UNUSED_LBN) 752 if (b->bi_lbn == LFS_UNUSED_LBN)
753 return 1; 753 return 1;
754 if ((u_int32_t)a->bi_lbn > (u_int32_t)b->bi_lbn) 754 if ((u_int32_t)a->bi_lbn > (u_int32_t)b->bi_lbn)
755 return 1; 755 return 1;
756 else 756 else
757 return -1; 757 return -1;
758 758
759 return 0; 759 return 0;
760} 760}
761 761
762/* 762/*
763 * Comparator for sort_segments: cost-benefit equation. 763 * Comparator for sort_segments: cost-benefit equation.
764 */ 764 */
765static int 765static int
766cb_comparator(const void *va, const void *vb) 766cb_comparator(const void *va, const void *vb)
767{ 767{
768 const struct clfs_seguse *a, *b; 768 const struct clfs_seguse *a, *b;
769 769
770 a = *(const struct clfs_seguse * const *)va; 770 a = *(const struct clfs_seguse * const *)va;
771 b = *(const struct clfs_seguse * const *)vb; 771 b = *(const struct clfs_seguse * const *)vb;
772 return a->priority > b->priority ? -1 : 1; 772 return a->priority > b->priority ? -1 : 1;
773} 773}
774 774
775void 775void
776toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, int *bic, int *sizep) 776toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, int *bic, int *sizep)
777{ 777{
778 int i, r; 778 int i, r;
779 BLOCK_INFO *bip = *bipp; 779 BLOCK_INFO *bip = *bipp;
780 struct lfs_fcntl_markv /* { 780 struct lfs_fcntl_markv /* {
781 BLOCK_INFO *blkiov; 781 BLOCK_INFO *blkiov;
782 int blkcnt; 782 int blkcnt;
783 } */ lim; 783 } */ lim;
784 784
785 if (bic == 0 || bip == NULL) 785 if (bic == 0 || bip == NULL)
786 return; 786 return;
787 787
788 /* 788 /*
789 * Kludge: Store the disk address in segcreate so we know which 789 * Kludge: Store the disk address in segcreate so we know which
790 * ones to toss. 790 * ones to toss.
791 */ 791 */
792 for (i = 0; i < *bic; i++) 792 for (i = 0; i < *bic; i++)
793 bip[i].bi_segcreate = bip[i].bi_daddr; 793 bip[i].bi_segcreate = bip[i].bi_daddr;
794 794
795 /* Sort the blocks */ 795 /* Sort the blocks */
796 heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); 796 heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator);
797 797
798 /* Use bmapv to locate the blocks */ 798 /* Use bmapv to locate the blocks */
799 lim.blkiov = bip; 799 lim.blkiov = bip;
800 lim.blkcnt = *bic; 800 lim.blkcnt = *bic;
801 if ((r = fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim)) < 0) { 801 if ((r = fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim)) < 0) {
802 syslog(LOG_WARNING, "%s: bmapv returned %d (%m)", 802 syslog(LOG_WARNING, "%s: bmapv returned %d (%m)",
803 fs->lfs_fsmnt, r); 803 fs->lfs_fsmnt, r);
804 return; 804 return;
805 } 805 }
806 806
807 /* Toss blocks not in this segment */ 807 /* Toss blocks not in this segment */
808 heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); 808 heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator);
809 809
810 /* Get rid of stale blocks */ 810 /* Get rid of stale blocks */
811 if (sizep) 811 if (sizep)
812 *sizep = 0; 812 *sizep = 0;
813 for (i = 0; i < *bic; i++) { 813 for (i = 0; i < *bic; i++) {
814 if (bip[i].bi_segcreate != bip[i].bi_daddr) 814 if (bip[i].bi_segcreate != bip[i].bi_daddr)
815 break; 815 break;
816 if (sizep) 816 if (sizep)
817 *sizep += bip[i].bi_size; 817 *sizep += bip[i].bi_size;
818 } 818 }
819 *bic = i; /* XXX realloc bip? */ 819 *bic = i; /* XXX realloc bip? */
820 *bipp = bip; 820 *bipp = bip;
821 821
822 return; 822 return;
823} 823}
824 824
825/* 825/*
826 * Clean a segment and mark it invalid. 826 * Clean a segment and mark it invalid.
827 */ 827 */
828int 828int
829invalidate_segment(struct clfs *fs, int sn) 829invalidate_segment(struct clfs *fs, int sn)
830{ 830{
831 BLOCK_INFO *bip; 831 BLOCK_INFO *bip;
832 int i, r, bic; 832 int i, r, bic;
833 off_t nb; 833 off_t nb;
834 double util; 834 double util;
835 struct lfs_fcntl_markv /* { 835 struct lfs_fcntl_markv /* {
836 BLOCK_INFO *blkiov; 836 BLOCK_INFO *blkiov;
837 int blkcnt; 837 int blkcnt;
838 } */ lim; 838 } */ lim;
839 839
840 dlog("%s: inval seg %d", fs->lfs_fsmnt, sn); 840 dlog("%s: inval seg %d", fs->lfs_fsmnt, sn);
841 841
842 bip = NULL; 842 bip = NULL;
843 bic = 0; 843 bic = 0;
844 fs->clfs_nactive = 0; 844 fs->clfs_nactive = 0;
845 if (load_segment(fs, sn, &bip, &bic) <= 0) 845 if (load_segment(fs, sn, &bip, &bic) <= 0)
846 return -1; 846 return -1;
847 toss_old_blocks(fs, &bip, &bic, NULL); 847 toss_old_blocks(fs, &bip, &bic, NULL);
848 848
849 /* Record statistics */ 849 /* Record statistics */
850 for (i = nb = 0; i < bic; i++) 850 for (i = nb = 0; i < bic; i++)
851 nb += bip[i].bi_size; 851 nb += bip[i].bi_size;
852 util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); 852 util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize);
853 cleaner_stats.util_tot += util; 853 cleaner_stats.util_tot += util;
854 cleaner_stats.util_sos += util * util; 854 cleaner_stats.util_sos += util * util;
855 cleaner_stats.bytes_written += nb; 855 cleaner_stats.bytes_written += nb;
856 856
857 /* 857 /*
858 * Use markv to move the blocks. 858 * Use markv to move the blocks.
859 */ 859 */
860 lim.blkiov = bip; 860 lim.blkiov = bip;
861 lim.blkcnt = bic; 861 lim.blkcnt = bic;
862 if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { 862 if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) {
863 syslog(LOG_WARNING, "%s: markv returned %d (%m) " 863 syslog(LOG_WARNING, "%s: markv returned %d (%m) "
864 "for seg %d", fs->lfs_fsmnt, r, sn); 864 "for seg %d", fs->lfs_fsmnt, r, sn);
865 return r; 865 return r;
866 } 866 }
867 867
868 /* 868 /*
869 * Finally call invalidate to invalidate the segment. 869 * Finally call invalidate to invalidate the segment.
870 */ 870 */
871 if ((r = fcntl(fs->clfs_ifilefd, LFCNINVAL, &sn)) < 0) { 871 if ((r = fcntl(fs->clfs_ifilefd, LFCNINVAL, &sn)) < 0) {
872 syslog(LOG_WARNING, "%s: inval returned %d (%m) " 872 syslog(LOG_WARNING, "%s: inval returned %d (%m) "
873 "for seg %d", fs->lfs_fsmnt, r, sn); 873 "for seg %d", fs->lfs_fsmnt, r, sn);
874 return r; 874 return r;
875 } 875 }
876 876
877 return 0; 877 return 0;
878} 878}
879 879
880/* 880/*
881 * Check to see if the given ino/lbn pair is represented in the BLOCK_INFO 881 * Check to see if the given ino/lbn pair is represented in the BLOCK_INFO
882 * array we are sending to the kernel, or if the kernel will have to add it. 882 * array we are sending to the kernel, or if the kernel will have to add it.
883 * The kernel will only add each such pair once, though, so keep track of 883 * The kernel will only add each such pair once, though, so keep track of
884 * previous requests in a separate "extra" BLOCK_INFO array. Returns 1 884 * previous requests in a separate "extra" BLOCK_INFO array. Returns 1
885 * if the block needs to be added, 0 if it is already represented. 885 * if the block needs to be added, 0 if it is already represented.
886 */ 886 */
887static int 887static int
888check_or_add(ino_t ino, int32_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp) 888check_or_add(ino_t ino, int32_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp)
889{ 889{
890 BLOCK_INFO *t, *ebip = *ebipp; 890 BLOCK_INFO *t, *ebip = *ebipp;
891 int ebic = *ebicp; 891 int ebic = *ebicp;
892 int k; 892 int k;
893 893
894 for (k = 0; k < bic; k++) { 894 for (k = 0; k < bic; k++) {
895 if (bip[k].bi_inode != ino) 895 if (bip[k].bi_inode != ino)
896 break; 896 break;
897 if (bip[k].bi_lbn == lbn) { 897 if (bip[k].bi_lbn == lbn) {
898 return 0; 898 return 0;
899 } 899 }
900 } 900 }
901 901
902 /* Look on the list of extra blocks, too */ 902 /* Look on the list of extra blocks, too */
903 for (k = 0; k < ebic; k++) { 903 for (k = 0; k < ebic; k++) {
904 if (ebip[k].bi_inode == ino && ebip[k].bi_lbn == lbn) { 904 if (ebip[k].bi_inode == ino && ebip[k].bi_lbn == lbn) {
905 return 0; 905 return 0;
906 } 906 }
907 } 907 }
908 908
909 ++ebic; 909 ++ebic;
910 t = realloc(ebip, ebic * sizeof(BLOCK_INFO)); 910 t = realloc(ebip, ebic * sizeof(BLOCK_INFO));
911 if (t == NULL) 911 if (t == NULL)
912 return 1; /* Note *ebipc is not updated */ 912 return 1; /* Note *ebipc is not updated */
913 913
914 ebip = t; 914 ebip = t;
915 ebip[ebic - 1].bi_inode = ino; 915 ebip[ebic - 1].bi_inode = ino;
916 ebip[ebic - 1].bi_lbn = lbn; 916 ebip[ebic - 1].bi_lbn = lbn;
917 917
918 *ebipp = ebip; 918 *ebipp = ebip;
919 *ebicp = ebic; 919 *ebicp = ebic;
920 return 1; 920 return 1;
921} 921}
922 922
923/* 923/*
924 * Look for indirect blocks we will have to write which are not 924 * Look for indirect blocks we will have to write which are not
925 * contained in this collection of blocks. This constitutes 925 * contained in this collection of blocks. This constitutes
926 * a hidden cleaning cost, since we are unaware of it until we 926 * a hidden cleaning cost, since we are unaware of it until we
927 * have already read the segments. Return the total cost, and fill 927 * have already read the segments. Return the total cost, and fill
928 * in *ifc with the part of that cost due to rewriting the Ifile. 928 * in *ifc with the part of that cost due to rewriting the Ifile.
929 */ 929 */
930static off_t 930static off_t
931check_hidden_cost(struct clfs *fs, BLOCK_INFO *bip, int bic, off_t *ifc) 931check_hidden_cost(struct clfs *fs, BLOCK_INFO *bip, int bic, off_t *ifc)
932{ 932{
933 int start; 933 int start;
934 struct indir in[NIADDR + 1]; 934 struct indir in[NIADDR + 1];
935 int num; 935 int num;
936 int i, j, ebic; 936 int i, j, ebic;
937 BLOCK_INFO *ebip; 937 BLOCK_INFO *ebip;
938 int32_t lbn; 938 int32_t lbn;
939 939
940 start = 0; 940 start = 0;
941 ebip = NULL; 941 ebip = NULL;
942 ebic = 0; 942 ebic = 0;
943 for (i = 0; i < bic; i++) { 943 for (i = 0; i < bic; i++) {
944 if (i == 0 || bip[i].bi_inode != bip[start].bi_inode) { 944 if (i == 0 || bip[i].bi_inode != bip[start].bi_inode) {
945 start = i; 945 start = i;
946 /* 946 /*
947 * Look for IFILE blocks, unless this is the Ifile. 947 * Look for IFILE blocks, unless this is the Ifile.
948 */ 948 */
949 if (bip[i].bi_inode != fs->lfs_ifile) { 949 if (bip[i].bi_inode != fs->lfs_ifile) {
950 lbn = fs->lfs_cleansz + bip[i].bi_inode / 950 lbn = fs->lfs_cleansz + bip[i].bi_inode /
951 fs->lfs_ifpb; 951 fs->lfs_ifpb;
952 *ifc += check_or_add(fs->lfs_ifile, lbn, 952 *ifc += check_or_add(fs->lfs_ifile, lbn,
953 bip, bic, &ebip, &ebic); 953 bip, bic, &ebip, &ebic);
954 } 954 }
955 } 955 }
956 if (bip[i].bi_lbn == LFS_UNUSED_LBN) 956 if (bip[i].bi_lbn == LFS_UNUSED_LBN)
957 continue; 957 continue;
958 if (bip[i].bi_lbn < NDADDR) 958 if (bip[i].bi_lbn < NDADDR)
959 continue; 959 continue;
960 960
961 ufs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num); 961 ufs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num);
962 for (j = 0; j < num; j++) { 962 for (j = 0; j < num; j++) {
963 check_or_add(bip[i].bi_inode, in[j].in_lbn, 963 check_or_add(bip[i].bi_inode, in[j].in_lbn,
964 bip + start, bic - start, &ebip, &ebic); 964 bip + start, bic - start, &ebip, &ebic);
965 } 965 }
966 } 966 }
967 return ebic; 967 return ebic;
968} 968}
969 969
970/* 970/*
971 * Select segments to clean, add blocks from these segments to a cleaning 971 * Select segments to clean, add blocks from these segments to a cleaning
972 * list, and send this list through lfs_markv() to move them to new 972 * list, and send this list through lfs_markv() to move them to new
973 * locations on disk. 973 * locations on disk.
974 */ 974 */
975int 975int
976clean_fs(struct clfs *fs, CLEANERINFO *cip) 976clean_fs(struct clfs *fs, CLEANERINFO *cip)
977{ 977{
978 int i, j, ngood, sn, bic, r, npos; 978 int i, j, ngood, sn, bic, r, npos;
979 int bytes, totbytes; 979 int bytes, totbytes;
980 struct ubuf *bp; 980 struct ubuf *bp;
981 SEGUSE *sup; 981 SEGUSE *sup;
982 static BLOCK_INFO *bip; 982 static BLOCK_INFO *bip;
983 struct lfs_fcntl_markv /* { 983 struct lfs_fcntl_markv /* {
984 BLOCK_INFO *blkiov; 984 BLOCK_INFO *blkiov;
985 int blkcnt; 985 int blkcnt;
986 } */ lim; 986 } */ lim;
987 int mc; 987 int mc;
988 BLOCK_INFO *mbip; 988 BLOCK_INFO *mbip;
989 int inc; 989 int inc;
990 off_t nb; 990 off_t nb;
991 off_t goal; 991 off_t goal;
992 off_t extra, if_extra; 992 off_t extra, if_extra;
993 double util; 993 double util;
994 994
995 /* Read the segment table into our private structure */ 995 /* Read the segment table into our private structure */
996 npos = 0; 996 npos = 0;
997 for (i = 0; i < fs->lfs_nseg; i+= fs->lfs_sepb) { 997 for (i = 0; i < fs->lfs_nseg; i+= fs->lfs_sepb) {
998 bread(fs->lfs_ivnode, fs->lfs_cleansz + i / fs->lfs_sepb, 998 bread(fs->lfs_ivnode, fs->lfs_cleansz + i / fs->lfs_sepb,
999 fs->lfs_bsize, NOCRED, 0, &bp); 999 fs->lfs_bsize, NOCRED, 0, &bp);
1000 for (j = 0; j < fs->lfs_sepb && i + j < fs->lfs_nseg; j++) { 1000 for (j = 0; j < fs->lfs_sepb && i + j < fs->lfs_nseg; j++) {
1001 sup = ((SEGUSE *)bp->b_data) + j; 1001 sup = ((SEGUSE *)bp->b_data) + j;
1002 fs->clfs_segtab[i + j].nbytes = sup->su_nbytes; 1002 fs->clfs_segtab[i + j].nbytes = sup->su_nbytes;
1003 fs->clfs_segtab[i + j].nsums = sup->su_nsums; 1003 fs->clfs_segtab[i + j].nsums = sup->su_nsums;
1004 fs->clfs_segtab[i + j].lastmod = sup->su_lastmod; 1004 fs->clfs_segtab[i + j].lastmod = sup->su_lastmod;
1005 /* Keep error status but renew other flags */ 1005 /* Keep error status but renew other flags */
1006 fs->clfs_segtab[i + j].flags &= SEGUSE_ERROR; 1006 fs->clfs_segtab[i + j].flags &= SEGUSE_ERROR;
1007 fs->clfs_segtab[i + j].flags |= sup->su_flags; 1007 fs->clfs_segtab[i + j].flags |= sup->su_flags;
1008 1008
1009 /* Compute cost-benefit coefficient */ 1009 /* Compute cost-benefit coefficient */
1010 calc_cb(fs, i + j, fs->clfs_segtab + i + j); 1010 calc_cb(fs, i + j, fs->clfs_segtab + i + j);
1011 if (fs->clfs_segtab[i + j].priority > 0) 1011 if (fs->clfs_segtab[i + j].priority > 0)
1012 ++npos; 1012 ++npos;
1013 } 1013 }
1014 brelse(bp, 0); 1014 brelse(bp, 0);
1015 } 1015 }
1016 1016
1017 /* Sort segments based on cleanliness, fulness, and condition */ 1017 /* Sort segments based on cleanliness, fulness, and condition */
1018 heapsort(fs->clfs_segtabp, fs->lfs_nseg, sizeof(struct clfs_seguse *), 1018 heapsort(fs->clfs_segtabp, fs->lfs_nseg, sizeof(struct clfs_seguse *),
1019 cb_comparator); 1019 cb_comparator);
1020 1020
1021 /* If no segment is cleanable, just return */ 1021 /* If no segment is cleanable, just return */
1022 if (fs->clfs_segtabp[0]->priority == 0) { 1022 if (fs->clfs_segtabp[0]->priority == 0) {
1023 dlog("%s: no segment cleanable", fs->lfs_fsmnt); 1023 dlog("%s: no segment cleanable", fs->lfs_fsmnt);
1024 return 0; 1024 return 0;
1025 } 1025 }
1026 1026
1027 /* Load some segments' blocks into bip */ 1027 /* Load some segments' blocks into bip */
1028 bic = 0; 1028 bic = 0;
1029 fs->clfs_nactive = 0; 1029 fs->clfs_nactive = 0;
1030 ngood = 0; 1030 ngood = 0;
1031 if (use_bytes) { 1031 if (use_bytes) {
1032 /* Set attainable goal */ 1032 /* Set attainable goal */
1033 goal = fs->lfs_ssize * atatime; 1033 goal = fs->lfs_ssize * atatime;
1034 if (goal > (cip->clean - 1) * fs->lfs_ssize / 2) 1034 if (goal > (cip->clean - 1) * fs->lfs_ssize / 2)
1035 goal = MAX((cip->clean - 1) * fs->lfs_ssize, 1035 goal = MAX((cip->clean - 1) * fs->lfs_ssize,
1036 fs->lfs_ssize) / 2; 1036 fs->lfs_ssize) / 2;
1037 1037
1038 dlog("%s: cleaning with goal %" PRId64 1038 dlog("%s: cleaning with goal %" PRId64
1039 " bytes (%d segs clean, %d cleanable)", 1039 " bytes (%d segs clean, %d cleanable)",
1040 fs->lfs_fsmnt, goal, cip->clean, npos); 1040 fs->lfs_fsmnt, goal, cip->clean, npos);
1041 syslog(LOG_INFO, "%s: cleaning with goal %" PRId64 1041 syslog(LOG_INFO, "%s: cleaning with goal %" PRId64
1042 " bytes (%d segs clean, %d cleanable)", 1042 " bytes (%d segs clean, %d cleanable)",
1043 fs->lfs_fsmnt, goal, cip->clean, npos); 1043 fs->lfs_fsmnt, goal, cip->clean, npos);
1044 totbytes = 0; 1044 totbytes = 0;
1045 for (i = 0; i < fs->lfs_nseg && totbytes < goal; i++) { 1045 for (i = 0; i < fs->lfs_nseg && totbytes < goal; i++) {
1046 if (fs->clfs_segtabp[i]->priority == 0) 1046 if (fs->clfs_segtabp[i]->priority == 0)
1047 break; 1047 break;
1048 /* Upper bound on number of segments at once */ 1048 /* Upper bound on number of segments at once */
1049 if (ngood * fs->lfs_ssize > 4 * goal) 1049 if (ngood * fs->lfs_ssize > 4 * goal)
1050 break; 1050 break;
1051 sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); 1051 sn = (fs->clfs_segtabp[i] - fs->clfs_segtab);
1052 dlog("%s: add seg %d prio %" PRIu64 1052 dlog("%s: add seg %d prio %" PRIu64
1053 " containing %ld bytes", 1053 " containing %ld bytes",
1054 fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority, 1054 fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority,
1055 fs->clfs_segtabp[i]->nbytes); 1055 fs->clfs_segtabp[i]->nbytes);
1056 if ((r = load_segment(fs, sn, &bip, &bic)) > 0) { 1056 if ((r = load_segment(fs, sn, &bip, &bic)) > 0) {
1057 ++ngood; 1057 ++ngood;
1058 toss_old_blocks(fs, &bip, &bic, &bytes); 1058 toss_old_blocks(fs, &bip, &bic, &bytes);
1059 totbytes += bytes; 1059 totbytes += bytes;
1060 } else if (r == 0) 1060 } else if (r == 0)
1061 fd_release(fs->clfs_devvp); 1061 fd_release(fs->clfs_devvp);
1062 else 1062 else
1063 break; 1063 break;
1064 } 1064 }
1065 } else { 1065 } else {
1066 /* Set attainable goal */ 1066 /* Set attainable goal */
1067 goal = atatime; 1067 goal = atatime;
1068 if (goal > cip->clean - 1) 1068 if (goal > cip->clean - 1)
1069 goal = MAX(cip->clean - 1, 1); 1069 goal = MAX(cip->clean - 1, 1);
1070 1070
1071 dlog("%s: cleaning with goal %d segments (%d clean, %d cleanable)", 1071 dlog("%s: cleaning with goal %d segments (%d clean, %d cleanable)",
1072 fs->lfs_fsmnt, (int)goal, cip->clean, npos); 1072 fs->lfs_fsmnt, (int)goal, cip->clean, npos);
1073 for (i = 0; i < fs->lfs_nseg && ngood < goal; i++) { 1073 for (i = 0; i < fs->lfs_nseg && ngood < goal; i++) {
1074 if (fs->clfs_segtabp[i]->priority == 0) 1074 if (fs->clfs_segtabp[i]->priority == 0)
1075 break; 1075 break;
1076 sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); 1076 sn = (fs->clfs_segtabp[i] - fs->clfs_segtab);
1077 dlog("%s: add seg %d prio %" PRIu64, 1077 dlog("%s: add seg %d prio %" PRIu64,
1078 fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority); 1078 fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority);
1079 if ((r = load_segment(fs, sn, &bip, &bic)) > 0) 1079 if ((r = load_segment(fs, sn, &bip, &bic)) > 0)
1080 ++ngood; 1080 ++ngood;
1081 else if (r == 0) 1081 else if (r == 0)
1082 fd_release(fs->clfs_devvp); 1082 fd_release(fs->clfs_devvp);
1083 else 1083 else
1084 break; 1084 break;
1085 } 1085 }
1086 toss_old_blocks(fs, &bip, &bic, NULL); 1086 toss_old_blocks(fs, &bip, &bic, NULL);
1087 } 1087 }
1088 1088
1089 /* If there is nothing to do, try again later. */ 1089 /* If there is nothing to do, try again later. */
1090 if (bic == 0) { 1090 if (bic == 0) {
1091 dlog("%s: no blocks to clean in %d cleanable segments", 1091 dlog("%s: no blocks to clean in %d cleanable segments",
1092 fs->lfs_fsmnt, (int)ngood); 1092 fs->lfs_fsmnt, (int)ngood);
1093 fd_release_all(fs->clfs_devvp); 1093 fd_release_all(fs->clfs_devvp);
1094 return 0; 1094 return 0;
1095 } 1095 }
1096 1096
1097 /* Record statistics */ 1097 /* Record statistics */
1098 for (i = nb = 0; i < bic; i++) 1098 for (i = nb = 0; i < bic; i++)
1099 nb += bip[i].bi_size; 1099 nb += bip[i].bi_size;
1100 util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); 1100 util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize);
1101 cleaner_stats.util_tot += util; 1101 cleaner_stats.util_tot += util;
1102 cleaner_stats.util_sos += util * util; 1102 cleaner_stats.util_sos += util * util;
1103 cleaner_stats.bytes_written += nb; 1103 cleaner_stats.bytes_written += nb;
1104 1104
1105 /* 1105 /*
1106 * Check out our blocks to see if there are hidden cleaning costs. 1106 * Check out our blocks to see if there are hidden cleaning costs.
1107 * If there are, we might be cleaning ourselves deeper into a hole 1107 * If there are, we might be cleaning ourselves deeper into a hole
1108 * rather than doing anything useful. 1108 * rather than doing anything useful.
1109 * XXX do something about this. 1109 * XXX do something about this.
1110 */ 1110 */
1111 if_extra = 0; 1111 if_extra = 0;
1112 extra = fs->lfs_bsize * (off_t)check_hidden_cost(fs, bip, bic, &if_extra); 1112 extra = fs->lfs_bsize * (off_t)check_hidden_cost(fs, bip, bic, &if_extra);
1113 if_extra *= fs->lfs_bsize; 1113 if_extra *= fs->lfs_bsize;
1114 1114
1115 /* 1115 /*
1116 * Use markv to move the blocks. 1116 * Use markv to move the blocks.
1117 */ 1117 */
1118 if (do_small)  1118 if (do_small)
1119 inc = MAXPHYS / fs->lfs_bsize - 1; 1119 inc = MAXPHYS / fs->lfs_bsize - 1;
1120 else 1120 else
1121 inc = LFS_MARKV_MAXBLKCNT / 2; 1121 inc = LFS_MARKV_MAXBLKCNT / 2;
1122 for (mc = 0, mbip = bip; mc < bic; mc += inc, mbip += inc) { 1122 for (mc = 0, mbip = bip; mc < bic; mc += inc, mbip += inc) {
1123 lim.blkiov = mbip; 1123 lim.blkiov = mbip;
1124 lim.blkcnt = (bic - mc > inc ? inc : bic - mc); 1124 lim.blkcnt = (bic - mc > inc ? inc : bic - mc);
1125#ifdef TEST_PATTERN 1125#ifdef TEST_PATTERN
1126 dlog("checking blocks %d-%d", mc, mc + lim.blkcnt - 1); 1126 dlog("checking blocks %d-%d", mc, mc + lim.blkcnt - 1);
1127 for (i = 0; i < lim.blkcnt; i++) { 1127 for (i = 0; i < lim.blkcnt; i++) {
1128 check_test_pattern(mbip + i); 1128 check_test_pattern(mbip + i);
1129 } 1129 }
1130#endif /* TEST_PATTERN */ 1130#endif /* TEST_PATTERN */
1131 dlog("sending blocks %d-%d", mc, mc + lim.blkcnt - 1); 1131 dlog("sending blocks %d-%d", mc, mc + lim.blkcnt - 1);
1132 if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { 1132 if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) {
1133 syslog(LOG_WARNING, "%s: markv returned %d (%m)", 1133 syslog(LOG_WARNING, "%s: markv returned %d (%m)",
1134 fs->lfs_fsmnt, r); 1134 fs->lfs_fsmnt, r);
1135 if (errno != EAGAIN && errno != ESHUTDOWN) { 1135 if (errno != EAGAIN && errno != ESHUTDOWN) {
1136 fd_release_all(fs->clfs_devvp); 1136 fd_release_all(fs->clfs_devvp);
1137 return r; 1137 return r;
1138 } 1138 }
1139 } 1139 }
1140 } 1140 }
1141 1141
1142 /* 1142 /*
1143 * Report progress (or lack thereof) 1143 * Report progress (or lack thereof)
1144 */ 1144 */
1145 syslog(LOG_INFO, "%s: wrote %" PRId64 " dirty + %" 1145 syslog(LOG_INFO, "%s: wrote %" PRId64 " dirty + %"
1146 PRId64 " supporting indirect + %" 1146 PRId64 " supporting indirect + %"
1147 PRId64 " supporting Ifile = %" 1147 PRId64 " supporting Ifile = %"
1148 PRId64 " bytes to clean %d segs (%" PRId64 "%% recovery)", 1148 PRId64 " bytes to clean %d segs (%" PRId64 "%% recovery)",
1149 fs->lfs_fsmnt, (int64_t)nb, (int64_t)(extra - if_extra), 1149 fs->lfs_fsmnt, (int64_t)nb, (int64_t)(extra - if_extra),
1150 (int64_t)if_extra, (int64_t)(nb + extra), ngood, 1150 (int64_t)if_extra, (int64_t)(nb + extra), ngood,
1151 (ngood ? (int64_t)(100 - (100 * (nb + extra)) / 1151 (ngood ? (int64_t)(100 - (100 * (nb + extra)) /
1152 (ngood * fs->lfs_ssize)) : 1152 (ngood * fs->lfs_ssize)) :
1153 (int64_t)0)); 1153 (int64_t)0));
1154 if (nb + extra >= ngood * fs->lfs_ssize) 1154 if (nb + extra >= ngood * fs->lfs_ssize)
1155 syslog(LOG_WARNING, "%s: cleaner not making forward progress", 1155 syslog(LOG_WARNING, "%s: cleaner not making forward progress",
1156 fs->lfs_fsmnt); 1156 fs->lfs_fsmnt);
1157 1157
1158 /* 1158 /*
1159 * Finally call reclaim to prompt cleaning of the segments. 1159 * Finally call reclaim to prompt cleaning of the segments.
1160 */ 1160 */
1161 fcntl(fs->clfs_ifilefd, LFCNRECLAIM, NULL); 1161 fcntl(fs->clfs_ifilefd, LFCNRECLAIM, NULL);
1162 1162
1163 fd_release_all(fs->clfs_devvp); 1163 fd_release_all(fs->clfs_devvp);
1164 return 0; 1164 return 0;
1165} 1165}
1166 1166
1167/* 1167/*
1168 * Read the cleanerinfo block and apply cleaning policy to determine whether 1168 * Read the cleanerinfo block and apply cleaning policy to determine whether
1169 * the given filesystem needs to be cleaned. Returns 1 if it does, 0 if it 1169 * the given filesystem needs to be cleaned. Returns 1 if it does, 0 if it
1170 * does not, or -1 on error. 1170 * does not, or -1 on error.
1171 */ 1171 */
1172int 1172int
1173needs_cleaning(struct clfs *fs, CLEANERINFO *cip) 1173needs_cleaning(struct clfs *fs, CLEANERINFO *cip)
1174{ 1174{
1175 struct ubuf *bp; 1175 struct ubuf *bp;
1176 struct stat st; 1176 struct stat st;
1177 daddr_t fsb_per_seg, max_free_segs; 1177 daddr_t fsb_per_seg, max_free_segs;
1178 time_t now; 1178 time_t now;
1179 double loadavg; 1179 double loadavg;
1180 1180
1181 /* If this fs is "on hold", don't clean it. */ 1181 /* If this fs is "on hold", don't clean it. */
1182 if (fs->clfs_onhold) 1182 if (fs->clfs_onhold)
1183 return 0; 1183 return 0;
1184 1184
1185 /* 1185 /*
1186 * Read the cleanerinfo block from the Ifile. We don't want 1186 * Read the cleanerinfo block from the Ifile. We don't want
1187 * the cached information, so invalidate the buffer before 1187 * the cached information, so invalidate the buffer before
1188 * handing it back. 1188 * handing it back.
1189 */ 1189 */
1190 if (bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp)) { 1190 if (bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp)) {
1191 syslog(LOG_ERR, "%s: can't read inode", fs->lfs_fsmnt); 1191 syslog(LOG_ERR, "%s: can't read inode", fs->lfs_fsmnt);
1192 return -1; 1192 return -1;
1193 } 1193 }
1194 *cip = *(CLEANERINFO *)bp->b_data; /* Structure copy */ 1194 *cip = *(CLEANERINFO *)bp->b_data; /* Structure copy */
1195 brelse(bp, B_INVAL); 1195 brelse(bp, B_INVAL);
1196 cleaner_stats.bytes_read += fs->lfs_bsize; 1196 cleaner_stats.bytes_read += fs->lfs_bsize;
1197 1197
1198 /* 1198 /*
1199 * If the number of segments changed under us, reinit. 1199 * If the number of segments changed under us, reinit.
1200 * We don't have to start over from scratch, however, 1200 * We don't have to start over from scratch, however,
1201 * since we don't hold any buffers. 1201 * since we don't hold any buffers.
1202 */ 1202 */
1203 if (fs->lfs_nseg != cip->clean + cip->dirty) { 1203 if (fs->lfs_nseg != cip->clean + cip->dirty) {
1204 if (reinit_fs(fs) < 0) { 1204 if (reinit_fs(fs) < 0) {
1205 /* The normal case for unmount */ 1205 /* The normal case for unmount */
1206 syslog(LOG_NOTICE, "%s: filesystem unmounted", fs->lfs_fsmnt); 1206 syslog(LOG_NOTICE, "%s: filesystem unmounted", fs->lfs_fsmnt);
1207 return -1; 1207 return -1;
1208 } 1208 }
1209 syslog(LOG_NOTICE, "%s: nsegs changed", fs->lfs_fsmnt); 1209 syslog(LOG_NOTICE, "%s: nsegs changed", fs->lfs_fsmnt);
1210 } 1210 }
1211 1211
1212 /* Compute theoretical "free segments" maximum based on usage */ 1212 /* Compute theoretical "free segments" maximum based on usage */
1213 fsb_per_seg = segtod(fs, 1); 1213 fsb_per_seg = segtod(fs, 1);
1214 max_free_segs = MAX(cip->bfree, 0) / fsb_per_seg + fs->lfs_minfreeseg; 1214 max_free_segs = MAX(cip->bfree, 0) / fsb_per_seg + fs->lfs_minfreeseg;
1215 1215
1216 dlog("%s: bfree = %d, avail = %d, clean = %d/%d", 1216 dlog("%s: bfree = %d, avail = %d, clean = %d/%d",
1217 fs->lfs_fsmnt, cip->bfree, cip->avail, cip->clean, fs->lfs_nseg); 1217 fs->lfs_fsmnt, cip->bfree, cip->avail, cip->clean, fs->lfs_nseg);
1218 1218
1219 /* If the writer is waiting on us, clean it */ 1219 /* If the writer is waiting on us, clean it */
1220 if (cip->clean <= fs->lfs_minfreeseg || 1220 if (cip->clean <= fs->lfs_minfreeseg ||
1221 (cip->flags & LFS_CLEANER_MUST_CLEAN)) 1221 (cip->flags & LFS_CLEANER_MUST_CLEAN))
1222 return 1; 1222 return 1;
1223 1223
1224 /* If there are enough segments, don't clean it */ 1224 /* If there are enough segments, don't clean it */
1225 if (cip->bfree - cip->avail <= fsb_per_seg && 1225 if (cip->bfree - cip->avail <= fsb_per_seg &&
1226 cip->avail > fsb_per_seg) 1226 cip->avail > fsb_per_seg)
1227 return 0; 1227 return 0;
1228 1228
1229 /* If we are in dire straits, clean it */ 1229 /* If we are in dire straits, clean it */
1230 if (cip->bfree - cip->avail > fsb_per_seg && 1230 if (cip->bfree - cip->avail > fsb_per_seg &&
1231 cip->avail <= fsb_per_seg) 1231 cip->avail <= fsb_per_seg)
1232 return 1; 1232 return 1;
1233 1233
1234 /* If under busy threshold, clean regardless of load */ 1234 /* If under busy threshold, clean regardless of load */
1235 if (cip->clean < max_free_segs * BUSY_LIM) 1235 if (cip->clean < max_free_segs * BUSY_LIM)
1236 return 1; 1236 return 1;
1237 1237
1238 /* Check busy status; clean if idle and under idle limit */ 1238 /* Check busy status; clean if idle and under idle limit */
1239 if (use_fs_idle) { 1239 if (use_fs_idle) {
1240 /* Filesystem idle */ 1240 /* Filesystem idle */
1241 time(&now); 1241 time(&now);
1242 if (fstat(fs->clfs_ifilefd, &st) < 0) { 1242 if (fstat(fs->clfs_ifilefd, &st) < 0) {
1243 syslog(LOG_ERR, "%s: failed to stat ifile", 1243 syslog(LOG_ERR, "%s: failed to stat ifile",
1244 fs->lfs_fsmnt); 1244 fs->lfs_fsmnt);
1245 return -1; 1245 return -1;
1246 } 1246 }
1247 if (now - st.st_mtime > segwait_timeout && 1247 if (now - st.st_mtime > segwait_timeout &&
1248 cip->clean < max_free_segs * IDLE_LIM) 1248 cip->clean < max_free_segs * IDLE_LIM)
1249 return 1; 1249 return 1;
1250 } else { 1250 } else {
1251 /* CPU idle - use one-minute load avg */ 1251 /* CPU idle - use one-minute load avg */
1252 if (getloadavg(&loadavg, 1) == -1) { 1252 if (getloadavg(&loadavg, 1) == -1) {
1253 syslog(LOG_ERR, "%s: failed to get load avg", 1253 syslog(LOG_ERR, "%s: failed to get load avg",
1254 fs->lfs_fsmnt); 1254 fs->lfs_fsmnt);
1255 return -1; 1255 return -1;
1256 } 1256 }
1257 if (loadavg < load_threshold && 1257 if (loadavg < load_threshold &&
1258 cip->clean < max_free_segs * IDLE_LIM) 1258 cip->clean < max_free_segs * IDLE_LIM)
1259 return 1; 1259 return 1;
1260 } 1260 }
1261 1261
1262 return 0; 1262 return 0;
1263} 1263}
1264 1264
1265/* 1265/*
1266 * Report statistics. If the signal was SIGUSR2, clear the statistics too. 1266 * Report statistics. If the signal was SIGUSR2, clear the statistics too.
1267 * If the signal was SIGINT, exit. 1267 * If the signal was SIGINT, exit.
1268 */ 1268 */
1269static void 1269static void
1270sig_report(int sig) 1270sig_report(int sig)
1271{ 1271{
1272 double avg = 0.0, stddev; 1272 double avg = 0.0, stddev;
1273 1273
1274 avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0); 1274 avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0);
1275 stddev = cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - 1275 stddev = cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned -
1276 avg * avg, 1.0); 1276 avg * avg, 1.0);
1277 syslog(LOG_INFO, "bytes read: %" PRId64, cleaner_stats.bytes_read); 1277 syslog(LOG_INFO, "bytes read: %" PRId64, cleaner_stats.bytes_read);
1278 syslog(LOG_INFO, "bytes written: %" PRId64, cleaner_stats.bytes_written); 1278 syslog(LOG_INFO, "bytes written: %" PRId64, cleaner_stats.bytes_written);
1279 syslog(LOG_INFO, "segments cleaned: %" PRId64, cleaner_stats.segs_cleaned); 1279 syslog(LOG_INFO, "segments cleaned: %" PRId64, cleaner_stats.segs_cleaned);
1280#if 0 1280#if 0
1281 /* "Empty segments" is meaningless, since the kernel handles those */ 1281 /* "Empty segments" is meaningless, since the kernel handles those */
1282 syslog(LOG_INFO, "empty segments: %" PRId64, cleaner_stats.segs_empty); 1282 syslog(LOG_INFO, "empty segments: %" PRId64, cleaner_stats.segs_empty);
1283#endif 1283#endif
1284 syslog(LOG_INFO, "error segments: %" PRId64, cleaner_stats.segs_error); 1284 syslog(LOG_INFO, "error segments: %" PRId64, cleaner_stats.segs_error);
1285 syslog(LOG_INFO, "utilization total: %g", cleaner_stats.util_tot); 1285 syslog(LOG_INFO, "utilization total: %g", cleaner_stats.util_tot);
1286 syslog(LOG_INFO, "utilization sos: %g", cleaner_stats.util_sos); 1286 syslog(LOG_INFO, "utilization sos: %g", cleaner_stats.util_sos);
1287 syslog(LOG_INFO, "utilization avg: %4.2f", avg); 1287 syslog(LOG_INFO, "utilization avg: %4.2f", avg);
1288 syslog(LOG_INFO, "utilization sdev: %9.6f", stddev); 1288 syslog(LOG_INFO, "utilization sdev: %9.6f", stddev);
1289 1289
1290 if (debug) 1290 if (debug)
1291 bufstats(); 1291 bufstats();
1292 1292
1293 if (sig == SIGUSR2) 1293 if (sig == SIGUSR2)
1294 memset(&cleaner_stats, 0, sizeof(cleaner_stats)); 1294 memset(&cleaner_stats, 0, sizeof(cleaner_stats));
1295 if (sig == SIGINT) 1295 if (sig == SIGINT)
1296 exit(0); 1296 exit(0);
1297} 1297}
1298 1298
/*
 * Signal handler that terminates the process with a success status.
 * main() installs it for SIGINT when the cleaner runs detached.
 */
static void
sig_exit(int sig)
{
	(void)sig;	/* the signal number is irrelevant here */
	exit(EXIT_SUCCESS);
}
1304 1304
/*
 * Print the command-line synopsis and exit with status 1.
 *
 * The synopsis lists every option accepted by the getopt() string in
 * main(), including -D (stay in the foreground) and -C (copy log file),
 * which were previously accepted but not shown.
 */
static void
usage(void)
{
	errx(1, "usage: lfs_cleanerd [-bcDdfmqs] [-C copylog] [-i segnum] "
	    "[-l load] [-n nsegs] [-r report_freq] [-t timeout] fs_name ...");
}
1311 1311
1312/* 1312/*
1313 * Main. 1313 * Main.
1314 */ 1314 */
1315int 1315int
1316main(int argc, char **argv) 1316main(int argc, char **argv)
1317{ 1317{
1318 int i, opt, error, r, loopcount, nodetach; 1318 int i, opt, error, r, loopcount, nodetach;
1319 struct timeval tv; 1319 struct timeval tv;
1320 CLEANERINFO ci; 1320 CLEANERINFO ci;
1321#ifndef USE_CLIENT_SERVER 1321#ifndef USE_CLIENT_SERVER
1322 char *cp, *pidname; 1322 char *cp, *pidname;
1323#endif 1323#endif
1324 1324
1325 /* 1325 /*
1326 * Set up defaults 1326 * Set up defaults
1327 */ 1327 */
1328 atatime = 1; 1328 atatime = 1;
1329 segwait_timeout = 300; /* Five minutes */ 1329 segwait_timeout = 300; /* Five minutes */
1330 load_threshold = 0.2; 1330 load_threshold = 0.2;
1331 stat_report = 0; 1331 stat_report = 0;
1332 inval_segment = -1; 1332 inval_segment = -1;
1333 copylog_filename = NULL; 1333 copylog_filename = NULL;
1334 nodetach = 0; 1334 nodetach = 0;
1335 1335
1336 /* 1336 /*
1337 * Parse command-line arguments 1337 * Parse command-line arguments
1338 */ 1338 */
1339 while ((opt = getopt(argc, argv, "bC:cdfi:l:mn:qr:st:")) != -1) { 1339 while ((opt = getopt(argc, argv, "bC:cdDfi:l:mn:qr:st:")) != -1) {
1340 switch (opt) { 1340 switch (opt) {
1341 case 'b': /* Use bytes written, not segments read */ 1341 case 'b': /* Use bytes written, not segments read */
1342 use_bytes = 1; 1342 use_bytes = 1;
1343 break; 1343 break;
1344 case 'C': /* copy log */ 1344 case 'C': /* copy log */
1345 copylog_filename = optarg; 1345 copylog_filename = optarg;
1346 break; 1346 break;
1347 case 'c': /* Coalesce files */ 1347 case 'c': /* Coalesce files */
1348 do_coalesce++; 1348 do_coalesce++;
1349 break; 1349 break;
1350 case 'd': /* Debug mode. */ 1350 case 'd': /* Debug mode. */
1351 nodetach++; 1351 nodetach++;
1352 debug++; 1352 debug++;
1353 break; 1353 break;
1354 case 'D': /* stay-on-foreground */ 1354 case 'D': /* stay-on-foreground */
1355 nodetach++; 1355 nodetach++;
1356 break; 1356 break;
1357 case 'f': /* Use fs idle time rather than cpu idle */ 1357 case 'f': /* Use fs idle time rather than cpu idle */
1358 use_fs_idle = 1; 1358 use_fs_idle = 1;
1359 break; 1359 break;
1360 case 'i': /* Invalidate this segment */ 1360 case 'i': /* Invalidate this segment */
1361 inval_segment = atoi(optarg); 1361 inval_segment = atoi(optarg);
1362 break; 1362 break;
1363 case 'l': /* Load below which to clean */ 1363 case 'l': /* Load below which to clean */
1364 load_threshold = atof(optarg); 1364 load_threshold = atof(optarg);
1365 break; 1365 break;
1366 case 'm': /* [compat only] */ 1366 case 'm': /* [compat only] */
1367 break; 1367 break;
1368 case 'n': /* How many segs to clean at once */ 1368 case 'n': /* How many segs to clean at once */
1369 atatime = atoi(optarg); 1369 atatime = atoi(optarg);
1370 break; 1370 break;
1371 case 'q': /* Quit after one run */ 1371 case 'q': /* Quit after one run */
1372 do_quit = 1; 1372 do_quit = 1;
1373 break; 1373 break;
1374 case 'r': /* Report every stat_report segments */ 1374 case 'r': /* Report every stat_report segments */
1375 stat_report = atoi(optarg); 1375 stat_report = atoi(optarg);
1376 break; 1376 break;
1377 case 's': /* Small writes */ 1377 case 's': /* Small writes */
1378 do_small = 1; 1378 do_small = 1;
1379 break; 1379 break;
1380 case 't': /* timeout */ 1380 case 't': /* timeout */
1381 segwait_timeout = atoi(optarg); 1381 segwait_timeout = atoi(optarg);
1382 break; 1382 break;
1383 default: 1383 default:
1384 usage(); 1384 usage();
1385 /* NOTREACHED */ 1385 /* NOTREACHED */
1386 } 1386 }
1387 } 1387 }
1388 argc -= optind; 1388 argc -= optind;
1389 argv += optind; 1389 argv += optind;
1390 1390
1391 if (argc < 1) 1391 if (argc < 1)
1392 usage(); 1392 usage();
1393 if (inval_segment >= 0 && argc != 1) { 1393 if (inval_segment >= 0 && argc != 1) {
1394 errx(1, "lfs_cleanerd: may only specify one filesystem when " 1394 errx(1, "lfs_cleanerd: may only specify one filesystem when "
1395 "using -i flag"); 1395 "using -i flag");
1396 } 1396 }
1397 1397
1398 if (do_coalesce) { 1398 if (do_coalesce) {
1399 errx(1, "lfs_cleanerd: -c disabled due to reports of file " 1399 errx(1, "lfs_cleanerd: -c disabled due to reports of file "
1400 "corruption; you may re-enable it by rebuilding the " 1400 "corruption; you may re-enable it by rebuilding the "
1401 "cleaner"); 1401 "cleaner");
1402 } 1402 }
1403 1403
1404 /* 1404 /*
1405 * Set up daemon mode or foreground mode 1405 * Set up daemon mode or foreground mode
1406 */ 1406 */
1407 if (nodetach) { 1407 if (nodetach) {
1408 openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR, 1408 openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR,
1409 LOG_DAEMON); 1409 LOG_DAEMON);
1410 signal(SIGINT, sig_report); 1410 signal(SIGINT, sig_report);
1411 } else { 1411 } else {
1412 if (daemon(0, 0) == -1) 1412 if (daemon(0, 0) == -1)
1413 err(1, "lfs_cleanerd: couldn't become a daemon!"); 1413 err(1, "lfs_cleanerd: couldn't become a daemon!");
1414 openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID, LOG_DAEMON); 1414 openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID, LOG_DAEMON);
1415 signal(SIGINT, sig_exit); 1415 signal(SIGINT, sig_exit);
1416 } 1416 }
1417 1417
1418 /* 1418 /*
1419 * Look for an already-running master daemon. If there is one, 1419 * Look for an already-running master daemon. If there is one,
1420 * send it our filesystems to add to its list and exit. 1420 * send it our filesystems to add to its list and exit.
1421 * If there is none, become the master. 1421 * If there is none, become the master.
1422 */ 1422 */
1423#ifdef USE_CLIENT_SERVER 1423#ifdef USE_CLIENT_SERVER
1424 try_to_become_master(argc, argv); 1424 try_to_become_master(argc, argv);
1425#else 1425#else
1426 /* XXX think about this */ 1426 /* XXX think about this */
1427 asprintf(&pidname, "lfs_cleanerd:m:%s", argv[0]); 1427 asprintf(&pidname, "lfs_cleanerd:m:%s", argv[0]);
1428 if (pidname == NULL) { 1428 if (pidname == NULL) {
1429 syslog(LOG_ERR, "malloc failed: %m"); 1429 syslog(LOG_ERR, "malloc failed: %m");
1430 exit(1); 1430 exit(1);
1431 } 1431 }
1432 for (cp = pidname; cp != NULL; cp = strchr(cp, '/')) 1432 for (cp = pidname; cp != NULL; cp = strchr(cp, '/'))
1433 *cp = '|'; 1433 *cp = '|';
1434 pidfile(pidname); 1434 pidfile(pidname);
1435#endif 1435#endif
1436 1436
1437 /* 1437 /*
1438 * Signals mean daemon should report its statistics 1438 * Signals mean daemon should report its statistics
1439 */ 1439 */
1440 memset(&cleaner_stats, 0, sizeof(cleaner_stats)); 1440 memset(&cleaner_stats, 0, sizeof(cleaner_stats));
1441 signal(SIGUSR1, sig_report); 1441 signal(SIGUSR1, sig_report);
1442 signal(SIGUSR2, sig_report); 1442 signal(SIGUSR2, sig_report);
1443 1443
1444 /* 1444 /*
1445 * Start up buffer cache. We only use this for the Ifile, 1445 * Start up buffer cache. We only use this for the Ifile,
1446 * and we will resize it if necessary, so it can start small. 1446 * and we will resize it if necessary, so it can start small.
1447 */ 1447 */
1448 bufinit(4); 1448 bufinit(4);
1449 1449
1450#ifdef REPAIR_ZERO_FINFO 1450#ifdef REPAIR_ZERO_FINFO
1451 { 1451 {
1452 BLOCK_INFO *bip = NULL; 1452 BLOCK_INFO *bip = NULL;
1453 int bic = 0; 1453 int bic = 0;
1454 1454
1455 nfss = 1; 1455 nfss = 1;
1456 fsp = (struct clfs **)malloc(sizeof(*fsp)); 1456 fsp = (struct clfs **)malloc(sizeof(*fsp));
1457 fsp[0] = (struct clfs *)calloc(1, sizeof(**fsp)); 1457 fsp[0] = (struct clfs *)calloc(1, sizeof(**fsp));
1458 1458
1459 if (init_unmounted_fs(fsp[0], argv[0]) < 0) { 1459 if (init_unmounted_fs(fsp[0], argv[0]) < 0) {
1460 err(1, "init_unmounted_fs"); 1460 err(1, "init_unmounted_fs");
1461 } 1461 }
1462 dlog("Filesystem has %d segments", fsp[0]->lfs_nseg); 1462 dlog("Filesystem has %d segments", fsp[0]->lfs_nseg);
1463 for (i = 0; i < fsp[0]->lfs_nseg; i++) { 1463 for (i = 0; i < fsp[0]->lfs_nseg; i++) {
1464 load_segment(fsp[0], i, &bip, &bic); 1464 load_segment(fsp[0], i, &bip, &bic);
1465 bic = 0; 1465 bic = 0;
1466 } 1466 }
1467 exit(0); 1467 exit(0);
1468 } 1468 }
1469#endif 1469#endif
1470 1470
1471 /* 1471 /*
1472 * Initialize cleaning structures, open devices, etc. 1472 * Initialize cleaning structures, open devices, etc.
1473 */ 1473 */
1474 nfss = argc; 1474 nfss = argc;
1475 fsp = (struct clfs **)malloc(nfss * sizeof(*fsp)); 1475 fsp = (struct clfs **)malloc(nfss * sizeof(*fsp));
1476 if (fsp == NULL) { 1476 if (fsp == NULL) {
1477 syslog(LOG_ERR, "couldn't allocate fs table: %m"); 1477 syslog(LOG_ERR, "couldn't allocate fs table: %m");
1478 exit(1); 1478 exit(1);
1479 } 1479 }
1480 for (i = 0; i < nfss; i++) { 1480 for (i = 0; i < nfss; i++) {
1481 fsp[i] = (struct clfs *)calloc(1, sizeof(**fsp)); 1481 fsp[i] = (struct clfs *)calloc(1, sizeof(**fsp));
1482 if ((r = init_fs(fsp[i], argv[i])) < 0) { 1482 if ((r = init_fs(fsp[i], argv[i])) < 0) {
1483 syslog(LOG_ERR, "%s: couldn't init: error code %d", 1483 syslog(LOG_ERR, "%s: couldn't init: error code %d",
1484 argv[i], r); 1484 argv[i], r);
1485 handle_error(fsp, i); 1485 handle_error(fsp, i);
1486 --i; /* Do the new #i over again */ 1486 --i; /* Do the new #i over again */
1487 } 1487 }
1488 } 1488 }
1489 1489
1490 /* 1490 /*
1491 * If asked to coalesce, do so and exit. 1491 * If asked to coalesce, do so and exit.
1492 */ 1492 */
1493 if (do_coalesce) { 1493 if (do_coalesce) {
1494 for (i = 0; i < nfss; i++) 1494 for (i = 0; i < nfss; i++)
1495 clean_all_inodes(fsp[i]); 1495 clean_all_inodes(fsp[i]);
1496 exit(0); 1496 exit(0);
1497 } 1497 }
1498 1498
1499 /* 1499 /*
1500 * If asked to invalidate a segment, do that and exit. 1500 * If asked to invalidate a segment, do that and exit.
1501 */ 1501 */
1502 if (inval_segment >= 0) { 1502 if (inval_segment >= 0) {
1503 invalidate_segment(fsp[0], inval_segment); 1503 invalidate_segment(fsp[0], inval_segment);
1504 exit(0); 1504 exit(0);
1505 } 1505 }
1506 1506
1507 /* 1507 /*
1508 * Main cleaning loop. 1508 * Main cleaning loop.
1509 */ 1509 */
1510 loopcount = 0; 1510 loopcount = 0;
1511 while (nfss > 0) { 1511 while (nfss > 0) {
1512 int cleaned_one; 1512 int cleaned_one;
1513 do { 1513 do {
1514#ifdef USE_CLIENT_SERVER 1514#ifdef USE_CLIENT_SERVER
1515 check_control_socket(); 1515 check_control_socket();
1516#endif 1516#endif
1517 cleaned_one = 0; 1517 cleaned_one = 0;
1518 for (i = 0; i < nfss; i++) { 1518 for (i = 0; i < nfss; i++) {
1519 if ((error = needs_cleaning(fsp[i], &ci)) < 0) { 1519 if ((error = needs_cleaning(fsp[i], &ci)) < 0) {
1520 handle_error(fsp, i); 1520 handle_error(fsp, i);
1521 continue; 1521 continue;
1522 } 1522 }
1523 if (error == 0) /* No need to clean */ 1523 if (error == 0) /* No need to clean */
1524 continue; 1524 continue;
1525  1525
1526 reload_ifile(fsp[i]); 1526 reload_ifile(fsp[i]);
1527 if (clean_fs(fsp[i], &ci) < 0) { 1527 if (clean_fs(fsp[i], &ci) < 0) {
1528 handle_error(fsp, i); 1528 handle_error(fsp, i);
1529 continue; 1529 continue;
1530 } 1530 }
1531 ++cleaned_one; 1531 ++cleaned_one;
1532 } 1532 }
1533 ++loopcount; 1533 ++loopcount;
1534 if (stat_report && loopcount % stat_report == 0) 1534 if (stat_report && loopcount % stat_report == 0)
1535 sig_report(0); 1535 sig_report(0);
1536 if (do_quit) 1536 if (do_quit)
1537 exit(0); 1537 exit(0);
1538 } while(cleaned_one); 1538 } while(cleaned_one);
1539 tv.tv_sec = segwait_timeout; 1539 tv.tv_sec = segwait_timeout;
1540 tv.tv_usec = 0; 1540 tv.tv_usec = 0;
1541 error = fcntl(fsp[0]->clfs_ifilefd, LFCNSEGWAITALL, &tv); 1541 error = fcntl(fsp[0]->clfs_ifilefd, LFCNSEGWAITALL, &tv);
1542 if (error) 1542 if (error)
1543 err(1, "LFCNSEGWAITALL"); 1543 err(1, "LFCNSEGWAITALL");
1544 } 1544 }
1545 1545
1546 /* NOTREACHED */ 1546 /* NOTREACHED */
1547 return 0; 1547 return 0;
1548} 1548}