| @@ -1,1548 +1,1548 @@ | | | @@ -1,1548 +1,1548 @@ |
1 | /* $NetBSD: lfs_cleanerd.c,v 1.19 2009/08/06 00:20:45 pooka Exp $ */ | | 1 | /* $NetBSD: lfs_cleanerd.c,v 1.20 2009/08/06 00:23:08 pooka Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2005 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2005 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Konrad E. Schroder <perseant@hhhh.org>. | | 8 | * by Konrad E. Schroder <perseant@hhhh.org>. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * The cleaner daemon for the NetBSD Log-structured File System. | | 33 | * The cleaner daemon for the NetBSD Log-structured File System. |
34 | * Only tested for use with version 2 LFSs. | | 34 | * Only tested for use with version 2 LFSs. |
35 | */ | | 35 | */ |
36 | | | 36 | |
37 | #include <sys/syslog.h> | | 37 | #include <sys/syslog.h> |
38 | #include <sys/param.h> | | 38 | #include <sys/param.h> |
39 | #include <sys/mount.h> | | 39 | #include <sys/mount.h> |
40 | #include <sys/stat.h> | | 40 | #include <sys/stat.h> |
41 | #include <ufs/ufs/inode.h> | | 41 | #include <ufs/ufs/inode.h> |
42 | #include <ufs/lfs/lfs.h> | | 42 | #include <ufs/lfs/lfs.h> |
43 | | | 43 | |
44 | #include <assert.h> | | 44 | #include <assert.h> |
45 | #include <err.h> | | 45 | #include <err.h> |
46 | #include <errno.h> | | 46 | #include <errno.h> |
47 | #include <fcntl.h> | | 47 | #include <fcntl.h> |
48 | #include <stdio.h> | | 48 | #include <stdio.h> |
49 | #include <stdlib.h> | | 49 | #include <stdlib.h> |
50 | #include <string.h> | | 50 | #include <string.h> |
51 | #include <unistd.h> | | 51 | #include <unistd.h> |
52 | #include <time.h> | | 52 | #include <time.h> |
53 | #include <util.h> | | 53 | #include <util.h> |
54 | | | 54 | |
55 | #include "bufcache.h" | | 55 | #include "bufcache.h" |
56 | #include "vnode.h" | | 56 | #include "vnode.h" |
57 | #include "lfs_user.h" | | 57 | #include "lfs_user.h" |
58 | #include "fdfs.h" | | 58 | #include "fdfs.h" |
59 | #include "cleaner.h" | | 59 | #include "cleaner.h" |
60 | | | 60 | |
61 | /* | | 61 | /* |
62 | * Global variables. | | 62 | * Global variables. |
63 | */ | | 63 | */ |
/*
 * Cleaner-wide settings.  These are plain globals with external linkage.
 * NOTE(review): where they are assigned is not visible in this part of
 * the file -- presumably from command-line parsing; confirm.
 */
/* XXX these top few should really be fs-specific */
int use_fs_idle;	/* Use fs idle rather than cpu idle time */
int use_bytes;		/* Use bytes written rather than segments cleaned */
int load_threshold;	/* How idle is idle (CPU idle) */
int atatime;		/* How many segments (bytes) to clean at a time;
			 * also handed to fd_vget() for the raw device */

int nfss;		/* Number of filesystems monitored by this cleanerd */
struct clfs **fsp;	/* Array of extended filesystem structures */
int segwait_timeout;	/* Time to wait in lfs_segwait() */
int do_quit;		/* Quit after one cleaning loop */
int do_coalesce;	/* Coalesce filesystem */
int do_small;		/* Use small writes through markv */
char *copylog_filename;	/* File to use for fs debugging analysis */
int inval_segment;	/* Segment to invalidate */
int stat_report;	/* Report statistics for this period of cycles */
int debug;		/* Turn on debugging; dlog() is silent while zero */
/*
 * Counters accumulated by the cleaner; a single global instance is
 * defined together with the type.
 * NOTE(review): the per-field descriptions are inferred from the field
 * names only -- the code that updates them is not visible here; confirm.
 */
struct cleaner_stats {
	double	util_tot;	/* presumably: running total of utilization */
	double	util_sos;	/* presumably: sum of squares of utilization */
	off_t	bytes_read;	/* presumably: bytes read while cleaning */
	off_t	bytes_written;	/* presumably: bytes rewritten by cleaning */
	off_t	segs_cleaned;	/* presumably: segments cleaned */
	off_t	segs_empty;	/* presumably: segments found empty */
	off_t	segs_error;	/* presumably: segments that hit an error */
} cleaner_stats;
89 | | | 89 | |
90 | extern u_int32_t cksum(void *, size_t); | | 90 | extern u_int32_t cksum(void *, size_t); |
91 | extern u_int32_t lfs_sb_cksum(struct dlfs *); | | 91 | extern u_int32_t lfs_sb_cksum(struct dlfs *); |
92 | extern u_int32_t lfs_cksum_part(void *, size_t, u_int32_t); | | 92 | extern u_int32_t lfs_cksum_part(void *, size_t, u_int32_t); |
93 | extern int ufs_getlbns(struct lfs *, struct uvnode *, daddr_t, struct indir *, int *); | | 93 | extern int ufs_getlbns(struct lfs *, struct uvnode *, daddr_t, struct indir *, int *); |
94 | | | 94 | |
/*
 * Compat: no-op pwarn().  Accepts a printf-style argument list and
 * discards it without producing any output.
 */
void
pwarn(const char *unused, ...)
{
	/* Does nothing */
}
97 | | | 97 | |
98 | /* | | 98 | /* |
99 | * Log a message if debugging is turned on. | | 99 | * Log a message if debugging is turned on. |
100 | */ | | 100 | */ |
101 | void | | 101 | void |
102 | dlog(const char *fmt, ...) | | 102 | dlog(const char *fmt, ...) |
103 | { | | 103 | { |
104 | va_list ap; | | 104 | va_list ap; |
105 | | | 105 | |
106 | if (debug == 0) | | 106 | if (debug == 0) |
107 | return; | | 107 | return; |
108 | | | 108 | |
109 | va_start(ap, fmt); | | 109 | va_start(ap, fmt); |
110 | vsyslog(LOG_DEBUG, fmt, ap); | | 110 | vsyslog(LOG_DEBUG, fmt, ap); |
111 | va_end(ap); | | 111 | va_end(ap); |
112 | } | | 112 | } |
113 | | | 113 | |
114 | /* | | 114 | /* |
115 | * Remove the specified filesystem from the list, due to its having | | 115 | * Remove the specified filesystem from the list, due to its having |
116 | * become unmounted or other error condition. | | 116 | * become unmounted or other error condition. |
117 | */ | | 117 | */ |
118 | void | | 118 | void |
119 | handle_error(struct clfs **cfsp, int n) | | 119 | handle_error(struct clfs **cfsp, int n) |
120 | { | | 120 | { |
121 | syslog(LOG_NOTICE, "%s: detaching cleaner", cfsp[n]->lfs_fsmnt); | | 121 | syslog(LOG_NOTICE, "%s: detaching cleaner", cfsp[n]->lfs_fsmnt); |
122 | free(cfsp[n]); | | 122 | free(cfsp[n]); |
123 | if (n != nfss - 1) | | 123 | if (n != nfss - 1) |
124 | cfsp[n] = cfsp[nfss - 1]; | | 124 | cfsp[n] = cfsp[nfss - 1]; |
125 | --nfss; | | 125 | --nfss; |
126 | } | | 126 | } |
127 | | | 127 | |
128 | /* | | 128 | /* |
129 | * Reinitialize a filesystem if, e.g., its size changed. | | 129 | * Reinitialize a filesystem if, e.g., its size changed. |
130 | */ | | 130 | */ |
131 | int | | 131 | int |
132 | reinit_fs(struct clfs *fs) | | 132 | reinit_fs(struct clfs *fs) |
133 | { | | 133 | { |
134 | char fsname[MNAMELEN]; | | 134 | char fsname[MNAMELEN]; |
135 | | | 135 | |
136 | strncpy(fsname, (char *)fs->lfs_fsmnt, MNAMELEN); | | 136 | strncpy(fsname, (char *)fs->lfs_fsmnt, MNAMELEN); |
137 | close(fs->clfs_ifilefd); | | 137 | close(fs->clfs_ifilefd); |
138 | close(fs->clfs_devfd); | | 138 | close(fs->clfs_devfd); |
139 | fd_reclaim(fs->clfs_devvp); | | 139 | fd_reclaim(fs->clfs_devvp); |
140 | fd_reclaim(fs->lfs_ivnode); | | 140 | fd_reclaim(fs->lfs_ivnode); |
141 | free(fs->clfs_dev); | | 141 | free(fs->clfs_dev); |
142 | free(fs->clfs_segtab); | | 142 | free(fs->clfs_segtab); |
143 | free(fs->clfs_segtabp); | | 143 | free(fs->clfs_segtabp); |
144 | | | 144 | |
145 | return init_fs(fs, fsname); | | 145 | return init_fs(fs, fsname); |
146 | } | | 146 | } |
147 | | | 147 | |
#ifdef REPAIR_ZERO_FINFO
/*
 * Use fsck's lfs routines to load the Ifile from an unmounted fs.
 * We interpret "fsname" as the name of the raw disk device.
 *
 * The device is opened read/write, the superblock and Ifile vnode are
 * taken from the structure returned by lfs_init(), and a zeroed segment
 * table is allocated.  Note that fs->clfs_dev aliases the caller's
 * fsname string; it is not a private copy.
 *
 * Returns 0 on success and -1 on failure.  As in init_fs(), resources
 * acquired before a failure are left attached to fs.
 */
int
init_unmounted_fs(struct clfs *fs, char *fsname)
{
	struct lfs *disc_fs;
	int i;

	fs->clfs_dev = fsname;
	if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDWR)) < 0) {
		syslog(LOG_ERR, "couldn't open device %s read/write",
		       fs->clfs_dev);
		return -1;
	}

	disc_fs = lfs_init(fs->clfs_devfd, 0, 0, 0, 0);
	if (disc_fs == NULL) {
		/* Nothing to copy the superblock from */
		syslog(LOG_ERR, "%s: couldn't load filesystem", fsname);
		return -1;
	}

	fs->lfs_dlfs = disc_fs->lfs_dlfs; /* Structure copy */
	/* Fixed-width name field: filled by strncpy(), as in init_fs() */
	strncpy(fs->lfs_fsmnt, fsname, MNAMELEN);
	fs->lfs_ivnode = (struct uvnode *)disc_fs->lfs_ivnode;
	fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize,
				 atatime);

	/* Allocate and clear segtab */
	fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg *
						sizeof(*fs->clfs_segtab));
	fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg *
						sizeof(*fs->clfs_segtabp));
	if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) {
		syslog(LOG_ERR, "%s: couldn't malloc segment table: %m",
		       fs->clfs_dev);
		return -1;
	}

	for (i = 0; i < fs->lfs_nseg; i++) {
		fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]);
		fs->clfs_segtab[i].flags = 0x0;
	}
	syslog(LOG_NOTICE, "%s: unmounted cleaner starting", fsname);

	return 0;
}
#endif
188 | | | 188 | |
189 | /* | | 189 | /* |
190 | * Set up the file descriptors, including the Ifile descriptor. | | 190 | * Set up the file descriptors, including the Ifile descriptor. |
191 | * If we can't get the Ifile, this is not an LFS (or the kernel is | | 191 | * If we can't get the Ifile, this is not an LFS (or the kernel is |
192 | * too old to support the fcntl). | | 192 | * too old to support the fcntl). |
193 | * XXX Merge this and init_unmounted_fs, switching on whether | | 193 | * XXX Merge this and init_unmounted_fs, switching on whether |
194 | * XXX "fsname" is a dir or a char special device. Should | | 194 | * XXX "fsname" is a dir or a char special device. Should |
195 | * XXX also be able to read unmounted devices out of fstab, the way | | 195 | * XXX also be able to read unmounted devices out of fstab, the way |
196 | * XXX fsck does. | | 196 | * XXX fsck does. |
197 | */ | | 197 | */ |
198 | int | | 198 | int |
199 | init_fs(struct clfs *fs, char *fsname) | | 199 | init_fs(struct clfs *fs, char *fsname) |
200 | { | | 200 | { |
201 | struct statvfs sf; | | 201 | struct statvfs sf; |
202 | int rootfd; | | 202 | int rootfd; |
203 | int i; | | 203 | int i; |
204 | | | 204 | |
205 | /* | | 205 | /* |
206 | * Get the raw device from the block device. | | 206 | * Get the raw device from the block device. |
207 | * XXX this is ugly. Is there a way to discover the raw device | | 207 | * XXX this is ugly. Is there a way to discover the raw device |
208 | * XXX for a given mount point? | | 208 | * XXX for a given mount point? |
209 | */ | | 209 | */ |
210 | if (statvfs(fsname, &sf) < 0) | | 210 | if (statvfs(fsname, &sf) < 0) |
211 | return -1; | | 211 | return -1; |
212 | fs->clfs_dev = malloc(strlen(sf.f_mntfromname) + 2); | | 212 | fs->clfs_dev = malloc(strlen(sf.f_mntfromname) + 2); |
213 | if (fs->clfs_dev == NULL) { | | 213 | if (fs->clfs_dev == NULL) { |
214 | syslog(LOG_ERR, "couldn't malloc device name string: %m"); | | 214 | syslog(LOG_ERR, "couldn't malloc device name string: %m"); |
215 | return -1; | | 215 | return -1; |
216 | } | | 216 | } |
217 | sprintf(fs->clfs_dev, "/dev/r%s", sf.f_mntfromname + 5); | | 217 | sprintf(fs->clfs_dev, "/dev/r%s", sf.f_mntfromname + 5); |
218 | if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDONLY)) < 0) { | | 218 | if ((fs->clfs_devfd = open(fs->clfs_dev, O_RDONLY)) < 0) { |
219 | syslog(LOG_ERR, "couldn't open device %s for reading", | | 219 | syslog(LOG_ERR, "couldn't open device %s for reading", |
220 | fs->clfs_dev); | | 220 | fs->clfs_dev); |
221 | return -1; | | 221 | return -1; |
222 | } | | 222 | } |
223 | | | 223 | |
224 | /* Find the Ifile and open it */ | | 224 | /* Find the Ifile and open it */ |
225 | if ((rootfd = open(fsname, O_RDONLY)) < 0) | | 225 | if ((rootfd = open(fsname, O_RDONLY)) < 0) |
226 | return -2; | | 226 | return -2; |
227 | if (fcntl(rootfd, LFCNIFILEFH, &fs->clfs_ifilefh) < 0) | | 227 | if (fcntl(rootfd, LFCNIFILEFH, &fs->clfs_ifilefh) < 0) |
228 | return -3; | | 228 | return -3; |
229 | if ((fs->clfs_ifilefd = fhopen(&fs->clfs_ifilefh, | | 229 | if ((fs->clfs_ifilefd = fhopen(&fs->clfs_ifilefh, |
230 | sizeof(fs->clfs_ifilefh), O_RDONLY)) < 0) | | 230 | sizeof(fs->clfs_ifilefh), O_RDONLY)) < 0) |
231 | return -4; | | 231 | return -4; |
232 | close(rootfd); | | 232 | close(rootfd); |
233 | | | 233 | |
234 | /* Load in the superblock */ | | 234 | /* Load in the superblock */ |
235 | if (pread(fs->clfs_devfd, &(fs->lfs_dlfs), sizeof(struct dlfs), | | 235 | if (pread(fs->clfs_devfd, &(fs->lfs_dlfs), sizeof(struct dlfs), |
236 | LFS_LABELPAD) < 0) | | 236 | LFS_LABELPAD) < 0) |
237 | return -1; | | 237 | return -1; |
238 | | | 238 | |
239 | /* If this is not a version 2 filesystem, complain and exit */ | | 239 | /* If this is not a version 2 filesystem, complain and exit */ |
240 | if (fs->lfs_version != 2) { | | 240 | if (fs->lfs_version != 2) { |
241 | syslog(LOG_ERR, "%s: not a version 2 LFS", fsname); | | 241 | syslog(LOG_ERR, "%s: not a version 2 LFS", fsname); |
242 | return -1; | | 242 | return -1; |
243 | } | | 243 | } |
244 | | | 244 | |
245 | /* Assume fsname is the mounted name */ | | 245 | /* Assume fsname is the mounted name */ |
246 | strncpy((char *)fs->lfs_fsmnt, fsname, MNAMELEN); | | 246 | strncpy((char *)fs->lfs_fsmnt, fsname, MNAMELEN); |
247 | | | 247 | |
248 | /* Set up vnodes for Ifile and raw device */ | | 248 | /* Set up vnodes for Ifile and raw device */ |
249 | fs->lfs_ivnode = fd_vget(fs->clfs_ifilefd, fs->lfs_bsize, 0, 0); | | 249 | fs->lfs_ivnode = fd_vget(fs->clfs_ifilefd, fs->lfs_bsize, 0, 0); |
250 | fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize, | | 250 | fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize, |
251 | atatime); | | 251 | atatime); |
252 | | | 252 | |
253 | /* Allocate and clear segtab */ | | 253 | /* Allocate and clear segtab */ |
254 | fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg * | | 254 | fs->clfs_segtab = (struct clfs_seguse *)malloc(fs->lfs_nseg * |
255 | sizeof(*fs->clfs_segtab)); | | 255 | sizeof(*fs->clfs_segtab)); |
256 | fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg * | | 256 | fs->clfs_segtabp = (struct clfs_seguse **)malloc(fs->lfs_nseg * |
257 | sizeof(*fs->clfs_segtabp)); | | 257 | sizeof(*fs->clfs_segtabp)); |
258 | if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) { | | 258 | if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) { |
259 | syslog(LOG_ERR, "%s: couldn't malloc segment table: %m", | | 259 | syslog(LOG_ERR, "%s: couldn't malloc segment table: %m", |
260 | fs->clfs_dev); | | 260 | fs->clfs_dev); |
261 | return -1; | | 261 | return -1; |
262 | } | | 262 | } |
263 | | | 263 | |
264 | for (i = 0; i < fs->lfs_nseg; i++) { | | 264 | for (i = 0; i < fs->lfs_nseg; i++) { |
265 | fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); | | 265 | fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); |
266 | fs->clfs_segtab[i].flags = 0x0; | | 266 | fs->clfs_segtab[i].flags = 0x0; |
267 | } | | 267 | } |
268 | | | 268 | |
269 | syslog(LOG_NOTICE, "%s: attaching cleaner", fsname); | | 269 | syslog(LOG_NOTICE, "%s: attaching cleaner", fsname); |
270 | return 0; | | 270 | return 0; |
271 | } | | 271 | } |
272 | | | 272 | |
273 | /* | | 273 | /* |
274 | * Invalidate all the currently held Ifile blocks so they will be | | 274 | * Invalidate all the currently held Ifile blocks so they will be |
275 | * reread when we clean. Check the size while we're at it, and | | 275 | * reread when we clean. Check the size while we're at it, and |
276 | * resize the buffer cache if necessary. | | 276 | * resize the buffer cache if necessary. |
277 | */ | | 277 | */ |
278 | void | | 278 | void |
279 | reload_ifile(struct clfs *fs) | | 279 | reload_ifile(struct clfs *fs) |
280 | { | | 280 | { |
281 | struct ubuf *bp; | | 281 | struct ubuf *bp; |
282 | struct stat st; | | 282 | struct stat st; |
283 | int ohashmax; | | 283 | int ohashmax; |
284 | extern int hashmax; | | 284 | extern int hashmax; |
285 | | | 285 | |
286 | while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) != NULL) { | | 286 | while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) != NULL) { |
287 | bremfree(bp); | | 287 | bremfree(bp); |
288 | buf_destroy(bp); | | 288 | buf_destroy(bp); |
289 | } | | 289 | } |
290 | while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_cleanblkhd)) != NULL) { | | 290 | while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_cleanblkhd)) != NULL) { |
291 | bremfree(bp); | | 291 | bremfree(bp); |
292 | buf_destroy(bp); | | 292 | buf_destroy(bp); |
293 | } | | 293 | } |
294 | | | 294 | |
295 | /* If Ifile is larger than buffer cache, rehash */ | | 295 | /* If Ifile is larger than buffer cache, rehash */ |
296 | fstat(fs->clfs_ifilefd, &st); | | 296 | fstat(fs->clfs_ifilefd, &st); |
297 | if (st.st_size / fs->lfs_bsize > hashmax) { | | 297 | if (st.st_size / fs->lfs_bsize > hashmax) { |
298 | ohashmax = hashmax; | | 298 | ohashmax = hashmax; |
299 | bufrehash(st.st_size / fs->lfs_bsize); | | 299 | bufrehash(st.st_size / fs->lfs_bsize); |
300 | dlog("%s: resized buffer hash from %d to %d", | | 300 | dlog("%s: resized buffer hash from %d to %d", |
301 | fs->lfs_fsmnt, ohashmax, hashmax); | | 301 | fs->lfs_fsmnt, ohashmax, hashmax); |
302 | } | | 302 | } |
303 | } | | 303 | } |
304 | | | 304 | |
305 | /* | | 305 | /* |
306 | * Get IFILE entry for the given inode, store in ifpp. The buffer | | 306 | * Get IFILE entry for the given inode, store in ifpp. The buffer |
307 | * which contains that data is returned in bpp, and must be brelse()d | | 307 | * which contains that data is returned in bpp, and must be brelse()d |
308 | * by the caller. | | 308 | * by the caller. |
309 | */ | | 309 | */ |
310 | void | | 310 | void |
311 | lfs_ientry(IFILE **ifpp, struct clfs *fs, ino_t ino, struct ubuf **bpp) | | 311 | lfs_ientry(IFILE **ifpp, struct clfs *fs, ino_t ino, struct ubuf **bpp) |
312 | { | | 312 | { |
313 | int error; | | 313 | int error; |
314 | | | 314 | |
315 | error = bread(fs->lfs_ivnode, ino / fs->lfs_ifpb + fs->lfs_cleansz + | | 315 | error = bread(fs->lfs_ivnode, ino / fs->lfs_ifpb + fs->lfs_cleansz + |
316 | fs->lfs_segtabsz, fs->lfs_bsize, NOCRED, 0, bpp); | | 316 | fs->lfs_segtabsz, fs->lfs_bsize, NOCRED, 0, bpp); |
317 | if (error) | | 317 | if (error) |
318 | syslog(LOG_ERR, "%s: ientry failed for ino %d", | | 318 | syslog(LOG_ERR, "%s: ientry failed for ino %d", |
319 | fs->lfs_fsmnt, (int)ino); | | 319 | fs->lfs_fsmnt, (int)ino); |
320 | *ifpp = (IFILE *)(*bpp)->b_data + ino % fs->lfs_ifpb; | | 320 | *ifpp = (IFILE *)(*bpp)->b_data + ino % fs->lfs_ifpb; |
321 | return; | | 321 | return; |
322 | } | | 322 | } |
323 | | | 323 | |
#ifdef TEST_PATTERN
/*
 * Sanity check used with the "twofiles" test, which is assumed to run
 * on an otherwise empty filesystem.  Files written by "twofiles" carry
 * the test pattern (byte j of a block equals j & 0xff); ROOTINO and the
 * test executable (assumed to be inode 3) must not.
 *
 * For an inode record (bi_lbn == LFS_UNUSED_LBN) the stored inode number
 * must equal bi_inode.  For inodes numbered 3 or less the data must
 * deviate from the pattern somewhere.  Violations trip assert().
 */
static void
check_test_pattern(BLOCK_INFO *bip)
{
	unsigned char *data = bip->bi_bp;
	int off;

	/* Check inode sanity */
	if (bip->bi_lbn == LFS_UNUSED_LBN) {
		assert(((struct ufs1_dinode *)bip->bi_bp)->di_inumber ==
		       bip->bi_inode);
	}

	/* Higher inodes can have the test pattern and it's all good */
	if (bip->bi_inode <= 3) {
		/* Advance to the first byte that breaks the pattern */
		off = 0;
		while (off < bip->bi_size && data[off] == (off & 0xff))
			++off;
		assert(off < bip->bi_size);
	}
}
#endif /* TEST_PATTERN */
354 | | | 354 | |
355 | /* | | 355 | /* |
356 | * Parse the partial segment at daddr, adding its information to | | 356 | * Parse the partial segment at daddr, adding its information to |
357 | * bip. Return the address of the next partial segment to read. | | 357 | * bip. Return the address of the next partial segment to read. |
358 | */ | | 358 | */ |
359 | int32_t | | 359 | int32_t |
360 | parse_pseg(struct clfs *fs, daddr_t daddr, BLOCK_INFO **bipp, int *bic) | | 360 | parse_pseg(struct clfs *fs, daddr_t daddr, BLOCK_INFO **bipp, int *bic) |
361 | { | | 361 | { |
362 | SEGSUM *ssp; | | 362 | SEGSUM *ssp; |
363 | IFILE *ifp; | | 363 | IFILE *ifp; |
364 | BLOCK_INFO *bip, *nbip; | | 364 | BLOCK_INFO *bip, *nbip; |
365 | int32_t *iaddrp, idaddr, odaddr; | | 365 | int32_t *iaddrp, idaddr, odaddr; |
366 | FINFO *fip; | | 366 | FINFO *fip; |
367 | struct ubuf *ifbp; | | 367 | struct ubuf *ifbp; |
368 | struct ufs1_dinode *dip; | | 368 | struct ufs1_dinode *dip; |
369 | u_int32_t ck, vers; | | 369 | u_int32_t ck, vers; |
370 | int fic, inoc, obic; | | 370 | int fic, inoc, obic; |
371 | int i; | | 371 | int i; |
372 | char *cp; | | 372 | char *cp; |
373 | | | 373 | |
374 | odaddr = daddr; | | 374 | odaddr = daddr; |
375 | obic = *bic; | | 375 | obic = *bic; |
376 | bip = *bipp; | | 376 | bip = *bipp; |
377 | | | 377 | |
378 | /* | | 378 | /* |
379 | * Retrieve the segment header, set up the SEGSUM pointer | | 379 | * Retrieve the segment header, set up the SEGSUM pointer |
380 | * as well as the first FINFO and inode address pointer. | | 380 | * as well as the first FINFO and inode address pointer. |
381 | */ | | 381 | */ |
382 | cp = fd_ptrget(fs->clfs_devvp, daddr); | | 382 | cp = fd_ptrget(fs->clfs_devvp, daddr); |
383 | ssp = (SEGSUM *)cp; | | 383 | ssp = (SEGSUM *)cp; |
384 | iaddrp = ((int32_t *)(cp + fs->lfs_ibsize)) - 1; | | 384 | iaddrp = ((int32_t *)(cp + fs->lfs_ibsize)) - 1; |
385 | fip = (FINFO *)(cp + sizeof(SEGSUM)); | | 385 | fip = (FINFO *)(cp + sizeof(SEGSUM)); |
386 | | | 386 | |
387 | /* | | 387 | /* |
388 | * Check segment header magic and checksum | | 388 | * Check segment header magic and checksum |
389 | */ | | 389 | */ |
390 | if (ssp->ss_magic != SS_MAGIC) { | | 390 | if (ssp->ss_magic != SS_MAGIC) { |
391 | syslog(LOG_WARNING, "%s: sumsum magic number bad at 0x%x:" | | 391 | syslog(LOG_WARNING, "%s: sumsum magic number bad at 0x%x:" |
392 | " read 0x%x, expected 0x%x", fs->lfs_fsmnt, | | 392 | " read 0x%x, expected 0x%x", fs->lfs_fsmnt, |
393 | (int32_t)daddr, ssp->ss_magic, SS_MAGIC); | | 393 | (int32_t)daddr, ssp->ss_magic, SS_MAGIC); |
394 | return 0x0; | | 394 | return 0x0; |
395 | } | | 395 | } |
396 | ck = cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); | | 396 | ck = cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); |
397 | if (ck != ssp->ss_sumsum) { | | 397 | if (ck != ssp->ss_sumsum) { |
398 | syslog(LOG_WARNING, "%s: sumsum checksum mismatch at 0x%x:" | | 398 | syslog(LOG_WARNING, "%s: sumsum checksum mismatch at 0x%x:" |
399 | " read 0x%x, computed 0x%x", fs->lfs_fsmnt, | | 399 | " read 0x%x, computed 0x%x", fs->lfs_fsmnt, |
400 | (int32_t)daddr, ssp->ss_sumsum, ck); | | 400 | (int32_t)daddr, ssp->ss_sumsum, ck); |
401 | return 0x0; | | 401 | return 0x0; |
402 | } | | 402 | } |
403 | | | 403 | |
404 | /* Initialize data sum */ | | 404 | /* Initialize data sum */ |
405 | ck = 0; | | 405 | ck = 0; |
406 | | | 406 | |
407 | /* Point daddr at next block after segment summary */ | | 407 | /* Point daddr at next block after segment summary */ |
408 | ++daddr; | | 408 | ++daddr; |
409 | | | 409 | |
410 | /* | | 410 | /* |
411 | * Loop over file info and inode pointers. We always move daddr | | 411 | * Loop over file info and inode pointers. We always move daddr |
412 | * forward here because we are also computing the data checksum | | 412 | * forward here because we are also computing the data checksum |
413 | * as we go. | | 413 | * as we go. |
414 | */ | | 414 | */ |
415 | fic = inoc = 0; | | 415 | fic = inoc = 0; |
416 | while (fic < ssp->ss_nfinfo || inoc < ssp->ss_ninos) { | | 416 | while (fic < ssp->ss_nfinfo || inoc < ssp->ss_ninos) { |
417 | /* | | 417 | /* |
418 | * We must have either a file block or an inode block. | | 418 | * We must have either a file block or an inode block. |
419 | * If we don't have either one, it's an error. | | 419 | * If we don't have either one, it's an error. |
420 | */ | | 420 | */ |
421 | if (fic >= ssp->ss_nfinfo && *iaddrp != daddr) { | | 421 | if (fic >= ssp->ss_nfinfo && *iaddrp != daddr) { |
422 | syslog(LOG_WARNING, "%s: bad pseg at %x (seg %d)", | | 422 | syslog(LOG_WARNING, "%s: bad pseg at %x (seg %d)", |
423 | fs->lfs_fsmnt, odaddr, dtosn(fs, odaddr)); | | 423 | fs->lfs_fsmnt, odaddr, dtosn(fs, odaddr)); |
424 | *bipp = bip; | | 424 | *bipp = bip; |
425 | return 0x0; | | 425 | return 0x0; |
426 | } | | 426 | } |
427 | | | 427 | |
428 | /* | | 428 | /* |
429 | * Note each inode from the inode blocks | | 429 | * Note each inode from the inode blocks |
430 | */ | | 430 | */ |
431 | if (inoc < ssp->ss_ninos && *iaddrp == daddr) { | | 431 | if (inoc < ssp->ss_ninos && *iaddrp == daddr) { |
432 | cp = fd_ptrget(fs->clfs_devvp, daddr); | | 432 | cp = fd_ptrget(fs->clfs_devvp, daddr); |
433 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); | | 433 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); |
434 | dip = (struct ufs1_dinode *)cp; | | 434 | dip = (struct ufs1_dinode *)cp; |
435 | for (i = 0; i < fs->lfs_inopb; i++) { | | 435 | for (i = 0; i < fs->lfs_inopb; i++) { |
436 | if (dip[i].di_inumber == 0) | | 436 | if (dip[i].di_inumber == 0) |
437 | break; | | 437 | break; |
438 | | | 438 | |
439 | /* | | 439 | /* |
440 | * Check currency before adding it | | 440 | * Check currency before adding it |
441 | */ | | 441 | */ |
442 | #ifndef REPAIR_ZERO_FINFO | | 442 | #ifndef REPAIR_ZERO_FINFO |
443 | lfs_ientry(&ifp, fs, dip[i].di_inumber, &ifbp); | | 443 | lfs_ientry(&ifp, fs, dip[i].di_inumber, &ifbp); |
444 | idaddr = ifp->if_daddr; | | 444 | idaddr = ifp->if_daddr; |
445 | brelse(ifbp, 0); | | 445 | brelse(ifbp, 0); |
446 | if (idaddr != daddr) | | 446 | if (idaddr != daddr) |
447 | #endif | | 447 | #endif |
448 | continue; | | 448 | continue; |
449 | | | 449 | |
450 | /* | | 450 | /* |
451 | * A current inode. Add it. | | 451 | * A current inode. Add it. |
452 | */ | | 452 | */ |
453 | ++*bic; | | 453 | ++*bic; |
454 | nbip = (BLOCK_INFO *)realloc(bip, *bic * | | 454 | nbip = (BLOCK_INFO *)realloc(bip, *bic * |
455 | sizeof(*bip)); | | 455 | sizeof(*bip)); |
456 | if (nbip) | | 456 | if (nbip) |
457 | bip = nbip; | | 457 | bip = nbip; |
458 | else { | | 458 | else { |
459 | --*bic; | | 459 | --*bic; |
460 | *bipp = bip; | | 460 | *bipp = bip; |
461 | return 0x0; | | 461 | return 0x0; |
462 | } | | 462 | } |
463 | bip[*bic - 1].bi_inode = dip[i].di_inumber; | | 463 | bip[*bic - 1].bi_inode = dip[i].di_inumber; |
464 | bip[*bic - 1].bi_lbn = LFS_UNUSED_LBN; | | 464 | bip[*bic - 1].bi_lbn = LFS_UNUSED_LBN; |
465 | bip[*bic - 1].bi_daddr = daddr; | | 465 | bip[*bic - 1].bi_daddr = daddr; |
466 | bip[*bic - 1].bi_segcreate = ssp->ss_create; | | 466 | bip[*bic - 1].bi_segcreate = ssp->ss_create; |
467 | bip[*bic - 1].bi_version = dip[i].di_gen; | | 467 | bip[*bic - 1].bi_version = dip[i].di_gen; |
468 | bip[*bic - 1].bi_bp = &(dip[i]); | | 468 | bip[*bic - 1].bi_bp = &(dip[i]); |
469 | bip[*bic - 1].bi_size = DINODE1_SIZE; | | 469 | bip[*bic - 1].bi_size = DINODE1_SIZE; |
470 | } | | 470 | } |
471 | inoc += i; | | 471 | inoc += i; |
472 | daddr += btofsb(fs, fs->lfs_ibsize); | | 472 | daddr += btofsb(fs, fs->lfs_ibsize); |
473 | --iaddrp; | | 473 | --iaddrp; |
474 | continue; | | 474 | continue; |
475 | } | | 475 | } |
476 | | | 476 | |
477 | /* | | 477 | /* |
478 | * Note each file block from the finfo blocks | | 478 | * Note each file block from the finfo blocks |
479 | */ | | 479 | */ |
480 | if (fic >= ssp->ss_nfinfo) | | 480 | if (fic >= ssp->ss_nfinfo) |
481 | continue; | | 481 | continue; |
482 | | | 482 | |
483 | /* Count this finfo, whether or not we use it */ | | 483 | /* Count this finfo, whether or not we use it */ |
484 | ++fic; | | 484 | ++fic; |
485 | | | 485 | |
486 | /* | | 486 | /* |
487 | * If this finfo has nblocks==0, it was written wrong. | | 487 | * If this finfo has nblocks==0, it was written wrong. |
488 | * Kernels with this problem always wrote this zero-sized | | 488 | * Kernels with this problem always wrote this zero-sized |
489 | * finfo last, so just ignore it. | | 489 | * finfo last, so just ignore it. |
490 | */ | | 490 | */ |
491 | if (fip->fi_nblocks == 0) { | | 491 | if (fip->fi_nblocks == 0) { |
492 | #ifdef REPAIR_ZERO_FINFO | | 492 | #ifdef REPAIR_ZERO_FINFO |
493 | struct ubuf *nbp; | | 493 | struct ubuf *nbp; |
494 | SEGSUM *nssp; | | 494 | SEGSUM *nssp; |
495 | | | 495 | |
496 | syslog(LOG_WARNING, "fixing short FINFO at %x (seg %d)", | | 496 | syslog(LOG_WARNING, "fixing short FINFO at %x (seg %d)", |
497 | odaddr, dtosn(fs, odaddr)); | | 497 | odaddr, dtosn(fs, odaddr)); |
498 | bread(fs->clfs_devvp, odaddr, fs->lfs_fsize, | | 498 | bread(fs->clfs_devvp, odaddr, fs->lfs_fsize, |
499 | NOCRED, 0, &nbp); | | 499 | NOCRED, 0, &nbp); |
500 | nssp = (SEGSUM *)nbp->b_data; | | 500 | nssp = (SEGSUM *)nbp->b_data; |
501 | --nssp->ss_nfinfo; | | 501 | --nssp->ss_nfinfo; |
502 | nssp->ss_sumsum = cksum(&nssp->ss_datasum, | | 502 | nssp->ss_sumsum = cksum(&nssp->ss_datasum, |
503 | fs->lfs_sumsize - sizeof(nssp->ss_sumsum)); | | 503 | fs->lfs_sumsize - sizeof(nssp->ss_sumsum)); |
504 | bwrite(nbp); | | 504 | bwrite(nbp); |
505 | #endif | | 505 | #endif |
506 | syslog(LOG_WARNING, "zero-length FINFO at %x (seg %d)", | | 506 | syslog(LOG_WARNING, "zero-length FINFO at %x (seg %d)", |
507 | odaddr, dtosn(fs, odaddr)); | | 507 | odaddr, dtosn(fs, odaddr)); |
508 | continue; | | 508 | continue; |
509 | } | | 509 | } |
510 | | | 510 | |
511 | /* | | 511 | /* |
512 | * Check currency before adding blocks | | 512 | * Check currency before adding blocks |
513 | */ | | 513 | */ |
514 | #ifdef REPAIR_ZERO_FINFO | | 514 | #ifdef REPAIR_ZERO_FINFO |
515 | vers = -1; | | 515 | vers = -1; |
516 | #else | | 516 | #else |
517 | lfs_ientry(&ifp, fs, fip->fi_ino, &ifbp); | | 517 | lfs_ientry(&ifp, fs, fip->fi_ino, &ifbp); |
518 | vers = ifp->if_version; | | 518 | vers = ifp->if_version; |
519 | brelse(ifbp, 0); | | 519 | brelse(ifbp, 0); |
520 | #endif | | 520 | #endif |
521 | if (vers != fip->fi_version) { | | 521 | if (vers != fip->fi_version) { |
522 | size_t size; | | 522 | size_t size; |
523 | | | 523 | |
524 | /* Read all the blocks from the data summary */ | | 524 | /* Read all the blocks from the data summary */ |
525 | for (i = 0; i < fip->fi_nblocks; i++) { | | 525 | for (i = 0; i < fip->fi_nblocks; i++) { |
526 | size = (i == fip->fi_nblocks - 1) ? | | 526 | size = (i == fip->fi_nblocks - 1) ? |
527 | fip->fi_lastlength : fs->lfs_bsize; | | 527 | fip->fi_lastlength : fs->lfs_bsize; |
528 | cp = fd_ptrget(fs->clfs_devvp, daddr); | | 528 | cp = fd_ptrget(fs->clfs_devvp, daddr); |
529 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); | | 529 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); |
530 | daddr += btofsb(fs, size); | | 530 | daddr += btofsb(fs, size); |
531 | } | | 531 | } |
532 | fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); | | 532 | fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); |
533 | continue; | | 533 | continue; |
534 | } | | 534 | } |
535 | | | 535 | |
536 | /* Add all the blocks from the finfos (current or not) */ | | 536 | /* Add all the blocks from the finfos (current or not) */ |
537 | nbip = (BLOCK_INFO *)realloc(bip, (*bic + fip->fi_nblocks) * | | 537 | nbip = (BLOCK_INFO *)realloc(bip, (*bic + fip->fi_nblocks) * |
538 | sizeof(*bip)); | | 538 | sizeof(*bip)); |
539 | if (nbip) | | 539 | if (nbip) |
540 | bip = nbip; | | 540 | bip = nbip; |
541 | else { | | 541 | else { |
542 | *bipp = bip; | | 542 | *bipp = bip; |
543 | return 0x0; | | 543 | return 0x0; |
544 | } | | 544 | } |
545 | | | 545 | |
546 | for (i = 0; i < fip->fi_nblocks; i++) { | | 546 | for (i = 0; i < fip->fi_nblocks; i++) { |
547 | bip[*bic + i].bi_inode = fip->fi_ino; | | 547 | bip[*bic + i].bi_inode = fip->fi_ino; |
548 | bip[*bic + i].bi_lbn = fip->fi_blocks[i]; | | 548 | bip[*bic + i].bi_lbn = fip->fi_blocks[i]; |
549 | bip[*bic + i].bi_daddr = daddr; | | 549 | bip[*bic + i].bi_daddr = daddr; |
550 | bip[*bic + i].bi_segcreate = ssp->ss_create; | | 550 | bip[*bic + i].bi_segcreate = ssp->ss_create; |
551 | bip[*bic + i].bi_version = fip->fi_version; | | 551 | bip[*bic + i].bi_version = fip->fi_version; |
552 | bip[*bic + i].bi_size = (i == fip->fi_nblocks - 1) ? | | 552 | bip[*bic + i].bi_size = (i == fip->fi_nblocks - 1) ? |
553 | fip->fi_lastlength : fs->lfs_bsize; | | 553 | fip->fi_lastlength : fs->lfs_bsize; |
554 | cp = fd_ptrget(fs->clfs_devvp, daddr); | | 554 | cp = fd_ptrget(fs->clfs_devvp, daddr); |
555 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); | | 555 | ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); |
556 | bip[*bic + i].bi_bp = cp; | | 556 | bip[*bic + i].bi_bp = cp; |
557 | daddr += btofsb(fs, bip[*bic + i].bi_size); | | 557 | daddr += btofsb(fs, bip[*bic + i].bi_size); |
558 | | | 558 | |
559 | #ifdef TEST_PATTERN | | 559 | #ifdef TEST_PATTERN |
560 | check_test_pattern(bip + *bic + i); /* XXXDEBUG */ | | 560 | check_test_pattern(bip + *bic + i); /* XXXDEBUG */ |
561 | #endif | | 561 | #endif |
562 | } | | 562 | } |
563 | *bic += fip->fi_nblocks; | | 563 | *bic += fip->fi_nblocks; |
564 | fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); | | 564 | fip = (FINFO *)(fip->fi_blocks + fip->fi_nblocks); |
565 | } | | 565 | } |
566 | | | 566 | |
567 | #ifndef REPAIR_ZERO_FINFO | | 567 | #ifndef REPAIR_ZERO_FINFO |
568 | if (ssp->ss_datasum != ck) { | | 568 | if (ssp->ss_datasum != ck) { |
569 | syslog(LOG_WARNING, "%s: data checksum bad at 0x%x:" | | 569 | syslog(LOG_WARNING, "%s: data checksum bad at 0x%x:" |
570 | " read 0x%x, computed 0x%x", fs->lfs_fsmnt, odaddr, | | 570 | " read 0x%x, computed 0x%x", fs->lfs_fsmnt, odaddr, |
571 | ssp->ss_datasum, ck); | | 571 | ssp->ss_datasum, ck); |
572 | *bic = obic; | | 572 | *bic = obic; |
573 | return 0x0; | | 573 | return 0x0; |
574 | } | | 574 | } |
575 | #endif | | 575 | #endif |
576 | | | 576 | |
577 | *bipp = bip; | | 577 | *bipp = bip; |
578 | return daddr; | | 578 | return daddr; |
579 | } | | 579 | } |
580 | | | 580 | |
581 | static void | | 581 | static void |
582 | log_segment_read(struct clfs *fs, int sn) | | 582 | log_segment_read(struct clfs *fs, int sn) |
583 | { | | 583 | { |
584 | FILE *fp; | | 584 | FILE *fp; |
585 | char *cp; | | 585 | char *cp; |
586 | | | 586 | |
587 | /* | | 587 | /* |
588 | * Write the segment read, and its contents, into a log file in | | 588 | * Write the segment read, and its contents, into a log file in |
589 | * the current directory. We don't need to log the location of | | 589 | * the current directory. We don't need to log the location of |
590 | * the segment, since that can be inferred from the segments up | | 590 | * the segment, since that can be inferred from the segments up |
591 | * to this point (ss_nextseg field of the previously written segment). | | 591 | * to this point (ss_nextseg field of the previously written segment). |
592 | * | | 592 | * |
593 | * We can use this info later to reconstruct the filesystem at any | | 593 | * We can use this info later to reconstruct the filesystem at any |
594 | * given point in time for analysis, by replaying the log forward | | 594 | * given point in time for analysis, by replaying the log forward |
595 | * indexed by the segment serial numbers; but it is not suitable | | 595 | * indexed by the segment serial numbers; but it is not suitable |
596 | * for everyday use since the copylog will be simply enormous. | | 596 | * for everyday use since the copylog will be simply enormous. |
597 | */ | | 597 | */ |
598 | cp = fd_ptrget(fs->clfs_devvp, sntod(fs, sn)); | | 598 | cp = fd_ptrget(fs->clfs_devvp, sntod(fs, sn)); |
599 | | | 599 | |
600 | fp = fopen(copylog_filename, "ab"); | | 600 | fp = fopen(copylog_filename, "ab"); |
601 | if (fp != NULL) { | | 601 | if (fp != NULL) { |
602 | if (fwrite(cp, (size_t)fs->lfs_ssize, 1, fp) != 1) { | | 602 | if (fwrite(cp, (size_t)fs->lfs_ssize, 1, fp) != 1) { |
603 | perror("writing segment to copy log"); | | 603 | perror("writing segment to copy log"); |
604 | } | | 604 | } |
605 | } | | 605 | } |
606 | fclose(fp); | | 606 | fclose(fp); |
607 | } | | 607 | } |
608 | | | 608 | |
609 | /* | | 609 | /* |
610 | * Read a segment to populate the BLOCK_INFO structures. | | 610 | * Read a segment to populate the BLOCK_INFO structures. |
611 | * Return the number of partial segments read and parsed. | | 611 | * Return the number of partial segments read and parsed. |
612 | */ | | 612 | */ |
613 | int | | 613 | int |
614 | load_segment(struct clfs *fs, int sn, BLOCK_INFO **bipp, int *bic) | | 614 | load_segment(struct clfs *fs, int sn, BLOCK_INFO **bipp, int *bic) |
615 | { | | 615 | { |
616 | int32_t daddr; | | 616 | int32_t daddr; |
617 | int i, npseg; | | 617 | int i, npseg; |
618 | | | 618 | |
619 | daddr = sntod(fs, sn); | | 619 | daddr = sntod(fs, sn); |
620 | if (daddr < btofsb(fs, LFS_LABELPAD)) | | 620 | if (daddr < btofsb(fs, LFS_LABELPAD)) |
621 | daddr = btofsb(fs, LFS_LABELPAD); | | 621 | daddr = btofsb(fs, LFS_LABELPAD); |
622 | for (i = 0; i < LFS_MAXNUMSB; i++) { | | 622 | for (i = 0; i < LFS_MAXNUMSB; i++) { |
623 | if (fs->lfs_sboffs[i] == daddr) { | | 623 | if (fs->lfs_sboffs[i] == daddr) { |
624 | daddr += btofsb(fs, LFS_SBPAD); | | 624 | daddr += btofsb(fs, LFS_SBPAD); |
625 | break; | | 625 | break; |
626 | } | | 626 | } |
627 | } | | 627 | } |
628 | | | 628 | |
629 | /* Preload the segment buffer */ | | 629 | /* Preload the segment buffer */ |
630 | if (fd_preload(fs->clfs_devvp, sntod(fs, sn)) < 0) | | 630 | if (fd_preload(fs->clfs_devvp, sntod(fs, sn)) < 0) |
631 | return -1; | | 631 | return -1; |
632 | | | 632 | |
633 | if (copylog_filename) | | 633 | if (copylog_filename) |
634 | log_segment_read(fs, sn); | | 634 | log_segment_read(fs, sn); |
635 | | | 635 | |
636 | /* Note bytes read for stats */ | | 636 | /* Note bytes read for stats */ |
637 | cleaner_stats.segs_cleaned++; | | 637 | cleaner_stats.segs_cleaned++; |
638 | cleaner_stats.bytes_read += fs->lfs_ssize; | | 638 | cleaner_stats.bytes_read += fs->lfs_ssize; |
639 | ++fs->clfs_nactive; | | 639 | ++fs->clfs_nactive; |
640 | | | 640 | |
641 | npseg = 0; | | 641 | npseg = 0; |
642 | while(dtosn(fs, daddr) == sn && | | 642 | while(dtosn(fs, daddr) == sn && |
643 | dtosn(fs, daddr + btofsb(fs, fs->lfs_bsize)) == sn) { | | 643 | dtosn(fs, daddr + btofsb(fs, fs->lfs_bsize)) == sn) { |
644 | daddr = parse_pseg(fs, daddr, bipp, bic); | | 644 | daddr = parse_pseg(fs, daddr, bipp, bic); |
645 | if (daddr == 0x0) { | | 645 | if (daddr == 0x0) { |
646 | ++cleaner_stats.segs_error; | | 646 | ++cleaner_stats.segs_error; |
647 | break; | | 647 | break; |
648 | } | | 648 | } |
649 | ++npseg; | | 649 | ++npseg; |
650 | } | | 650 | } |
651 | | | 651 | |
652 | return npseg; | | 652 | return npseg; |
653 | } | | 653 | } |
654 | | | 654 | |
655 | void | | 655 | void |
656 | calc_cb(struct clfs *fs, int sn, struct clfs_seguse *t) | | 656 | calc_cb(struct clfs *fs, int sn, struct clfs_seguse *t) |
657 | { | | 657 | { |
658 | time_t now; | | 658 | time_t now; |
659 | int64_t age, benefit, cost; | | 659 | int64_t age, benefit, cost; |
660 | | | 660 | |
661 | time(&now); | | 661 | time(&now); |
662 | age = (now < t->lastmod ? 0 : now - t->lastmod); | | 662 | age = (now < t->lastmod ? 0 : now - t->lastmod); |
663 | | | 663 | |
664 | /* Under no circumstances clean active or already-clean segments */ | | 664 | /* Under no circumstances clean active or already-clean segments */ |
665 | if ((t->flags & SEGUSE_ACTIVE) || !(t->flags & SEGUSE_DIRTY)) { | | 665 | if ((t->flags & SEGUSE_ACTIVE) || !(t->flags & SEGUSE_DIRTY)) { |
666 | t->priority = 0; | | 666 | t->priority = 0; |
667 | return; | | 667 | return; |
668 | } | | 668 | } |
669 | | | 669 | |
670 | /* | | 670 | /* |
671 | * If the segment is empty, there is no reason to clean it. | | 671 | * If the segment is empty, there is no reason to clean it. |
672 | * Clear its error condition, if any, since we are never going to | | 672 | * Clear its error condition, if any, since we are never going to |
673 | * try to parse this one. | | 673 | * try to parse this one. |
674 | */ | | 674 | */ |
675 | if (t->nbytes == 0) { | | 675 | if (t->nbytes == 0) { |
676 | t->flags &= ~SEGUSE_ERROR; /* Strip error once empty */ | | 676 | t->flags &= ~SEGUSE_ERROR; /* Strip error once empty */ |
677 | t->priority = 0; | | 677 | t->priority = 0; |
678 | return; | | 678 | return; |
679 | } | | 679 | } |
680 | | | 680 | |
681 | if (t->flags & SEGUSE_ERROR) { /* No good if not already empty */ | | 681 | if (t->flags & SEGUSE_ERROR) { /* No good if not already empty */ |
682 | /* No benefit */ | | 682 | /* No benefit */ |
683 | t->priority = 0; | | 683 | t->priority = 0; |
684 | return; | | 684 | return; |
685 | } | | 685 | } |
686 | | | 686 | |
687 | if (t->nbytes > fs->lfs_ssize) { | | 687 | if (t->nbytes > fs->lfs_ssize) { |
688 | /* Another type of error */ | | 688 | /* Another type of error */ |
689 | syslog(LOG_WARNING, "segment %d: bad seguse count %d", | | 689 | syslog(LOG_WARNING, "segment %d: bad seguse count %d", |
690 | sn, t->nbytes); | | 690 | sn, t->nbytes); |
691 | t->flags |= SEGUSE_ERROR; | | 691 | t->flags |= SEGUSE_ERROR; |
692 | t->priority = 0; | | 692 | t->priority = 0; |
693 | return; | | 693 | return; |
694 | } | | 694 | } |
695 | | | 695 | |
696 | /* | | 696 | /* |
697 | * The non-degenerate case. Use Rosenblum's cost-benefit algorithm. | | 697 | * The non-degenerate case. Use Rosenblum's cost-benefit algorithm. |
698 | * Calculate the benefit from cleaning this segment (one segment, | | 698 | * Calculate the benefit from cleaning this segment (one segment, |
699 | * minus fragmentation, dirty blocks and a segment summary block) | | 699 | * minus fragmentation, dirty blocks and a segment summary block) |
700 | * and weigh that against the cost (bytes read plus bytes written). | | 700 | * and weigh that against the cost (bytes read plus bytes written). |
701 | * We count the summary headers as "dirty" to avoid cleaning very | | 701 | * We count the summary headers as "dirty" to avoid cleaning very |
702 | * old and very full segments. | | 702 | * old and very full segments. |
703 | */ | | 703 | */ |
704 | benefit = (int64_t)fs->lfs_ssize - t->nbytes - | | 704 | benefit = (int64_t)fs->lfs_ssize - t->nbytes - |
705 | (t->nsums + 1) * fs->lfs_fsize; | | 705 | (t->nsums + 1) * fs->lfs_fsize; |
706 | if (fs->lfs_bsize > fs->lfs_fsize) /* fragmentation */ | | 706 | if (fs->lfs_bsize > fs->lfs_fsize) /* fragmentation */ |
707 | benefit -= (fs->lfs_bsize / 2); | | 707 | benefit -= (fs->lfs_bsize / 2); |
708 | if (benefit <= 0) { | | 708 | if (benefit <= 0) { |
709 | t->priority = 0; | | 709 | t->priority = 0; |
710 | return; | | 710 | return; |
711 | } | | 711 | } |
712 | | | 712 | |
713 | cost = fs->lfs_ssize + t->nbytes; | | 713 | cost = fs->lfs_ssize + t->nbytes; |
714 | t->priority = (256 * benefit * age) / cost; | | 714 | t->priority = (256 * benefit * age) / cost; |
715 | | | 715 | |
716 | return; | | 716 | return; |
717 | } | | 717 | } |
718 | | | 718 | |
719 | /* | | 719 | /* |
720 | * Comparator for BLOCK_INFO structures. Anything not in one of the segments | | 720 | * Comparator for BLOCK_INFO structures. Anything not in one of the segments |
721 | * we're looking at sorts higher; after that we sort first by inode number | | 721 | * we're looking at sorts higher; after that we sort first by inode number |
722 | * and then by block number (unsigned, i.e., negative sorts higher) *but* | | 722 | * and then by block number (unsigned, i.e., negative sorts higher) *but* |
723 | * sort inodes before data blocks. | | 723 | * sort inodes before data blocks. |
724 | */ | | 724 | */ |
725 | static int | | 725 | static int |
726 | bi_comparator(const void *va, const void *vb) | | 726 | bi_comparator(const void *va, const void *vb) |
727 | { | | 727 | { |
728 | const BLOCK_INFO *a, *b; | | 728 | const BLOCK_INFO *a, *b; |
729 | | | 729 | |
730 | a = (const BLOCK_INFO *)va; | | 730 | a = (const BLOCK_INFO *)va; |
731 | b = (const BLOCK_INFO *)vb; | | 731 | b = (const BLOCK_INFO *)vb; |
732 | | | 732 | |
733 | /* Check for out-of-place block */ | | 733 | /* Check for out-of-place block */ |
734 | if (a->bi_segcreate == a->bi_daddr && | | 734 | if (a->bi_segcreate == a->bi_daddr && |
735 | b->bi_segcreate != b->bi_daddr) | | 735 | b->bi_segcreate != b->bi_daddr) |
736 | return -1; | | 736 | return -1; |
737 | if (a->bi_segcreate != a->bi_daddr && | | 737 | if (a->bi_segcreate != a->bi_daddr && |
738 | b->bi_segcreate == b->bi_daddr) | | 738 | b->bi_segcreate == b->bi_daddr) |
739 | return 1; | | 739 | return 1; |
740 | if (a->bi_size <= 0 && b->bi_size > 0) | | 740 | if (a->bi_size <= 0 && b->bi_size > 0) |
741 | return 1; | | 741 | return 1; |
742 | if (b->bi_size <= 0 && a->bi_size > 0) | | 742 | if (b->bi_size <= 0 && a->bi_size > 0) |
743 | return -1; | | 743 | return -1; |
744 | | | 744 | |
745 | /* Check inode number */ | | 745 | /* Check inode number */ |
746 | if (a->bi_inode != b->bi_inode) | | 746 | if (a->bi_inode != b->bi_inode) |
747 | return a->bi_inode - b->bi_inode; | | 747 | return a->bi_inode - b->bi_inode; |
748 | | | 748 | |
749 | /* Check lbn */ | | 749 | /* Check lbn */ |
750 | if (a->bi_lbn == LFS_UNUSED_LBN) /* Inodes sort lower than blocks */ | | 750 | if (a->bi_lbn == LFS_UNUSED_LBN) /* Inodes sort lower than blocks */ |
751 | return -1; | | 751 | return -1; |
752 | if (b->bi_lbn == LFS_UNUSED_LBN) | | 752 | if (b->bi_lbn == LFS_UNUSED_LBN) |
753 | return 1; | | 753 | return 1; |
754 | if ((u_int32_t)a->bi_lbn > (u_int32_t)b->bi_lbn) | | 754 | if ((u_int32_t)a->bi_lbn > (u_int32_t)b->bi_lbn) |
755 | return 1; | | 755 | return 1; |
756 | else | | 756 | else |
757 | return -1; | | 757 | return -1; |
758 | | | 758 | |
759 | return 0; | | 759 | return 0; |
760 | } | | 760 | } |
761 | | | 761 | |
762 | /* | | 762 | /* |
763 | * Comparator for sort_segments: cost-benefit equation. | | 763 | * Comparator for sort_segments: cost-benefit equation. |
764 | */ | | 764 | */ |
765 | static int | | 765 | static int |
766 | cb_comparator(const void *va, const void *vb) | | 766 | cb_comparator(const void *va, const void *vb) |
767 | { | | 767 | { |
768 | const struct clfs_seguse *a, *b; | | 768 | const struct clfs_seguse *a, *b; |
769 | | | 769 | |
770 | a = *(const struct clfs_seguse * const *)va; | | 770 | a = *(const struct clfs_seguse * const *)va; |
771 | b = *(const struct clfs_seguse * const *)vb; | | 771 | b = *(const struct clfs_seguse * const *)vb; |
772 | return a->priority > b->priority ? -1 : 1; | | 772 | return a->priority > b->priority ? -1 : 1; |
773 | } | | 773 | } |
774 | | | 774 | |
775 | void | | 775 | void |
776 | toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, int *bic, int *sizep) | | 776 | toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, int *bic, int *sizep) |
777 | { | | 777 | { |
778 | int i, r; | | 778 | int i, r; |
779 | BLOCK_INFO *bip = *bipp; | | 779 | BLOCK_INFO *bip = *bipp; |
780 | struct lfs_fcntl_markv /* { | | 780 | struct lfs_fcntl_markv /* { |
781 | BLOCK_INFO *blkiov; | | 781 | BLOCK_INFO *blkiov; |
782 | int blkcnt; | | 782 | int blkcnt; |
783 | } */ lim; | | 783 | } */ lim; |
784 | | | 784 | |
785 | if (bic == 0 || bip == NULL) | | 785 | if (bic == 0 || bip == NULL) |
786 | return; | | 786 | return; |
787 | | | 787 | |
788 | /* | | 788 | /* |
789 | * Kludge: Store the disk address in segcreate so we know which | | 789 | * Kludge: Store the disk address in segcreate so we know which |
790 | * ones to toss. | | 790 | * ones to toss. |
791 | */ | | 791 | */ |
792 | for (i = 0; i < *bic; i++) | | 792 | for (i = 0; i < *bic; i++) |
793 | bip[i].bi_segcreate = bip[i].bi_daddr; | | 793 | bip[i].bi_segcreate = bip[i].bi_daddr; |
794 | | | 794 | |
795 | /* Sort the blocks */ | | 795 | /* Sort the blocks */ |
796 | heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); | | 796 | heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); |
797 | | | 797 | |
798 | /* Use bmapv to locate the blocks */ | | 798 | /* Use bmapv to locate the blocks */ |
799 | lim.blkiov = bip; | | 799 | lim.blkiov = bip; |
800 | lim.blkcnt = *bic; | | 800 | lim.blkcnt = *bic; |
801 | if ((r = fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim)) < 0) { | | 801 | if ((r = fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim)) < 0) { |
802 | syslog(LOG_WARNING, "%s: bmapv returned %d (%m)", | | 802 | syslog(LOG_WARNING, "%s: bmapv returned %d (%m)", |
803 | fs->lfs_fsmnt, r); | | 803 | fs->lfs_fsmnt, r); |
804 | return; | | 804 | return; |
805 | } | | 805 | } |
806 | | | 806 | |
807 | /* Toss blocks not in this segment */ | | 807 | /* Toss blocks not in this segment */ |
808 | heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); | | 808 | heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); |
809 | | | 809 | |
810 | /* Get rid of stale blocks */ | | 810 | /* Get rid of stale blocks */ |
811 | if (sizep) | | 811 | if (sizep) |
812 | *sizep = 0; | | 812 | *sizep = 0; |
813 | for (i = 0; i < *bic; i++) { | | 813 | for (i = 0; i < *bic; i++) { |
814 | if (bip[i].bi_segcreate != bip[i].bi_daddr) | | 814 | if (bip[i].bi_segcreate != bip[i].bi_daddr) |
815 | break; | | 815 | break; |
816 | if (sizep) | | 816 | if (sizep) |
817 | *sizep += bip[i].bi_size; | | 817 | *sizep += bip[i].bi_size; |
818 | } | | 818 | } |
819 | *bic = i; /* XXX realloc bip? */ | | 819 | *bic = i; /* XXX realloc bip? */ |
820 | *bipp = bip; | | 820 | *bipp = bip; |
821 | | | 821 | |
822 | return; | | 822 | return; |
823 | } | | 823 | } |
824 | | | 824 | |
825 | /* | | 825 | /* |
826 | * Clean a segment and mark it invalid. | | 826 | * Clean a segment and mark it invalid. |
827 | */ | | 827 | */ |
828 | int | | 828 | int |
829 | invalidate_segment(struct clfs *fs, int sn) | | 829 | invalidate_segment(struct clfs *fs, int sn) |
830 | { | | 830 | { |
831 | BLOCK_INFO *bip; | | 831 | BLOCK_INFO *bip; |
832 | int i, r, bic; | | 832 | int i, r, bic; |
833 | off_t nb; | | 833 | off_t nb; |
834 | double util; | | 834 | double util; |
835 | struct lfs_fcntl_markv /* { | | 835 | struct lfs_fcntl_markv /* { |
836 | BLOCK_INFO *blkiov; | | 836 | BLOCK_INFO *blkiov; |
837 | int blkcnt; | | 837 | int blkcnt; |
838 | } */ lim; | | 838 | } */ lim; |
839 | | | 839 | |
840 | dlog("%s: inval seg %d", fs->lfs_fsmnt, sn); | | 840 | dlog("%s: inval seg %d", fs->lfs_fsmnt, sn); |
841 | | | 841 | |
842 | bip = NULL; | | 842 | bip = NULL; |
843 | bic = 0; | | 843 | bic = 0; |
844 | fs->clfs_nactive = 0; | | 844 | fs->clfs_nactive = 0; |
845 | if (load_segment(fs, sn, &bip, &bic) <= 0) | | 845 | if (load_segment(fs, sn, &bip, &bic) <= 0) |
846 | return -1; | | 846 | return -1; |
847 | toss_old_blocks(fs, &bip, &bic, NULL); | | 847 | toss_old_blocks(fs, &bip, &bic, NULL); |
848 | | | 848 | |
849 | /* Record statistics */ | | 849 | /* Record statistics */ |
850 | for (i = nb = 0; i < bic; i++) | | 850 | for (i = nb = 0; i < bic; i++) |
851 | nb += bip[i].bi_size; | | 851 | nb += bip[i].bi_size; |
852 | util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); | | 852 | util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); |
853 | cleaner_stats.util_tot += util; | | 853 | cleaner_stats.util_tot += util; |
854 | cleaner_stats.util_sos += util * util; | | 854 | cleaner_stats.util_sos += util * util; |
855 | cleaner_stats.bytes_written += nb; | | 855 | cleaner_stats.bytes_written += nb; |
856 | | | 856 | |
857 | /* | | 857 | /* |
858 | * Use markv to move the blocks. | | 858 | * Use markv to move the blocks. |
859 | */ | | 859 | */ |
860 | lim.blkiov = bip; | | 860 | lim.blkiov = bip; |
861 | lim.blkcnt = bic; | | 861 | lim.blkcnt = bic; |
862 | if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { | | 862 | if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { |
863 | syslog(LOG_WARNING, "%s: markv returned %d (%m) " | | 863 | syslog(LOG_WARNING, "%s: markv returned %d (%m) " |
864 | "for seg %d", fs->lfs_fsmnt, r, sn); | | 864 | "for seg %d", fs->lfs_fsmnt, r, sn); |
865 | return r; | | 865 | return r; |
866 | } | | 866 | } |
867 | | | 867 | |
868 | /* | | 868 | /* |
869 | * Finally call invalidate to invalidate the segment. | | 869 | * Finally call invalidate to invalidate the segment. |
870 | */ | | 870 | */ |
871 | if ((r = fcntl(fs->clfs_ifilefd, LFCNINVAL, &sn)) < 0) { | | 871 | if ((r = fcntl(fs->clfs_ifilefd, LFCNINVAL, &sn)) < 0) { |
872 | syslog(LOG_WARNING, "%s: inval returned %d (%m) " | | 872 | syslog(LOG_WARNING, "%s: inval returned %d (%m) " |
873 | "for seg %d", fs->lfs_fsmnt, r, sn); | | 873 | "for seg %d", fs->lfs_fsmnt, r, sn); |
874 | return r; | | 874 | return r; |
875 | } | | 875 | } |
876 | | | 876 | |
877 | return 0; | | 877 | return 0; |
878 | } | | 878 | } |
879 | | | 879 | |
880 | /* | | 880 | /* |
881 | * Check to see if the given ino/lbn pair is represented in the BLOCK_INFO | | 881 | * Check to see if the given ino/lbn pair is represented in the BLOCK_INFO |
882 | * array we are sending to the kernel, or if the kernel will have to add it. | | 882 | * array we are sending to the kernel, or if the kernel will have to add it. |
883 | * The kernel will only add each such pair once, though, so keep track of | | 883 | * The kernel will only add each such pair once, though, so keep track of |
884 | * previous requests in a separate "extra" BLOCK_INFO array. Returns 1 | | 884 | * previous requests in a separate "extra" BLOCK_INFO array. Returns 1 |
885 | * if the block needs to be added, 0 if it is already represented. | | 885 | * if the block needs to be added, 0 if it is already represented. |
886 | */ | | 886 | */ |
887 | static int | | 887 | static int |
888 | check_or_add(ino_t ino, int32_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp) | | 888 | check_or_add(ino_t ino, int32_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp) |
889 | { | | 889 | { |
890 | BLOCK_INFO *t, *ebip = *ebipp; | | 890 | BLOCK_INFO *t, *ebip = *ebipp; |
891 | int ebic = *ebicp; | | 891 | int ebic = *ebicp; |
892 | int k; | | 892 | int k; |
893 | | | 893 | |
894 | for (k = 0; k < bic; k++) { | | 894 | for (k = 0; k < bic; k++) { |
895 | if (bip[k].bi_inode != ino) | | 895 | if (bip[k].bi_inode != ino) |
896 | break; | | 896 | break; |
897 | if (bip[k].bi_lbn == lbn) { | | 897 | if (bip[k].bi_lbn == lbn) { |
898 | return 0; | | 898 | return 0; |
899 | } | | 899 | } |
900 | } | | 900 | } |
901 | | | 901 | |
902 | /* Look on the list of extra blocks, too */ | | 902 | /* Look on the list of extra blocks, too */ |
903 | for (k = 0; k < ebic; k++) { | | 903 | for (k = 0; k < ebic; k++) { |
904 | if (ebip[k].bi_inode == ino && ebip[k].bi_lbn == lbn) { | | 904 | if (ebip[k].bi_inode == ino && ebip[k].bi_lbn == lbn) { |
905 | return 0; | | 905 | return 0; |
906 | } | | 906 | } |
907 | } | | 907 | } |
908 | | | 908 | |
909 | ++ebic; | | 909 | ++ebic; |
910 | t = realloc(ebip, ebic * sizeof(BLOCK_INFO)); | | 910 | t = realloc(ebip, ebic * sizeof(BLOCK_INFO)); |
911 | if (t == NULL) | | 911 | if (t == NULL) |
912 | return 1; /* Note *ebipc is not updated */ | | 912 | return 1; /* Note *ebipc is not updated */ |
913 | | | 913 | |
914 | ebip = t; | | 914 | ebip = t; |
915 | ebip[ebic - 1].bi_inode = ino; | | 915 | ebip[ebic - 1].bi_inode = ino; |
916 | ebip[ebic - 1].bi_lbn = lbn; | | 916 | ebip[ebic - 1].bi_lbn = lbn; |
917 | | | 917 | |
918 | *ebipp = ebip; | | 918 | *ebipp = ebip; |
919 | *ebicp = ebic; | | 919 | *ebicp = ebic; |
920 | return 1; | | 920 | return 1; |
921 | } | | 921 | } |
922 | | | 922 | |
923 | /* | | 923 | /* |
924 | * Look for indirect blocks we will have to write which are not | | 924 | * Look for indirect blocks we will have to write which are not |
925 | * contained in this collection of blocks. This constitutes | | 925 | * contained in this collection of blocks. This constitutes |
926 | * a hidden cleaning cost, since we are unaware of it until we | | 926 | * a hidden cleaning cost, since we are unaware of it until we |
927 | * have already read the segments. Return the total cost, and fill | | 927 | * have already read the segments. Return the total cost, and fill |
928 | * in *ifc with the part of that cost due to rewriting the Ifile. | | 928 | * in *ifc with the part of that cost due to rewriting the Ifile. |
929 | */ | | 929 | */ |
930 | static off_t | | 930 | static off_t |
931 | check_hidden_cost(struct clfs *fs, BLOCK_INFO *bip, int bic, off_t *ifc) | | 931 | check_hidden_cost(struct clfs *fs, BLOCK_INFO *bip, int bic, off_t *ifc) |
932 | { | | 932 | { |
933 | int start; | | 933 | int start; |
934 | struct indir in[NIADDR + 1]; | | 934 | struct indir in[NIADDR + 1]; |
935 | int num; | | 935 | int num; |
936 | int i, j, ebic; | | 936 | int i, j, ebic; |
937 | BLOCK_INFO *ebip; | | 937 | BLOCK_INFO *ebip; |
938 | int32_t lbn; | | 938 | int32_t lbn; |
939 | | | 939 | |
940 | start = 0; | | 940 | start = 0; |
941 | ebip = NULL; | | 941 | ebip = NULL; |
942 | ebic = 0; | | 942 | ebic = 0; |
943 | for (i = 0; i < bic; i++) { | | 943 | for (i = 0; i < bic; i++) { |
944 | if (i == 0 || bip[i].bi_inode != bip[start].bi_inode) { | | 944 | if (i == 0 || bip[i].bi_inode != bip[start].bi_inode) { |
945 | start = i; | | 945 | start = i; |
946 | /* | | 946 | /* |
947 | * Look for IFILE blocks, unless this is the Ifile. | | 947 | * Look for IFILE blocks, unless this is the Ifile. |
948 | */ | | 948 | */ |
949 | if (bip[i].bi_inode != fs->lfs_ifile) { | | 949 | if (bip[i].bi_inode != fs->lfs_ifile) { |
950 | lbn = fs->lfs_cleansz + bip[i].bi_inode / | | 950 | lbn = fs->lfs_cleansz + bip[i].bi_inode / |
951 | fs->lfs_ifpb; | | 951 | fs->lfs_ifpb; |
952 | *ifc += check_or_add(fs->lfs_ifile, lbn, | | 952 | *ifc += check_or_add(fs->lfs_ifile, lbn, |
953 | bip, bic, &ebip, &ebic); | | 953 | bip, bic, &ebip, &ebic); |
954 | } | | 954 | } |
955 | } | | 955 | } |
956 | if (bip[i].bi_lbn == LFS_UNUSED_LBN) | | 956 | if (bip[i].bi_lbn == LFS_UNUSED_LBN) |
957 | continue; | | 957 | continue; |
958 | if (bip[i].bi_lbn < NDADDR) | | 958 | if (bip[i].bi_lbn < NDADDR) |
959 | continue; | | 959 | continue; |
960 | | | 960 | |
961 | ufs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num); | | 961 | ufs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num); |
962 | for (j = 0; j < num; j++) { | | 962 | for (j = 0; j < num; j++) { |
963 | check_or_add(bip[i].bi_inode, in[j].in_lbn, | | 963 | check_or_add(bip[i].bi_inode, in[j].in_lbn, |
964 | bip + start, bic - start, &ebip, &ebic); | | 964 | bip + start, bic - start, &ebip, &ebic); |
965 | } | | 965 | } |
966 | } | | 966 | } |
967 | return ebic; | | 967 | return ebic; |
968 | } | | 968 | } |
969 | | | 969 | |
970 | /* | | 970 | /* |
971 | * Select segments to clean, add blocks from these segments to a cleaning | | 971 | * Select segments to clean, add blocks from these segments to a cleaning |
972 | * list, and send this list through lfs_markv() to move them to new | | 972 | * list, and send this list through lfs_markv() to move them to new |
973 | * locations on disk. | | 973 | * locations on disk. |
974 | */ | | 974 | */ |
975 | int | | 975 | int |
976 | clean_fs(struct clfs *fs, CLEANERINFO *cip) | | 976 | clean_fs(struct clfs *fs, CLEANERINFO *cip) |
977 | { | | 977 | { |
978 | int i, j, ngood, sn, bic, r, npos; | | 978 | int i, j, ngood, sn, bic, r, npos; |
979 | int bytes, totbytes; | | 979 | int bytes, totbytes; |
980 | struct ubuf *bp; | | 980 | struct ubuf *bp; |
981 | SEGUSE *sup; | | 981 | SEGUSE *sup; |
982 | static BLOCK_INFO *bip; | | 982 | static BLOCK_INFO *bip; |
983 | struct lfs_fcntl_markv /* { | | 983 | struct lfs_fcntl_markv /* { |
984 | BLOCK_INFO *blkiov; | | 984 | BLOCK_INFO *blkiov; |
985 | int blkcnt; | | 985 | int blkcnt; |
986 | } */ lim; | | 986 | } */ lim; |
987 | int mc; | | 987 | int mc; |
988 | BLOCK_INFO *mbip; | | 988 | BLOCK_INFO *mbip; |
989 | int inc; | | 989 | int inc; |
990 | off_t nb; | | 990 | off_t nb; |
991 | off_t goal; | | 991 | off_t goal; |
992 | off_t extra, if_extra; | | 992 | off_t extra, if_extra; |
993 | double util; | | 993 | double util; |
994 | | | 994 | |
995 | /* Read the segment table into our private structure */ | | 995 | /* Read the segment table into our private structure */ |
996 | npos = 0; | | 996 | npos = 0; |
997 | for (i = 0; i < fs->lfs_nseg; i+= fs->lfs_sepb) { | | 997 | for (i = 0; i < fs->lfs_nseg; i+= fs->lfs_sepb) { |
998 | bread(fs->lfs_ivnode, fs->lfs_cleansz + i / fs->lfs_sepb, | | 998 | bread(fs->lfs_ivnode, fs->lfs_cleansz + i / fs->lfs_sepb, |
999 | fs->lfs_bsize, NOCRED, 0, &bp); | | 999 | fs->lfs_bsize, NOCRED, 0, &bp); |
1000 | for (j = 0; j < fs->lfs_sepb && i + j < fs->lfs_nseg; j++) { | | 1000 | for (j = 0; j < fs->lfs_sepb && i + j < fs->lfs_nseg; j++) { |
1001 | sup = ((SEGUSE *)bp->b_data) + j; | | 1001 | sup = ((SEGUSE *)bp->b_data) + j; |
1002 | fs->clfs_segtab[i + j].nbytes = sup->su_nbytes; | | 1002 | fs->clfs_segtab[i + j].nbytes = sup->su_nbytes; |
1003 | fs->clfs_segtab[i + j].nsums = sup->su_nsums; | | 1003 | fs->clfs_segtab[i + j].nsums = sup->su_nsums; |
1004 | fs->clfs_segtab[i + j].lastmod = sup->su_lastmod; | | 1004 | fs->clfs_segtab[i + j].lastmod = sup->su_lastmod; |
1005 | /* Keep error status but renew other flags */ | | 1005 | /* Keep error status but renew other flags */ |
1006 | fs->clfs_segtab[i + j].flags &= SEGUSE_ERROR; | | 1006 | fs->clfs_segtab[i + j].flags &= SEGUSE_ERROR; |
1007 | fs->clfs_segtab[i + j].flags |= sup->su_flags; | | 1007 | fs->clfs_segtab[i + j].flags |= sup->su_flags; |
1008 | | | 1008 | |
1009 | /* Compute cost-benefit coefficient */ | | 1009 | /* Compute cost-benefit coefficient */ |
1010 | calc_cb(fs, i + j, fs->clfs_segtab + i + j); | | 1010 | calc_cb(fs, i + j, fs->clfs_segtab + i + j); |
1011 | if (fs->clfs_segtab[i + j].priority > 0) | | 1011 | if (fs->clfs_segtab[i + j].priority > 0) |
1012 | ++npos; | | 1012 | ++npos; |
1013 | } | | 1013 | } |
1014 | brelse(bp, 0); | | 1014 | brelse(bp, 0); |
1015 | } | | 1015 | } |
1016 | | | 1016 | |
1017 | /* Sort segments based on cleanliness, fulness, and condition */ | | 1017 | /* Sort segments based on cleanliness, fulness, and condition */ |
1018 | heapsort(fs->clfs_segtabp, fs->lfs_nseg, sizeof(struct clfs_seguse *), | | 1018 | heapsort(fs->clfs_segtabp, fs->lfs_nseg, sizeof(struct clfs_seguse *), |
1019 | cb_comparator); | | 1019 | cb_comparator); |
1020 | | | 1020 | |
1021 | /* If no segment is cleanable, just return */ | | 1021 | /* If no segment is cleanable, just return */ |
1022 | if (fs->clfs_segtabp[0]->priority == 0) { | | 1022 | if (fs->clfs_segtabp[0]->priority == 0) { |
1023 | dlog("%s: no segment cleanable", fs->lfs_fsmnt); | | 1023 | dlog("%s: no segment cleanable", fs->lfs_fsmnt); |
1024 | return 0; | | 1024 | return 0; |
1025 | } | | 1025 | } |
1026 | | | 1026 | |
1027 | /* Load some segments' blocks into bip */ | | 1027 | /* Load some segments' blocks into bip */ |
1028 | bic = 0; | | 1028 | bic = 0; |
1029 | fs->clfs_nactive = 0; | | 1029 | fs->clfs_nactive = 0; |
1030 | ngood = 0; | | 1030 | ngood = 0; |
1031 | if (use_bytes) { | | 1031 | if (use_bytes) { |
1032 | /* Set attainable goal */ | | 1032 | /* Set attainable goal */ |
1033 | goal = fs->lfs_ssize * atatime; | | 1033 | goal = fs->lfs_ssize * atatime; |
1034 | if (goal > (cip->clean - 1) * fs->lfs_ssize / 2) | | 1034 | if (goal > (cip->clean - 1) * fs->lfs_ssize / 2) |
1035 | goal = MAX((cip->clean - 1) * fs->lfs_ssize, | | 1035 | goal = MAX((cip->clean - 1) * fs->lfs_ssize, |
1036 | fs->lfs_ssize) / 2; | | 1036 | fs->lfs_ssize) / 2; |
1037 | | | 1037 | |
1038 | dlog("%s: cleaning with goal %" PRId64 | | 1038 | dlog("%s: cleaning with goal %" PRId64 |
1039 | " bytes (%d segs clean, %d cleanable)", | | 1039 | " bytes (%d segs clean, %d cleanable)", |
1040 | fs->lfs_fsmnt, goal, cip->clean, npos); | | 1040 | fs->lfs_fsmnt, goal, cip->clean, npos); |
1041 | syslog(LOG_INFO, "%s: cleaning with goal %" PRId64 | | 1041 | syslog(LOG_INFO, "%s: cleaning with goal %" PRId64 |
1042 | " bytes (%d segs clean, %d cleanable)", | | 1042 | " bytes (%d segs clean, %d cleanable)", |
1043 | fs->lfs_fsmnt, goal, cip->clean, npos); | | 1043 | fs->lfs_fsmnt, goal, cip->clean, npos); |
1044 | totbytes = 0; | | 1044 | totbytes = 0; |
1045 | for (i = 0; i < fs->lfs_nseg && totbytes < goal; i++) { | | 1045 | for (i = 0; i < fs->lfs_nseg && totbytes < goal; i++) { |
1046 | if (fs->clfs_segtabp[i]->priority == 0) | | 1046 | if (fs->clfs_segtabp[i]->priority == 0) |
1047 | break; | | 1047 | break; |
1048 | /* Upper bound on number of segments at once */ | | 1048 | /* Upper bound on number of segments at once */ |
1049 | if (ngood * fs->lfs_ssize > 4 * goal) | | 1049 | if (ngood * fs->lfs_ssize > 4 * goal) |
1050 | break; | | 1050 | break; |
1051 | sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); | | 1051 | sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); |
1052 | dlog("%s: add seg %d prio %" PRIu64 | | 1052 | dlog("%s: add seg %d prio %" PRIu64 |
1053 | " containing %ld bytes", | | 1053 | " containing %ld bytes", |
1054 | fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority, | | 1054 | fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority, |
1055 | fs->clfs_segtabp[i]->nbytes); | | 1055 | fs->clfs_segtabp[i]->nbytes); |
1056 | if ((r = load_segment(fs, sn, &bip, &bic)) > 0) { | | 1056 | if ((r = load_segment(fs, sn, &bip, &bic)) > 0) { |
1057 | ++ngood; | | 1057 | ++ngood; |
1058 | toss_old_blocks(fs, &bip, &bic, &bytes); | | 1058 | toss_old_blocks(fs, &bip, &bic, &bytes); |
1059 | totbytes += bytes; | | 1059 | totbytes += bytes; |
1060 | } else if (r == 0) | | 1060 | } else if (r == 0) |
1061 | fd_release(fs->clfs_devvp); | | 1061 | fd_release(fs->clfs_devvp); |
1062 | else | | 1062 | else |
1063 | break; | | 1063 | break; |
1064 | } | | 1064 | } |
1065 | } else { | | 1065 | } else { |
1066 | /* Set attainable goal */ | | 1066 | /* Set attainable goal */ |
1067 | goal = atatime; | | 1067 | goal = atatime; |
1068 | if (goal > cip->clean - 1) | | 1068 | if (goal > cip->clean - 1) |
1069 | goal = MAX(cip->clean - 1, 1); | | 1069 | goal = MAX(cip->clean - 1, 1); |
1070 | | | 1070 | |
1071 | dlog("%s: cleaning with goal %d segments (%d clean, %d cleanable)", | | 1071 | dlog("%s: cleaning with goal %d segments (%d clean, %d cleanable)", |
1072 | fs->lfs_fsmnt, (int)goal, cip->clean, npos); | | 1072 | fs->lfs_fsmnt, (int)goal, cip->clean, npos); |
1073 | for (i = 0; i < fs->lfs_nseg && ngood < goal; i++) { | | 1073 | for (i = 0; i < fs->lfs_nseg && ngood < goal; i++) { |
1074 | if (fs->clfs_segtabp[i]->priority == 0) | | 1074 | if (fs->clfs_segtabp[i]->priority == 0) |
1075 | break; | | 1075 | break; |
1076 | sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); | | 1076 | sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); |
1077 | dlog("%s: add seg %d prio %" PRIu64, | | 1077 | dlog("%s: add seg %d prio %" PRIu64, |
1078 | fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority); | | 1078 | fs->lfs_fsmnt, sn, fs->clfs_segtabp[i]->priority); |
1079 | if ((r = load_segment(fs, sn, &bip, &bic)) > 0) | | 1079 | if ((r = load_segment(fs, sn, &bip, &bic)) > 0) |
1080 | ++ngood; | | 1080 | ++ngood; |
1081 | else if (r == 0) | | 1081 | else if (r == 0) |
1082 | fd_release(fs->clfs_devvp); | | 1082 | fd_release(fs->clfs_devvp); |
1083 | else | | 1083 | else |
1084 | break; | | 1084 | break; |
1085 | } | | 1085 | } |
1086 | toss_old_blocks(fs, &bip, &bic, NULL); | | 1086 | toss_old_blocks(fs, &bip, &bic, NULL); |
1087 | } | | 1087 | } |
1088 | | | 1088 | |
1089 | /* If there is nothing to do, try again later. */ | | 1089 | /* If there is nothing to do, try again later. */ |
1090 | if (bic == 0) { | | 1090 | if (bic == 0) { |
1091 | dlog("%s: no blocks to clean in %d cleanable segments", | | 1091 | dlog("%s: no blocks to clean in %d cleanable segments", |
1092 | fs->lfs_fsmnt, (int)ngood); | | 1092 | fs->lfs_fsmnt, (int)ngood); |
1093 | fd_release_all(fs->clfs_devvp); | | 1093 | fd_release_all(fs->clfs_devvp); |
1094 | return 0; | | 1094 | return 0; |
1095 | } | | 1095 | } |
1096 | | | 1096 | |
1097 | /* Record statistics */ | | 1097 | /* Record statistics */ |
1098 | for (i = nb = 0; i < bic; i++) | | 1098 | for (i = nb = 0; i < bic; i++) |
1099 | nb += bip[i].bi_size; | | 1099 | nb += bip[i].bi_size; |
1100 | util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); | | 1100 | util = ((double)nb) / (fs->clfs_nactive * fs->lfs_ssize); |
1101 | cleaner_stats.util_tot += util; | | 1101 | cleaner_stats.util_tot += util; |
1102 | cleaner_stats.util_sos += util * util; | | 1102 | cleaner_stats.util_sos += util * util; |
1103 | cleaner_stats.bytes_written += nb; | | 1103 | cleaner_stats.bytes_written += nb; |
1104 | | | 1104 | |
1105 | /* | | 1105 | /* |
1106 | * Check out our blocks to see if there are hidden cleaning costs. | | 1106 | * Check out our blocks to see if there are hidden cleaning costs. |
1107 | * If there are, we might be cleaning ourselves deeper into a hole | | 1107 | * If there are, we might be cleaning ourselves deeper into a hole |
1108 | * rather than doing anything useful. | | 1108 | * rather than doing anything useful. |
1109 | * XXX do something about this. | | 1109 | * XXX do something about this. |
1110 | */ | | 1110 | */ |
1111 | if_extra = 0; | | 1111 | if_extra = 0; |
1112 | extra = fs->lfs_bsize * (off_t)check_hidden_cost(fs, bip, bic, &if_extra); | | 1112 | extra = fs->lfs_bsize * (off_t)check_hidden_cost(fs, bip, bic, &if_extra); |
1113 | if_extra *= fs->lfs_bsize; | | 1113 | if_extra *= fs->lfs_bsize; |
1114 | | | 1114 | |
1115 | /* | | 1115 | /* |
1116 | * Use markv to move the blocks. | | 1116 | * Use markv to move the blocks. |
1117 | */ | | 1117 | */ |
1118 | if (do_small) | | 1118 | if (do_small) |
1119 | inc = MAXPHYS / fs->lfs_bsize - 1; | | 1119 | inc = MAXPHYS / fs->lfs_bsize - 1; |
1120 | else | | 1120 | else |
1121 | inc = LFS_MARKV_MAXBLKCNT / 2; | | 1121 | inc = LFS_MARKV_MAXBLKCNT / 2; |
1122 | for (mc = 0, mbip = bip; mc < bic; mc += inc, mbip += inc) { | | 1122 | for (mc = 0, mbip = bip; mc < bic; mc += inc, mbip += inc) { |
1123 | lim.blkiov = mbip; | | 1123 | lim.blkiov = mbip; |
1124 | lim.blkcnt = (bic - mc > inc ? inc : bic - mc); | | 1124 | lim.blkcnt = (bic - mc > inc ? inc : bic - mc); |
1125 | #ifdef TEST_PATTERN | | 1125 | #ifdef TEST_PATTERN |
1126 | dlog("checking blocks %d-%d", mc, mc + lim.blkcnt - 1); | | 1126 | dlog("checking blocks %d-%d", mc, mc + lim.blkcnt - 1); |
1127 | for (i = 0; i < lim.blkcnt; i++) { | | 1127 | for (i = 0; i < lim.blkcnt; i++) { |
1128 | check_test_pattern(mbip + i); | | 1128 | check_test_pattern(mbip + i); |
1129 | } | | 1129 | } |
1130 | #endif /* TEST_PATTERN */ | | 1130 | #endif /* TEST_PATTERN */ |
1131 | dlog("sending blocks %d-%d", mc, mc + lim.blkcnt - 1); | | 1131 | dlog("sending blocks %d-%d", mc, mc + lim.blkcnt - 1); |
1132 | if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { | | 1132 | if ((r = fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { |
1133 | syslog(LOG_WARNING, "%s: markv returned %d (%m)", | | 1133 | syslog(LOG_WARNING, "%s: markv returned %d (%m)", |
1134 | fs->lfs_fsmnt, r); | | 1134 | fs->lfs_fsmnt, r); |
1135 | if (errno != EAGAIN && errno != ESHUTDOWN) { | | 1135 | if (errno != EAGAIN && errno != ESHUTDOWN) { |
1136 | fd_release_all(fs->clfs_devvp); | | 1136 | fd_release_all(fs->clfs_devvp); |
1137 | return r; | | 1137 | return r; |
1138 | } | | 1138 | } |
1139 | } | | 1139 | } |
1140 | } | | 1140 | } |
1141 | | | 1141 | |
1142 | /* | | 1142 | /* |
1143 | * Report progress (or lack thereof) | | 1143 | * Report progress (or lack thereof) |
1144 | */ | | 1144 | */ |
1145 | syslog(LOG_INFO, "%s: wrote %" PRId64 " dirty + %" | | 1145 | syslog(LOG_INFO, "%s: wrote %" PRId64 " dirty + %" |
1146 | PRId64 " supporting indirect + %" | | 1146 | PRId64 " supporting indirect + %" |
1147 | PRId64 " supporting Ifile = %" | | 1147 | PRId64 " supporting Ifile = %" |
1148 | PRId64 " bytes to clean %d segs (%" PRId64 "%% recovery)", | | 1148 | PRId64 " bytes to clean %d segs (%" PRId64 "%% recovery)", |
1149 | fs->lfs_fsmnt, (int64_t)nb, (int64_t)(extra - if_extra), | | 1149 | fs->lfs_fsmnt, (int64_t)nb, (int64_t)(extra - if_extra), |
1150 | (int64_t)if_extra, (int64_t)(nb + extra), ngood, | | 1150 | (int64_t)if_extra, (int64_t)(nb + extra), ngood, |
1151 | (ngood ? (int64_t)(100 - (100 * (nb + extra)) / | | 1151 | (ngood ? (int64_t)(100 - (100 * (nb + extra)) / |
1152 | (ngood * fs->lfs_ssize)) : | | 1152 | (ngood * fs->lfs_ssize)) : |
1153 | (int64_t)0)); | | 1153 | (int64_t)0)); |
1154 | if (nb + extra >= ngood * fs->lfs_ssize) | | 1154 | if (nb + extra >= ngood * fs->lfs_ssize) |
1155 | syslog(LOG_WARNING, "%s: cleaner not making forward progress", | | 1155 | syslog(LOG_WARNING, "%s: cleaner not making forward progress", |
1156 | fs->lfs_fsmnt); | | 1156 | fs->lfs_fsmnt); |
1157 | | | 1157 | |
1158 | /* | | 1158 | /* |
1159 | * Finally call reclaim to prompt cleaning of the segments. | | 1159 | * Finally call reclaim to prompt cleaning of the segments. |
1160 | */ | | 1160 | */ |
1161 | fcntl(fs->clfs_ifilefd, LFCNRECLAIM, NULL); | | 1161 | fcntl(fs->clfs_ifilefd, LFCNRECLAIM, NULL); |
1162 | | | 1162 | |
1163 | fd_release_all(fs->clfs_devvp); | | 1163 | fd_release_all(fs->clfs_devvp); |
1164 | return 0; | | 1164 | return 0; |
1165 | } | | 1165 | } |
1166 | | | 1166 | |
1167 | /* | | 1167 | /* |
1168 | * Read the cleanerinfo block and apply cleaning policy to determine whether | | 1168 | * Read the cleanerinfo block and apply cleaning policy to determine whether |
1169 | * the given filesystem needs to be cleaned. Returns 1 if it does, 0 if it | | 1169 | * the given filesystem needs to be cleaned. Returns 1 if it does, 0 if it |
1170 | * does not, or -1 on error. | | 1170 | * does not, or -1 on error. |
1171 | */ | | 1171 | */ |
1172 | int | | 1172 | int |
1173 | needs_cleaning(struct clfs *fs, CLEANERINFO *cip) | | 1173 | needs_cleaning(struct clfs *fs, CLEANERINFO *cip) |
1174 | { | | 1174 | { |
1175 | struct ubuf *bp; | | 1175 | struct ubuf *bp; |
1176 | struct stat st; | | 1176 | struct stat st; |
1177 | daddr_t fsb_per_seg, max_free_segs; | | 1177 | daddr_t fsb_per_seg, max_free_segs; |
1178 | time_t now; | | 1178 | time_t now; |
1179 | double loadavg; | | 1179 | double loadavg; |
1180 | | | 1180 | |
1181 | /* If this fs is "on hold", don't clean it. */ | | 1181 | /* If this fs is "on hold", don't clean it. */ |
1182 | if (fs->clfs_onhold) | | 1182 | if (fs->clfs_onhold) |
1183 | return 0; | | 1183 | return 0; |
1184 | | | 1184 | |
1185 | /* | | 1185 | /* |
1186 | * Read the cleanerinfo block from the Ifile. We don't want | | 1186 | * Read the cleanerinfo block from the Ifile. We don't want |
1187 | * the cached information, so invalidate the buffer before | | 1187 | * the cached information, so invalidate the buffer before |
1188 | * handing it back. | | 1188 | * handing it back. |
1189 | */ | | 1189 | */ |
1190 | if (bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp)) { | | 1190 | if (bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp)) { |
1191 | syslog(LOG_ERR, "%s: can't read inode", fs->lfs_fsmnt); | | 1191 | syslog(LOG_ERR, "%s: can't read inode", fs->lfs_fsmnt); |
1192 | return -1; | | 1192 | return -1; |
1193 | } | | 1193 | } |
1194 | *cip = *(CLEANERINFO *)bp->b_data; /* Structure copy */ | | 1194 | *cip = *(CLEANERINFO *)bp->b_data; /* Structure copy */ |
1195 | brelse(bp, B_INVAL); | | 1195 | brelse(bp, B_INVAL); |
1196 | cleaner_stats.bytes_read += fs->lfs_bsize; | | 1196 | cleaner_stats.bytes_read += fs->lfs_bsize; |
1197 | | | 1197 | |
1198 | /* | | 1198 | /* |
1199 | * If the number of segments changed under us, reinit. | | 1199 | * If the number of segments changed under us, reinit. |
1200 | * We don't have to start over from scratch, however, | | 1200 | * We don't have to start over from scratch, however, |
1201 | * since we don't hold any buffers. | | 1201 | * since we don't hold any buffers. |
1202 | */ | | 1202 | */ |
1203 | if (fs->lfs_nseg != cip->clean + cip->dirty) { | | 1203 | if (fs->lfs_nseg != cip->clean + cip->dirty) { |
1204 | if (reinit_fs(fs) < 0) { | | 1204 | if (reinit_fs(fs) < 0) { |
1205 | /* The normal case for unmount */ | | 1205 | /* The normal case for unmount */ |
1206 | syslog(LOG_NOTICE, "%s: filesystem unmounted", fs->lfs_fsmnt); | | 1206 | syslog(LOG_NOTICE, "%s: filesystem unmounted", fs->lfs_fsmnt); |
1207 | return -1; | | 1207 | return -1; |
1208 | } | | 1208 | } |
1209 | syslog(LOG_NOTICE, "%s: nsegs changed", fs->lfs_fsmnt); | | 1209 | syslog(LOG_NOTICE, "%s: nsegs changed", fs->lfs_fsmnt); |
1210 | } | | 1210 | } |
1211 | | | 1211 | |
1212 | /* Compute theoretical "free segments" maximum based on usage */ | | 1212 | /* Compute theoretical "free segments" maximum based on usage */ |
1213 | fsb_per_seg = segtod(fs, 1); | | 1213 | fsb_per_seg = segtod(fs, 1); |
1214 | max_free_segs = MAX(cip->bfree, 0) / fsb_per_seg + fs->lfs_minfreeseg; | | 1214 | max_free_segs = MAX(cip->bfree, 0) / fsb_per_seg + fs->lfs_minfreeseg; |
1215 | | | 1215 | |
1216 | dlog("%s: bfree = %d, avail = %d, clean = %d/%d", | | 1216 | dlog("%s: bfree = %d, avail = %d, clean = %d/%d", |
1217 | fs->lfs_fsmnt, cip->bfree, cip->avail, cip->clean, fs->lfs_nseg); | | 1217 | fs->lfs_fsmnt, cip->bfree, cip->avail, cip->clean, fs->lfs_nseg); |
1218 | | | 1218 | |
1219 | /* If the writer is waiting on us, clean it */ | | 1219 | /* If the writer is waiting on us, clean it */ |
1220 | if (cip->clean <= fs->lfs_minfreeseg || | | 1220 | if (cip->clean <= fs->lfs_minfreeseg || |
1221 | (cip->flags & LFS_CLEANER_MUST_CLEAN)) | | 1221 | (cip->flags & LFS_CLEANER_MUST_CLEAN)) |
1222 | return 1; | | 1222 | return 1; |
1223 | | | 1223 | |
1224 | /* If there are enough segments, don't clean it */ | | 1224 | /* If there are enough segments, don't clean it */ |
1225 | if (cip->bfree - cip->avail <= fsb_per_seg && | | 1225 | if (cip->bfree - cip->avail <= fsb_per_seg && |
1226 | cip->avail > fsb_per_seg) | | 1226 | cip->avail > fsb_per_seg) |
1227 | return 0; | | 1227 | return 0; |
1228 | | | 1228 | |
1229 | /* If we are in dire straits, clean it */ | | 1229 | /* If we are in dire straits, clean it */ |
1230 | if (cip->bfree - cip->avail > fsb_per_seg && | | 1230 | if (cip->bfree - cip->avail > fsb_per_seg && |
1231 | cip->avail <= fsb_per_seg) | | 1231 | cip->avail <= fsb_per_seg) |
1232 | return 1; | | 1232 | return 1; |
1233 | | | 1233 | |
1234 | /* If under busy threshold, clean regardless of load */ | | 1234 | /* If under busy threshold, clean regardless of load */ |
1235 | if (cip->clean < max_free_segs * BUSY_LIM) | | 1235 | if (cip->clean < max_free_segs * BUSY_LIM) |
1236 | return 1; | | 1236 | return 1; |
1237 | | | 1237 | |
1238 | /* Check busy status; clean if idle and under idle limit */ | | 1238 | /* Check busy status; clean if idle and under idle limit */ |
1239 | if (use_fs_idle) { | | 1239 | if (use_fs_idle) { |
1240 | /* Filesystem idle */ | | 1240 | /* Filesystem idle */ |
1241 | time(&now); | | 1241 | time(&now); |
1242 | if (fstat(fs->clfs_ifilefd, &st) < 0) { | | 1242 | if (fstat(fs->clfs_ifilefd, &st) < 0) { |
1243 | syslog(LOG_ERR, "%s: failed to stat ifile", | | 1243 | syslog(LOG_ERR, "%s: failed to stat ifile", |
1244 | fs->lfs_fsmnt); | | 1244 | fs->lfs_fsmnt); |
1245 | return -1; | | 1245 | return -1; |
1246 | } | | 1246 | } |
1247 | if (now - st.st_mtime > segwait_timeout && | | 1247 | if (now - st.st_mtime > segwait_timeout && |
1248 | cip->clean < max_free_segs * IDLE_LIM) | | 1248 | cip->clean < max_free_segs * IDLE_LIM) |
1249 | return 1; | | 1249 | return 1; |
1250 | } else { | | 1250 | } else { |
1251 | /* CPU idle - use one-minute load avg */ | | 1251 | /* CPU idle - use one-minute load avg */ |
1252 | if (getloadavg(&loadavg, 1) == -1) { | | 1252 | if (getloadavg(&loadavg, 1) == -1) { |
1253 | syslog(LOG_ERR, "%s: failed to get load avg", | | 1253 | syslog(LOG_ERR, "%s: failed to get load avg", |
1254 | fs->lfs_fsmnt); | | 1254 | fs->lfs_fsmnt); |
1255 | return -1; | | 1255 | return -1; |
1256 | } | | 1256 | } |
1257 | if (loadavg < load_threshold && | | 1257 | if (loadavg < load_threshold && |
1258 | cip->clean < max_free_segs * IDLE_LIM) | | 1258 | cip->clean < max_free_segs * IDLE_LIM) |
1259 | return 1; | | 1259 | return 1; |
1260 | } | | 1260 | } |
1261 | | | 1261 | |
1262 | return 0; | | 1262 | return 0; |
1263 | } | | 1263 | } |
1264 | | | 1264 | |
1265 | /* | | 1265 | /* |
1266 | * Report statistics. If the signal was SIGUSR2, clear the statistics too. | | 1266 | * Report statistics. If the signal was SIGUSR2, clear the statistics too. |
1267 | * If the signal was SIGINT, exit. | | 1267 | * If the signal was SIGINT, exit. |
1268 | */ | | 1268 | */ |
1269 | static void | | 1269 | static void |
1270 | sig_report(int sig) | | 1270 | sig_report(int sig) |
1271 | { | | 1271 | { |
1272 | double avg = 0.0, stddev; | | 1272 | double avg = 0.0, stddev; |
1273 | | | 1273 | |
1274 | avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0); | | 1274 | avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0); |
1275 | stddev = cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - | | 1275 | stddev = cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - |
1276 | avg * avg, 1.0); | | 1276 | avg * avg, 1.0); |
1277 | syslog(LOG_INFO, "bytes read: %" PRId64, cleaner_stats.bytes_read); | | 1277 | syslog(LOG_INFO, "bytes read: %" PRId64, cleaner_stats.bytes_read); |
1278 | syslog(LOG_INFO, "bytes written: %" PRId64, cleaner_stats.bytes_written); | | 1278 | syslog(LOG_INFO, "bytes written: %" PRId64, cleaner_stats.bytes_written); |
1279 | syslog(LOG_INFO, "segments cleaned: %" PRId64, cleaner_stats.segs_cleaned); | | 1279 | syslog(LOG_INFO, "segments cleaned: %" PRId64, cleaner_stats.segs_cleaned); |
1280 | #if 0 | | 1280 | #if 0 |
1281 | /* "Empty segments" is meaningless, since the kernel handles those */ | | 1281 | /* "Empty segments" is meaningless, since the kernel handles those */ |
1282 | syslog(LOG_INFO, "empty segments: %" PRId64, cleaner_stats.segs_empty); | | 1282 | syslog(LOG_INFO, "empty segments: %" PRId64, cleaner_stats.segs_empty); |
1283 | #endif | | 1283 | #endif |
1284 | syslog(LOG_INFO, "error segments: %" PRId64, cleaner_stats.segs_error); | | 1284 | syslog(LOG_INFO, "error segments: %" PRId64, cleaner_stats.segs_error); |
1285 | syslog(LOG_INFO, "utilization total: %g", cleaner_stats.util_tot); | | 1285 | syslog(LOG_INFO, "utilization total: %g", cleaner_stats.util_tot); |
1286 | syslog(LOG_INFO, "utilization sos: %g", cleaner_stats.util_sos); | | 1286 | syslog(LOG_INFO, "utilization sos: %g", cleaner_stats.util_sos); |
1287 | syslog(LOG_INFO, "utilization avg: %4.2f", avg); | | 1287 | syslog(LOG_INFO, "utilization avg: %4.2f", avg); |
1288 | syslog(LOG_INFO, "utilization sdev: %9.6f", stddev); | | 1288 | syslog(LOG_INFO, "utilization sdev: %9.6f", stddev); |
1289 | | | 1289 | |
1290 | if (debug) | | 1290 | if (debug) |
1291 | bufstats(); | | 1291 | bufstats(); |
1292 | | | 1292 | |
1293 | if (sig == SIGUSR2) | | 1293 | if (sig == SIGUSR2) |
1294 | memset(&cleaner_stats, 0, sizeof(cleaner_stats)); | | 1294 | memset(&cleaner_stats, 0, sizeof(cleaner_stats)); |
1295 | if (sig == SIGINT) | | 1295 | if (sig == SIGINT) |
1296 | exit(0); | | 1296 | exit(0); |
1297 | } | | 1297 | } |
1298 | | | 1298 | |
/*
 * Signal handler: terminate the process with a successful exit status.
 * The signal number is not used.
 */
static void
sig_exit(int sig)
{
	(void)sig;
	exit(EXIT_SUCCESS);
}
1304 | | | 1304 | |
/*
 * Print the command-line synopsis and exit with status 1.  The option
 * list is kept in step with the getopt() string in main(), including
 * -D (stay in the foreground) and -C (copy log file).
 */
static void
usage(void)
{
	errx(1, "usage: lfs_cleanerd [-bcDdfmqs] [-C copylog] [-i segnum] "
	     "[-l load] [-n nsegs] [-r report_freq] [-t timeout] fs_name ...");
}
1311 | | | 1311 | |
1312 | /* | | 1312 | /* |
1313 | * Main. | | 1313 | * Main. |
1314 | */ | | 1314 | */ |
1315 | int | | 1315 | int |
1316 | main(int argc, char **argv) | | 1316 | main(int argc, char **argv) |
1317 | { | | 1317 | { |
1318 | int i, opt, error, r, loopcount, nodetach; | | 1318 | int i, opt, error, r, loopcount, nodetach; |
1319 | struct timeval tv; | | 1319 | struct timeval tv; |
1320 | CLEANERINFO ci; | | 1320 | CLEANERINFO ci; |
1321 | #ifndef USE_CLIENT_SERVER | | 1321 | #ifndef USE_CLIENT_SERVER |
1322 | char *cp, *pidname; | | 1322 | char *cp, *pidname; |
1323 | #endif | | 1323 | #endif |
1324 | | | 1324 | |
1325 | /* | | 1325 | /* |
1326 | * Set up defaults | | 1326 | * Set up defaults |
1327 | */ | | 1327 | */ |
1328 | atatime = 1; | | 1328 | atatime = 1; |
1329 | segwait_timeout = 300; /* Five minutes */ | | 1329 | segwait_timeout = 300; /* Five minutes */ |
1330 | load_threshold = 0.2; | | 1330 | load_threshold = 0.2; |
1331 | stat_report = 0; | | 1331 | stat_report = 0; |
1332 | inval_segment = -1; | | 1332 | inval_segment = -1; |
1333 | copylog_filename = NULL; | | 1333 | copylog_filename = NULL; |
1334 | nodetach = 0; | | 1334 | nodetach = 0; |
1335 | | | 1335 | |
1336 | /* | | 1336 | /* |
1337 | * Parse command-line arguments | | 1337 | * Parse command-line arguments |
1338 | */ | | 1338 | */ |
1339 | while ((opt = getopt(argc, argv, "bC:cdfi:l:mn:qr:st:")) != -1) { | | 1339 | while ((opt = getopt(argc, argv, "bC:cdDfi:l:mn:qr:st:")) != -1) { |
1340 | switch (opt) { | | 1340 | switch (opt) { |
1341 | case 'b': /* Use bytes written, not segments read */ | | 1341 | case 'b': /* Use bytes written, not segments read */ |
1342 | use_bytes = 1; | | 1342 | use_bytes = 1; |
1343 | break; | | 1343 | break; |
1344 | case 'C': /* copy log */ | | 1344 | case 'C': /* copy log */ |
1345 | copylog_filename = optarg; | | 1345 | copylog_filename = optarg; |
1346 | break; | | 1346 | break; |
1347 | case 'c': /* Coalesce files */ | | 1347 | case 'c': /* Coalesce files */ |
1348 | do_coalesce++; | | 1348 | do_coalesce++; |
1349 | break; | | 1349 | break; |
1350 | case 'd': /* Debug mode. */ | | 1350 | case 'd': /* Debug mode. */ |
1351 | nodetach++; | | 1351 | nodetach++; |
1352 | debug++; | | 1352 | debug++; |
1353 | break; | | 1353 | break; |
1354 | case 'D': /* stay-on-foreground */ | | 1354 | case 'D': /* stay-on-foreground */ |
1355 | nodetach++; | | 1355 | nodetach++; |
1356 | break; | | 1356 | break; |
1357 | case 'f': /* Use fs idle time rather than cpu idle */ | | 1357 | case 'f': /* Use fs idle time rather than cpu idle */ |
1358 | use_fs_idle = 1; | | 1358 | use_fs_idle = 1; |
1359 | break; | | 1359 | break; |
1360 | case 'i': /* Invalidate this segment */ | | 1360 | case 'i': /* Invalidate this segment */ |
1361 | inval_segment = atoi(optarg); | | 1361 | inval_segment = atoi(optarg); |
1362 | break; | | 1362 | break; |
1363 | case 'l': /* Load below which to clean */ | | 1363 | case 'l': /* Load below which to clean */ |
1364 | load_threshold = atof(optarg); | | 1364 | load_threshold = atof(optarg); |
1365 | break; | | 1365 | break; |
1366 | case 'm': /* [compat only] */ | | 1366 | case 'm': /* [compat only] */ |
1367 | break; | | 1367 | break; |
1368 | case 'n': /* How many segs to clean at once */ | | 1368 | case 'n': /* How many segs to clean at once */ |
1369 | atatime = atoi(optarg); | | 1369 | atatime = atoi(optarg); |
1370 | break; | | 1370 | break; |
1371 | case 'q': /* Quit after one run */ | | 1371 | case 'q': /* Quit after one run */ |
1372 | do_quit = 1; | | 1372 | do_quit = 1; |
1373 | break; | | 1373 | break; |
1374 | case 'r': /* Report every stat_report segments */ | | 1374 | case 'r': /* Report every stat_report segments */ |
1375 | stat_report = atoi(optarg); | | 1375 | stat_report = atoi(optarg); |
1376 | break; | | 1376 | break; |
1377 | case 's': /* Small writes */ | | 1377 | case 's': /* Small writes */ |
1378 | do_small = 1; | | 1378 | do_small = 1; |
1379 | break; | | 1379 | break; |
1380 | case 't': /* timeout */ | | 1380 | case 't': /* timeout */ |
1381 | segwait_timeout = atoi(optarg); | | 1381 | segwait_timeout = atoi(optarg); |
1382 | break; | | 1382 | break; |
1383 | default: | | 1383 | default: |
1384 | usage(); | | 1384 | usage(); |
1385 | /* NOTREACHED */ | | 1385 | /* NOTREACHED */ |
1386 | } | | 1386 | } |
1387 | } | | 1387 | } |
1388 | argc -= optind; | | 1388 | argc -= optind; |
1389 | argv += optind; | | 1389 | argv += optind; |
1390 | | | 1390 | |
1391 | if (argc < 1) | | 1391 | if (argc < 1) |
1392 | usage(); | | 1392 | usage(); |
1393 | if (inval_segment >= 0 && argc != 1) { | | 1393 | if (inval_segment >= 0 && argc != 1) { |
1394 | errx(1, "lfs_cleanerd: may only specify one filesystem when " | | 1394 | errx(1, "lfs_cleanerd: may only specify one filesystem when " |
1395 | "using -i flag"); | | 1395 | "using -i flag"); |
1396 | } | | 1396 | } |
1397 | | | 1397 | |
1398 | if (do_coalesce) { | | 1398 | if (do_coalesce) { |
1399 | errx(1, "lfs_cleanerd: -c disabled due to reports of file " | | 1399 | errx(1, "lfs_cleanerd: -c disabled due to reports of file " |
1400 | "corruption; you may re-enable it by rebuilding the " | | 1400 | "corruption; you may re-enable it by rebuilding the " |
1401 | "cleaner"); | | 1401 | "cleaner"); |
1402 | } | | 1402 | } |
1403 | | | 1403 | |
1404 | /* | | 1404 | /* |
1405 | * Set up daemon mode or foreground mode | | 1405 | * Set up daemon mode or foreground mode |
1406 | */ | | 1406 | */ |
1407 | if (nodetach) { | | 1407 | if (nodetach) { |
1408 | openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR, | | 1408 | openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR, |
1409 | LOG_DAEMON); | | 1409 | LOG_DAEMON); |
1410 | signal(SIGINT, sig_report); | | 1410 | signal(SIGINT, sig_report); |
1411 | } else { | | 1411 | } else { |
1412 | if (daemon(0, 0) == -1) | | 1412 | if (daemon(0, 0) == -1) |
1413 | err(1, "lfs_cleanerd: couldn't become a daemon!"); | | 1413 | err(1, "lfs_cleanerd: couldn't become a daemon!"); |
1414 | openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID, LOG_DAEMON); | | 1414 | openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID, LOG_DAEMON); |
1415 | signal(SIGINT, sig_exit); | | 1415 | signal(SIGINT, sig_exit); |
1416 | } | | 1416 | } |
1417 | | | 1417 | |
1418 | /* | | 1418 | /* |
1419 | * Look for an already-running master daemon. If there is one, | | 1419 | * Look for an already-running master daemon. If there is one, |
1420 | * send it our filesystems to add to its list and exit. | | 1420 | * send it our filesystems to add to its list and exit. |
1421 | * If there is none, become the master. | | 1421 | * If there is none, become the master. |
1422 | */ | | 1422 | */ |
1423 | #ifdef USE_CLIENT_SERVER | | 1423 | #ifdef USE_CLIENT_SERVER |
1424 | try_to_become_master(argc, argv); | | 1424 | try_to_become_master(argc, argv); |
1425 | #else | | 1425 | #else |
1426 | /* XXX think about this */ | | 1426 | /* XXX think about this */ |
1427 | asprintf(&pidname, "lfs_cleanerd:m:%s", argv[0]); | | 1427 | asprintf(&pidname, "lfs_cleanerd:m:%s", argv[0]); |
1428 | if (pidname == NULL) { | | 1428 | if (pidname == NULL) { |
1429 | syslog(LOG_ERR, "malloc failed: %m"); | | 1429 | syslog(LOG_ERR, "malloc failed: %m"); |
1430 | exit(1); | | 1430 | exit(1); |
1431 | } | | 1431 | } |
1432 | for (cp = pidname; cp != NULL; cp = strchr(cp, '/')) | | 1432 | for (cp = pidname; cp != NULL; cp = strchr(cp, '/')) |
1433 | *cp = '|'; | | 1433 | *cp = '|'; |
1434 | pidfile(pidname); | | 1434 | pidfile(pidname); |
1435 | #endif | | 1435 | #endif |
1436 | | | 1436 | |
1437 | /* | | 1437 | /* |
1438 | * Signals mean daemon should report its statistics | | 1438 | * Signals mean daemon should report its statistics |
1439 | */ | | 1439 | */ |
1440 | memset(&cleaner_stats, 0, sizeof(cleaner_stats)); | | 1440 | memset(&cleaner_stats, 0, sizeof(cleaner_stats)); |
1441 | signal(SIGUSR1, sig_report); | | 1441 | signal(SIGUSR1, sig_report); |
1442 | signal(SIGUSR2, sig_report); | | 1442 | signal(SIGUSR2, sig_report); |
1443 | | | 1443 | |
1444 | /* | | 1444 | /* |
1445 | * Start up buffer cache. We only use this for the Ifile, | | 1445 | * Start up buffer cache. We only use this for the Ifile, |
1446 | * and we will resize it if necessary, so it can start small. | | 1446 | * and we will resize it if necessary, so it can start small. |
1447 | */ | | 1447 | */ |
1448 | bufinit(4); | | 1448 | bufinit(4); |
1449 | | | 1449 | |
1450 | #ifdef REPAIR_ZERO_FINFO | | 1450 | #ifdef REPAIR_ZERO_FINFO |
1451 | { | | 1451 | { |
1452 | BLOCK_INFO *bip = NULL; | | 1452 | BLOCK_INFO *bip = NULL; |
1453 | int bic = 0; | | 1453 | int bic = 0; |
1454 | | | 1454 | |
1455 | nfss = 1; | | 1455 | nfss = 1; |
1456 | fsp = (struct clfs **)malloc(sizeof(*fsp)); | | 1456 | fsp = (struct clfs **)malloc(sizeof(*fsp)); |
1457 | fsp[0] = (struct clfs *)calloc(1, sizeof(**fsp)); | | 1457 | fsp[0] = (struct clfs *)calloc(1, sizeof(**fsp)); |
1458 | | | 1458 | |
1459 | if (init_unmounted_fs(fsp[0], argv[0]) < 0) { | | 1459 | if (init_unmounted_fs(fsp[0], argv[0]) < 0) { |
1460 | err(1, "init_unmounted_fs"); | | 1460 | err(1, "init_unmounted_fs"); |
1461 | } | | 1461 | } |
1462 | dlog("Filesystem has %d segments", fsp[0]->lfs_nseg); | | 1462 | dlog("Filesystem has %d segments", fsp[0]->lfs_nseg); |
1463 | for (i = 0; i < fsp[0]->lfs_nseg; i++) { | | 1463 | for (i = 0; i < fsp[0]->lfs_nseg; i++) { |
1464 | load_segment(fsp[0], i, &bip, &bic); | | 1464 | load_segment(fsp[0], i, &bip, &bic); |
1465 | bic = 0; | | 1465 | bic = 0; |
1466 | } | | 1466 | } |
1467 | exit(0); | | 1467 | exit(0); |
1468 | } | | 1468 | } |
1469 | #endif | | 1469 | #endif |
1470 | | | 1470 | |
1471 | /* | | 1471 | /* |
1472 | * Initialize cleaning structures, open devices, etc. | | 1472 | * Initialize cleaning structures, open devices, etc. |
1473 | */ | | 1473 | */ |
1474 | nfss = argc; | | 1474 | nfss = argc; |
1475 | fsp = (struct clfs **)malloc(nfss * sizeof(*fsp)); | | 1475 | fsp = (struct clfs **)malloc(nfss * sizeof(*fsp)); |
1476 | if (fsp == NULL) { | | 1476 | if (fsp == NULL) { |
1477 | syslog(LOG_ERR, "couldn't allocate fs table: %m"); | | 1477 | syslog(LOG_ERR, "couldn't allocate fs table: %m"); |
1478 | exit(1); | | 1478 | exit(1); |
1479 | } | | 1479 | } |
1480 | for (i = 0; i < nfss; i++) { | | 1480 | for (i = 0; i < nfss; i++) { |
1481 | fsp[i] = (struct clfs *)calloc(1, sizeof(**fsp)); | | 1481 | fsp[i] = (struct clfs *)calloc(1, sizeof(**fsp)); |
1482 | if ((r = init_fs(fsp[i], argv[i])) < 0) { | | 1482 | if ((r = init_fs(fsp[i], argv[i])) < 0) { |
1483 | syslog(LOG_ERR, "%s: couldn't init: error code %d", | | 1483 | syslog(LOG_ERR, "%s: couldn't init: error code %d", |
1484 | argv[i], r); | | 1484 | argv[i], r); |
1485 | handle_error(fsp, i); | | 1485 | handle_error(fsp, i); |
1486 | --i; /* Do the new #i over again */ | | 1486 | --i; /* Do the new #i over again */ |
1487 | } | | 1487 | } |
1488 | } | | 1488 | } |
1489 | | | 1489 | |
1490 | /* | | 1490 | /* |
1491 | * If asked to coalesce, do so and exit. | | 1491 | * If asked to coalesce, do so and exit. |
1492 | */ | | 1492 | */ |
1493 | if (do_coalesce) { | | 1493 | if (do_coalesce) { |
1494 | for (i = 0; i < nfss; i++) | | 1494 | for (i = 0; i < nfss; i++) |
1495 | clean_all_inodes(fsp[i]); | | 1495 | clean_all_inodes(fsp[i]); |
1496 | exit(0); | | 1496 | exit(0); |
1497 | } | | 1497 | } |
1498 | | | 1498 | |
1499 | /* | | 1499 | /* |
1500 | * If asked to invalidate a segment, do that and exit. | | 1500 | * If asked to invalidate a segment, do that and exit. |
1501 | */ | | 1501 | */ |
1502 | if (inval_segment >= 0) { | | 1502 | if (inval_segment >= 0) { |
1503 | invalidate_segment(fsp[0], inval_segment); | | 1503 | invalidate_segment(fsp[0], inval_segment); |
1504 | exit(0); | | 1504 | exit(0); |
1505 | } | | 1505 | } |
1506 | | | 1506 | |
1507 | /* | | 1507 | /* |
1508 | * Main cleaning loop. | | 1508 | * Main cleaning loop. |
1509 | */ | | 1509 | */ |
1510 | loopcount = 0; | | 1510 | loopcount = 0; |
1511 | while (nfss > 0) { | | 1511 | while (nfss > 0) { |
1512 | int cleaned_one; | | 1512 | int cleaned_one; |
1513 | do { | | 1513 | do { |
1514 | #ifdef USE_CLIENT_SERVER | | 1514 | #ifdef USE_CLIENT_SERVER |
1515 | check_control_socket(); | | 1515 | check_control_socket(); |
1516 | #endif | | 1516 | #endif |
1517 | cleaned_one = 0; | | 1517 | cleaned_one = 0; |
1518 | for (i = 0; i < nfss; i++) { | | 1518 | for (i = 0; i < nfss; i++) { |
1519 | if ((error = needs_cleaning(fsp[i], &ci)) < 0) { | | 1519 | if ((error = needs_cleaning(fsp[i], &ci)) < 0) { |
1520 | handle_error(fsp, i); | | 1520 | handle_error(fsp, i); |
1521 | continue; | | 1521 | continue; |
1522 | } | | 1522 | } |
1523 | if (error == 0) /* No need to clean */ | | 1523 | if (error == 0) /* No need to clean */ |
1524 | continue; | | 1524 | continue; |
1525 | | | 1525 | |
1526 | reload_ifile(fsp[i]); | | 1526 | reload_ifile(fsp[i]); |
1527 | if (clean_fs(fsp[i], &ci) < 0) { | | 1527 | if (clean_fs(fsp[i], &ci) < 0) { |
1528 | handle_error(fsp, i); | | 1528 | handle_error(fsp, i); |
1529 | continue; | | 1529 | continue; |
1530 | } | | 1530 | } |
1531 | ++cleaned_one; | | 1531 | ++cleaned_one; |
1532 | } | | 1532 | } |
1533 | ++loopcount; | | 1533 | ++loopcount; |
1534 | if (stat_report && loopcount % stat_report == 0) | | 1534 | if (stat_report && loopcount % stat_report == 0) |
1535 | sig_report(0); | | 1535 | sig_report(0); |
1536 | if (do_quit) | | 1536 | if (do_quit) |
1537 | exit(0); | | 1537 | exit(0); |
1538 | } while(cleaned_one); | | 1538 | } while(cleaned_one); |
1539 | tv.tv_sec = segwait_timeout; | | 1539 | tv.tv_sec = segwait_timeout; |
1540 | tv.tv_usec = 0; | | 1540 | tv.tv_usec = 0; |
1541 | error = fcntl(fsp[0]->clfs_ifilefd, LFCNSEGWAITALL, &tv); | | 1541 | error = fcntl(fsp[0]->clfs_ifilefd, LFCNSEGWAITALL, &tv); |
1542 | if (error) | | 1542 | if (error) |
1543 | err(1, "LFCNSEGWAITALL"); | | 1543 | err(1, "LFCNSEGWAITALL"); |
1544 | } | | 1544 | } |
1545 | | | 1545 | |
1546 | /* NOTREACHED */ | | 1546 | /* NOTREACHED */ |
1547 | return 0; | | 1547 | return 0; |
1548 | } | | 1548 | } |