| @@ -1,2113 +1,2122 @@ | | | @@ -1,2113 +1,2122 @@ |
1 | /* $NetBSD: lfs_vfsops.c,v 1.276 2009/08/05 14:37:01 pooka Exp $ */ | | 1 | /* $NetBSD: lfs_vfsops.c,v 1.277 2009/08/05 15:39:57 pooka Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007 | | 4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007 |
5 | * The NetBSD Foundation, Inc. | | 5 | * The NetBSD Foundation, Inc. |
6 | * All rights reserved. | | 6 | * All rights reserved. |
7 | * | | 7 | * |
8 | * This code is derived from software contributed to The NetBSD Foundation | | 8 | * This code is derived from software contributed to The NetBSD Foundation |
9 | * by Konrad E. Schroder <perseant@hhhh.org>. | | 9 | * by Konrad E. Schroder <perseant@hhhh.org>. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. | | 15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright | | 16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the | | 17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. | | 18 | * documentation and/or other materials provided with the distribution. |
19 | * | | 19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. | | 30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ | | 31 | */ |
32 | /*- | | 32 | /*- |
33 | * Copyright (c) 1989, 1991, 1993, 1994 | | 33 | * Copyright (c) 1989, 1991, 1993, 1994 |
34 | * The Regents of the University of California. All rights reserved. | | 34 | * The Regents of the University of California. All rights reserved. |
35 | * | | 35 | * |
36 | * Redistribution and use in source and binary forms, with or without | | 36 | * Redistribution and use in source and binary forms, with or without |
37 | * modification, are permitted provided that the following conditions | | 37 | * modification, are permitted provided that the following conditions |
38 | * are met: | | 38 | * are met: |
39 | * 1. Redistributions of source code must retain the above copyright | | 39 | * 1. Redistributions of source code must retain the above copyright |
40 | * notice, this list of conditions and the following disclaimer. | | 40 | * notice, this list of conditions and the following disclaimer. |
41 | * 2. Redistributions in binary form must reproduce the above copyright | | 41 | * 2. Redistributions in binary form must reproduce the above copyright |
42 | * notice, this list of conditions and the following disclaimer in the | | 42 | * notice, this list of conditions and the following disclaimer in the |
43 | * documentation and/or other materials provided with the distribution. | | 43 | * documentation and/or other materials provided with the distribution. |
44 | * 3. Neither the name of the University nor the names of its contributors | | 44 | * 3. Neither the name of the University nor the names of its contributors |
45 | * may be used to endorse or promote products derived from this software | | 45 | * may be used to endorse or promote products derived from this software |
46 | * without specific prior written permission. | | 46 | * without specific prior written permission. |
47 | * | | 47 | * |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
58 | * SUCH DAMAGE. | | 58 | * SUCH DAMAGE. |
59 | * | | 59 | * |
60 | * @(#)lfs_vfsops.c 8.20 (Berkeley) 6/10/95 | | 60 | * @(#)lfs_vfsops.c 8.20 (Berkeley) 6/10/95 |
61 | */ | | 61 | */ |
62 | | | 62 | |
63 | #include <sys/cdefs.h> | | 63 | #include <sys/cdefs.h> |
64 | __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.276 2009/08/05 14:37:01 pooka Exp $"); | | 64 | __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.277 2009/08/05 15:39:57 pooka Exp $"); |
65 | | | 65 | |
66 | #if defined(_KERNEL_OPT) | | 66 | #if defined(_KERNEL_OPT) |
67 | #include "opt_lfs.h" | | 67 | #include "opt_lfs.h" |
68 | #include "opt_quota.h" | | 68 | #include "opt_quota.h" |
69 | #endif | | 69 | #endif |
70 | | | 70 | |
71 | #include <sys/param.h> | | 71 | #include <sys/param.h> |
72 | #include <sys/systm.h> | | 72 | #include <sys/systm.h> |
73 | #include <sys/namei.h> | | 73 | #include <sys/namei.h> |
74 | #include <sys/proc.h> | | 74 | #include <sys/proc.h> |
75 | #include <sys/kernel.h> | | 75 | #include <sys/kernel.h> |
76 | #include <sys/vnode.h> | | 76 | #include <sys/vnode.h> |
77 | #include <sys/mount.h> | | 77 | #include <sys/mount.h> |
78 | #include <sys/kthread.h> | | 78 | #include <sys/kthread.h> |
79 | #include <sys/buf.h> | | 79 | #include <sys/buf.h> |
80 | #include <sys/device.h> | | 80 | #include <sys/device.h> |
81 | #include <sys/mbuf.h> | | 81 | #include <sys/mbuf.h> |
82 | #include <sys/file.h> | | 82 | #include <sys/file.h> |
83 | #include <sys/disklabel.h> | | 83 | #include <sys/disklabel.h> |
84 | #include <sys/ioctl.h> | | 84 | #include <sys/ioctl.h> |
85 | #include <sys/errno.h> | | 85 | #include <sys/errno.h> |
86 | #include <sys/malloc.h> | | 86 | #include <sys/malloc.h> |
87 | #include <sys/pool.h> | | 87 | #include <sys/pool.h> |
88 | #include <sys/socket.h> | | 88 | #include <sys/socket.h> |
89 | #include <sys/syslog.h> | | 89 | #include <sys/syslog.h> |
90 | #include <uvm/uvm_extern.h> | | 90 | #include <uvm/uvm_extern.h> |
91 | #include <sys/sysctl.h> | | 91 | #include <sys/sysctl.h> |
92 | #include <sys/conf.h> | | 92 | #include <sys/conf.h> |
93 | #include <sys/kauth.h> | | 93 | #include <sys/kauth.h> |
94 | #include <sys/module.h> | | 94 | #include <sys/module.h> |
95 | | | 95 | |
96 | #include <miscfs/specfs/specdev.h> | | 96 | #include <miscfs/specfs/specdev.h> |
97 | | | 97 | |
98 | #include <ufs/ufs/quota.h> | | 98 | #include <ufs/ufs/quota.h> |
99 | #include <ufs/ufs/inode.h> | | 99 | #include <ufs/ufs/inode.h> |
100 | #include <ufs/ufs/ufsmount.h> | | 100 | #include <ufs/ufs/ufsmount.h> |
101 | #include <ufs/ufs/ufs_extern.h> | | 101 | #include <ufs/ufs/ufs_extern.h> |
102 | | | 102 | |
103 | #include <uvm/uvm.h> | | 103 | #include <uvm/uvm.h> |
104 | #include <uvm/uvm_stat.h> | | 104 | #include <uvm/uvm_stat.h> |
105 | #include <uvm/uvm_pager.h> | | 105 | #include <uvm/uvm_pager.h> |
106 | #include <uvm/uvm_pdaemon.h> | | 106 | #include <uvm/uvm_pdaemon.h> |
107 | | | 107 | |
108 | #include <ufs/lfs/lfs.h> | | 108 | #include <ufs/lfs/lfs.h> |
109 | #include <ufs/lfs/lfs_extern.h> | | 109 | #include <ufs/lfs/lfs_extern.h> |
110 | | | 110 | |
111 | #include <miscfs/genfs/genfs.h> | | 111 | #include <miscfs/genfs/genfs.h> |
112 | #include <miscfs/genfs/genfs_node.h> | | 112 | #include <miscfs/genfs/genfs_node.h> |
113 | | | 113 | |
114 | MODULE(MODULE_CLASS_VFS, lfs, "ffs"); | | 114 | MODULE(MODULE_CLASS_VFS, lfs, "ffs"); |
115 | | | 115 | |
116 | static int lfs_gop_write(struct vnode *, struct vm_page **, int, int); | | 116 | static int lfs_gop_write(struct vnode *, struct vm_page **, int, int); |
117 | static bool lfs_issequential_hole(const struct ufsmount *, | | 117 | static bool lfs_issequential_hole(const struct ufsmount *, |
118 | daddr_t, daddr_t); | | 118 | daddr_t, daddr_t); |
119 | | | 119 | |
120 | static int lfs_mountfs(struct vnode *, struct mount *, struct lwp *); | | 120 | static int lfs_mountfs(struct vnode *, struct mount *, struct lwp *); |
121 | | | 121 | |
122 | void lfs_sysctl_setup(struct sysctllog *); | | 122 | void lfs_sysctl_setup(struct sysctllog *); |
123 | static struct sysctllog *lfs_sysctl_log; | | 123 | static struct sysctllog *lfs_sysctl_log; |
124 | | | 124 | |
125 | extern const struct vnodeopv_desc lfs_vnodeop_opv_desc; | | 125 | extern const struct vnodeopv_desc lfs_vnodeop_opv_desc; |
126 | extern const struct vnodeopv_desc lfs_specop_opv_desc; | | 126 | extern const struct vnodeopv_desc lfs_specop_opv_desc; |
127 | extern const struct vnodeopv_desc lfs_fifoop_opv_desc; | | 127 | extern const struct vnodeopv_desc lfs_fifoop_opv_desc; |
128 | | | 128 | |
129 | pid_t lfs_writer_daemon = 0; | | 129 | pid_t lfs_writer_daemon = 0; |
130 | int lfs_do_flush = 0; | | 130 | int lfs_do_flush = 0; |
131 | #ifdef LFS_KERNEL_RFW | | 131 | #ifdef LFS_KERNEL_RFW |
132 | int lfs_do_rfw = 0; | | 132 | int lfs_do_rfw = 0; |
133 | #endif | | 133 | #endif |
134 | | | 134 | |
135 | const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = { | | 135 | const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = { |
136 | &lfs_vnodeop_opv_desc, | | 136 | &lfs_vnodeop_opv_desc, |
137 | &lfs_specop_opv_desc, | | 137 | &lfs_specop_opv_desc, |
138 | &lfs_fifoop_opv_desc, | | 138 | &lfs_fifoop_opv_desc, |
139 | NULL, | | 139 | NULL, |
140 | }; | | 140 | }; |
141 | | | 141 | |
142 | struct vfsops lfs_vfsops = { | | 142 | struct vfsops lfs_vfsops = { |
143 | MOUNT_LFS, | | 143 | MOUNT_LFS, |
144 | sizeof (struct ufs_args), | | 144 | sizeof (struct ufs_args), |
145 | lfs_mount, | | 145 | lfs_mount, |
146 | ufs_start, | | 146 | ufs_start, |
147 | lfs_unmount, | | 147 | lfs_unmount, |
148 | ufs_root, | | 148 | ufs_root, |
149 | ufs_quotactl, | | 149 | ufs_quotactl, |
150 | lfs_statvfs, | | 150 | lfs_statvfs, |
151 | lfs_sync, | | 151 | lfs_sync, |
152 | lfs_vget, | | 152 | lfs_vget, |
153 | lfs_fhtovp, | | 153 | lfs_fhtovp, |
154 | lfs_vptofh, | | 154 | lfs_vptofh, |
155 | lfs_init, | | 155 | lfs_init, |
156 | lfs_reinit, | | 156 | lfs_reinit, |
157 | lfs_done, | | 157 | lfs_done, |
158 | lfs_mountroot, | | 158 | lfs_mountroot, |
159 | (int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp, | | 159 | (int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp, |
160 | vfs_stdextattrctl, | | 160 | vfs_stdextattrctl, |
161 | (void *)eopnotsupp, /* vfs_suspendctl */ | | 161 | (void *)eopnotsupp, /* vfs_suspendctl */ |
162 | genfs_renamelock_enter, | | 162 | genfs_renamelock_enter, |
163 | genfs_renamelock_exit, | | 163 | genfs_renamelock_exit, |
164 | (void *)eopnotsupp, | | 164 | (void *)eopnotsupp, |
165 | lfs_vnodeopv_descs, | | 165 | lfs_vnodeopv_descs, |
166 | 0, | | 166 | 0, |
167 | { NULL, NULL }, | | 167 | { NULL, NULL }, |
168 | }; | | 168 | }; |
169 | | | 169 | |
170 | const struct genfs_ops lfs_genfsops = { | | 170 | const struct genfs_ops lfs_genfsops = { |
171 | .gop_size = lfs_gop_size, | | 171 | .gop_size = lfs_gop_size, |
172 | .gop_alloc = ufs_gop_alloc, | | 172 | .gop_alloc = ufs_gop_alloc, |
173 | .gop_write = lfs_gop_write, | | 173 | .gop_write = lfs_gop_write, |
174 | .gop_markupdate = ufs_gop_markupdate, | | 174 | .gop_markupdate = ufs_gop_markupdate, |
175 | }; | | 175 | }; |
176 | | | 176 | |
177 | static const struct ufs_ops lfs_ufsops = { | | 177 | static const struct ufs_ops lfs_ufsops = { |
178 | .uo_itimes = NULL, | | 178 | .uo_itimes = NULL, |
179 | .uo_update = lfs_update, | | 179 | .uo_update = lfs_update, |
180 | .uo_truncate = lfs_truncate, | | 180 | .uo_truncate = lfs_truncate, |
181 | .uo_valloc = lfs_valloc, | | 181 | .uo_valloc = lfs_valloc, |
182 | .uo_vfree = lfs_vfree, | | 182 | .uo_vfree = lfs_vfree, |
183 | .uo_balloc = lfs_balloc, | | 183 | .uo_balloc = lfs_balloc, |
184 | .uo_unmark_vnode = lfs_unmark_vnode, | | 184 | .uo_unmark_vnode = lfs_unmark_vnode, |
185 | }; | | 185 | }; |
186 | | | 186 | |
/* Name/description pair used to build sysctl leaves from a table. */
struct shortlong {
	const char *sname;	/* sysctl node name */
	const char *lname;	/* human-readable description */
};
191 | | | 191 | |
192 | static int | | 192 | static int |
193 | sysctl_lfs_dostats(SYSCTLFN_ARGS) | | 193 | sysctl_lfs_dostats(SYSCTLFN_ARGS) |
194 | { | | 194 | { |
195 | extern struct lfs_stats lfs_stats; | | 195 | extern struct lfs_stats lfs_stats; |
196 | extern int lfs_dostats; | | 196 | extern int lfs_dostats; |
197 | int error; | | 197 | int error; |
198 | | | 198 | |
199 | error = sysctl_lookup(SYSCTLFN_CALL(rnode)); | | 199 | error = sysctl_lookup(SYSCTLFN_CALL(rnode)); |
200 | if (error || newp == NULL) | | 200 | if (error || newp == NULL) |
201 | return (error); | | 201 | return (error); |
202 | | | 202 | |
203 | if (lfs_dostats == 0) | | 203 | if (lfs_dostats == 0) |
204 | memset(&lfs_stats, 0, sizeof(lfs_stats)); | | 204 | memset(&lfs_stats, 0, sizeof(lfs_stats)); |
205 | | | 205 | |
206 | return (0); | | 206 | return (0); |
207 | } | | 207 | } |
208 | | | 208 | |
209 | void | | 209 | void |
210 | lfs_sysctl_setup(struct sysctllog *clog) | | 210 | lfs_sysctl_setup(struct sysctllog *clog) |
211 | { | | 211 | { |
212 | int i; | | 212 | int i; |
213 | extern int lfs_writeindir, lfs_dostats, lfs_clean_vnhead, | | 213 | extern int lfs_writeindir, lfs_dostats, lfs_clean_vnhead, |
214 | lfs_fs_pagetrip, lfs_ignore_lazy_sync; | | 214 | lfs_fs_pagetrip, lfs_ignore_lazy_sync; |
215 | #ifdef DEBUG | | 215 | #ifdef DEBUG |
216 | extern int lfs_debug_log_subsys[DLOG_MAX]; | | 216 | extern int lfs_debug_log_subsys[DLOG_MAX]; |
217 | struct shortlong dlog_names[DLOG_MAX] = { /* Must match lfs.h ! */ | | 217 | struct shortlong dlog_names[DLOG_MAX] = { /* Must match lfs.h ! */ |
218 | { "rollforward", "Debug roll-forward code" }, | | 218 | { "rollforward", "Debug roll-forward code" }, |
219 | { "alloc", "Debug inode allocation and free list" }, | | 219 | { "alloc", "Debug inode allocation and free list" }, |
220 | { "avail", "Debug space-available-now accounting" }, | | 220 | { "avail", "Debug space-available-now accounting" }, |
221 | { "flush", "Debug flush triggers" }, | | 221 | { "flush", "Debug flush triggers" }, |
222 | { "lockedlist", "Debug locked list accounting" }, | | 222 | { "lockedlist", "Debug locked list accounting" }, |
223 | { "vnode_verbose", "Verbose per-vnode-written debugging" }, | | 223 | { "vnode_verbose", "Verbose per-vnode-written debugging" }, |
224 | { "vnode", "Debug vnode use during segment write" }, | | 224 | { "vnode", "Debug vnode use during segment write" }, |
225 | { "segment", "Debug segment writing" }, | | 225 | { "segment", "Debug segment writing" }, |
226 | { "seguse", "Debug segment used-bytes accounting" }, | | 226 | { "seguse", "Debug segment used-bytes accounting" }, |
227 | { "cleaner", "Debug cleaning routines" }, | | 227 | { "cleaner", "Debug cleaning routines" }, |
228 | { "mount", "Debug mount/unmount routines" }, | | 228 | { "mount", "Debug mount/unmount routines" }, |
229 | { "pagecache", "Debug UBC interactions" }, | | 229 | { "pagecache", "Debug UBC interactions" }, |
230 | { "dirop", "Debug directory-operation accounting" }, | | 230 | { "dirop", "Debug directory-operation accounting" }, |
231 | { "malloc", "Debug private malloc accounting" }, | | 231 | { "malloc", "Debug private malloc accounting" }, |
232 | }; | | 232 | }; |
233 | #endif /* DEBUG */ | | 233 | #endif /* DEBUG */ |
234 | struct shortlong stat_names[] = { /* Must match lfs.h! */ | | 234 | struct shortlong stat_names[] = { /* Must match lfs.h! */ |
235 | { "segsused", "Number of new segments allocated" }, | | 235 | { "segsused", "Number of new segments allocated" }, |
236 | { "psegwrites", "Number of partial-segment writes" }, | | 236 | { "psegwrites", "Number of partial-segment writes" }, |
237 | { "psyncwrites", "Number of synchronous partial-segment" | | 237 | { "psyncwrites", "Number of synchronous partial-segment" |
238 | " writes" }, | | 238 | " writes" }, |
239 | { "pcleanwrites", "Number of partial-segment writes by the" | | 239 | { "pcleanwrites", "Number of partial-segment writes by the" |
240 | " cleaner" }, | | 240 | " cleaner" }, |
241 | { "blocktot", "Number of blocks written" }, | | 241 | { "blocktot", "Number of blocks written" }, |
242 | { "cleanblocks", "Number of blocks written by the cleaner" }, | | 242 | { "cleanblocks", "Number of blocks written by the cleaner" }, |
243 | { "ncheckpoints", "Number of checkpoints made" }, | | 243 | { "ncheckpoints", "Number of checkpoints made" }, |
244 | { "nwrites", "Number of whole writes" }, | | 244 | { "nwrites", "Number of whole writes" }, |
245 | { "nsync_writes", "Number of synchronous writes" }, | | 245 | { "nsync_writes", "Number of synchronous writes" }, |
246 | { "wait_exceeded", "Number of times writer waited for" | | 246 | { "wait_exceeded", "Number of times writer waited for" |
247 | " cleaner" }, | | 247 | " cleaner" }, |
248 | { "write_exceeded", "Number of times writer invoked flush" }, | | 248 | { "write_exceeded", "Number of times writer invoked flush" }, |
249 | { "flush_invoked", "Number of times flush was invoked" }, | | 249 | { "flush_invoked", "Number of times flush was invoked" }, |
250 | { "vflush_invoked", "Number of time vflush was called" }, | | 250 | { "vflush_invoked", "Number of time vflush was called" }, |
251 | { "clean_inlocked", "Number of vnodes skipped for VI_XLOCK" }, | | 251 | { "clean_inlocked", "Number of vnodes skipped for VI_XLOCK" }, |
252 | { "clean_vnlocked", "Number of vnodes skipped for vget failure" }, | | 252 | { "clean_vnlocked", "Number of vnodes skipped for vget failure" }, |
253 | { "segs_reclaimed", "Number of segments reclaimed" }, | | 253 | { "segs_reclaimed", "Number of segments reclaimed" }, |
254 | }; | | 254 | }; |
255 | | | 255 | |
256 | sysctl_createv(&clog, 0, NULL, NULL, | | 256 | sysctl_createv(&clog, 0, NULL, NULL, |
257 | CTLFLAG_PERMANENT, | | 257 | CTLFLAG_PERMANENT, |
258 | CTLTYPE_NODE, "vfs", NULL, | | 258 | CTLTYPE_NODE, "vfs", NULL, |
259 | NULL, 0, NULL, 0, | | 259 | NULL, 0, NULL, 0, |
260 | CTL_VFS, CTL_EOL); | | 260 | CTL_VFS, CTL_EOL); |
261 | sysctl_createv(&clog, 0, NULL, NULL, | | 261 | sysctl_createv(&clog, 0, NULL, NULL, |
262 | CTLFLAG_PERMANENT, | | 262 | CTLFLAG_PERMANENT, |
263 | CTLTYPE_NODE, "lfs", | | 263 | CTLTYPE_NODE, "lfs", |
264 | SYSCTL_DESCR("Log-structured file system"), | | 264 | SYSCTL_DESCR("Log-structured file system"), |
265 | NULL, 0, NULL, 0, | | 265 | NULL, 0, NULL, 0, |
266 | CTL_VFS, 5, CTL_EOL); | | 266 | CTL_VFS, 5, CTL_EOL); |
267 | /* | | 267 | /* |
268 | * XXX the "5" above could be dynamic, thereby eliminating one | | 268 | * XXX the "5" above could be dynamic, thereby eliminating one |
269 | * more instance of the "number to vfs" mapping problem, but | | 269 | * more instance of the "number to vfs" mapping problem, but |
270 | * "5" is the order as taken from sys/mount.h | | 270 | * "5" is the order as taken from sys/mount.h |
271 | */ | | 271 | */ |
272 | | | 272 | |
273 | sysctl_createv(&clog, 0, NULL, NULL, | | 273 | sysctl_createv(&clog, 0, NULL, NULL, |
274 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 274 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
275 | CTLTYPE_INT, "flushindir", NULL, | | 275 | CTLTYPE_INT, "flushindir", NULL, |
276 | NULL, 0, &lfs_writeindir, 0, | | 276 | NULL, 0, &lfs_writeindir, 0, |
277 | CTL_VFS, 5, LFS_WRITEINDIR, CTL_EOL); | | 277 | CTL_VFS, 5, LFS_WRITEINDIR, CTL_EOL); |
278 | sysctl_createv(&clog, 0, NULL, NULL, | | 278 | sysctl_createv(&clog, 0, NULL, NULL, |
279 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 279 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
280 | CTLTYPE_INT, "clean_vnhead", NULL, | | 280 | CTLTYPE_INT, "clean_vnhead", NULL, |
281 | NULL, 0, &lfs_clean_vnhead, 0, | | 281 | NULL, 0, &lfs_clean_vnhead, 0, |
282 | CTL_VFS, 5, LFS_CLEAN_VNHEAD, CTL_EOL); | | 282 | CTL_VFS, 5, LFS_CLEAN_VNHEAD, CTL_EOL); |
283 | sysctl_createv(&clog, 0, NULL, NULL, | | 283 | sysctl_createv(&clog, 0, NULL, NULL, |
284 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 284 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
285 | CTLTYPE_INT, "dostats", | | 285 | CTLTYPE_INT, "dostats", |
286 | SYSCTL_DESCR("Maintain statistics on LFS operations"), | | 286 | SYSCTL_DESCR("Maintain statistics on LFS operations"), |
287 | sysctl_lfs_dostats, 0, &lfs_dostats, 0, | | 287 | sysctl_lfs_dostats, 0, &lfs_dostats, 0, |
288 | CTL_VFS, 5, LFS_DOSTATS, CTL_EOL); | | 288 | CTL_VFS, 5, LFS_DOSTATS, CTL_EOL); |
289 | sysctl_createv(&clog, 0, NULL, NULL, | | 289 | sysctl_createv(&clog, 0, NULL, NULL, |
290 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 290 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
291 | CTLTYPE_INT, "pagetrip", | | 291 | CTLTYPE_INT, "pagetrip", |
292 | SYSCTL_DESCR("How many dirty pages in fs triggers" | | 292 | SYSCTL_DESCR("How many dirty pages in fs triggers" |
293 | " a flush"), | | 293 | " a flush"), |
294 | NULL, 0, &lfs_fs_pagetrip, 0, | | 294 | NULL, 0, &lfs_fs_pagetrip, 0, |
295 | CTL_VFS, 5, LFS_FS_PAGETRIP, CTL_EOL); | | 295 | CTL_VFS, 5, LFS_FS_PAGETRIP, CTL_EOL); |
296 | sysctl_createv(&clog, 0, NULL, NULL, | | 296 | sysctl_createv(&clog, 0, NULL, NULL, |
297 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 297 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
298 | CTLTYPE_INT, "ignore_lazy_sync", | | 298 | CTLTYPE_INT, "ignore_lazy_sync", |
299 | SYSCTL_DESCR("Lazy Sync is ignored entirely"), | | 299 | SYSCTL_DESCR("Lazy Sync is ignored entirely"), |
300 | NULL, 0, &lfs_ignore_lazy_sync, 0, | | 300 | NULL, 0, &lfs_ignore_lazy_sync, 0, |
301 | CTL_VFS, 5, LFS_IGNORE_LAZY_SYNC, CTL_EOL); | | 301 | CTL_VFS, 5, LFS_IGNORE_LAZY_SYNC, CTL_EOL); |
302 | #ifdef LFS_KERNEL_RFW | | 302 | #ifdef LFS_KERNEL_RFW |
303 | sysctl_createv(&clog, 0, NULL, NULL, | | 303 | sysctl_createv(&clog, 0, NULL, NULL, |
304 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 304 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
305 | CTLTYPE_INT, "rfw", | | 305 | CTLTYPE_INT, "rfw", |
306 | SYSCTL_DESCR("Use in-kernel roll-forward on mount"), | | 306 | SYSCTL_DESCR("Use in-kernel roll-forward on mount"), |
307 | NULL, 0, &lfs_do_rfw, 0, | | 307 | NULL, 0, &lfs_do_rfw, 0, |
308 | CTL_VFS, 5, LFS_DO_RFW, CTL_EOL); | | 308 | CTL_VFS, 5, LFS_DO_RFW, CTL_EOL); |
309 | #endif | | 309 | #endif |
310 | | | 310 | |
311 | sysctl_createv(&clog, 0, NULL, NULL, | | 311 | sysctl_createv(&clog, 0, NULL, NULL, |
312 | CTLFLAG_PERMANENT, | | 312 | CTLFLAG_PERMANENT, |
313 | CTLTYPE_NODE, "stats", | | 313 | CTLTYPE_NODE, "stats", |
314 | SYSCTL_DESCR("Debugging options"), | | 314 | SYSCTL_DESCR("Debugging options"), |
315 | NULL, 0, NULL, 0, | | 315 | NULL, 0, NULL, 0, |
316 | CTL_VFS, 5, LFS_STATS, CTL_EOL); | | 316 | CTL_VFS, 5, LFS_STATS, CTL_EOL); |
317 | for (i = 0; i < sizeof(struct lfs_stats) / sizeof(u_int); i++) { | | 317 | for (i = 0; i < sizeof(struct lfs_stats) / sizeof(u_int); i++) { |
318 | sysctl_createv(&clog, 0, NULL, NULL, | | 318 | sysctl_createv(&clog, 0, NULL, NULL, |
319 | CTLFLAG_PERMANENT|CTLFLAG_READONLY, | | 319 | CTLFLAG_PERMANENT|CTLFLAG_READONLY, |
320 | CTLTYPE_INT, stat_names[i].sname, | | 320 | CTLTYPE_INT, stat_names[i].sname, |
321 | SYSCTL_DESCR(stat_names[i].lname), | | 321 | SYSCTL_DESCR(stat_names[i].lname), |
322 | NULL, 0, &(((u_int *)&lfs_stats.segsused)[i]), | | 322 | NULL, 0, &(((u_int *)&lfs_stats.segsused)[i]), |
323 | 0, CTL_VFS, 5, LFS_STATS, i, CTL_EOL); | | 323 | 0, CTL_VFS, 5, LFS_STATS, i, CTL_EOL); |
324 | } | | 324 | } |
325 | | | 325 | |
326 | #ifdef DEBUG | | 326 | #ifdef DEBUG |
327 | sysctl_createv(&clog, 0, NULL, NULL, | | 327 | sysctl_createv(&clog, 0, NULL, NULL, |
328 | CTLFLAG_PERMANENT, | | 328 | CTLFLAG_PERMANENT, |
329 | CTLTYPE_NODE, "debug", | | 329 | CTLTYPE_NODE, "debug", |
330 | SYSCTL_DESCR("Debugging options"), | | 330 | SYSCTL_DESCR("Debugging options"), |
331 | NULL, 0, NULL, 0, | | 331 | NULL, 0, NULL, 0, |
332 | CTL_VFS, 5, LFS_DEBUGLOG, CTL_EOL); | | 332 | CTL_VFS, 5, LFS_DEBUGLOG, CTL_EOL); |
333 | for (i = 0; i < DLOG_MAX; i++) { | | 333 | for (i = 0; i < DLOG_MAX; i++) { |
334 | sysctl_createv(&clog, 0, NULL, NULL, | | 334 | sysctl_createv(&clog, 0, NULL, NULL, |
335 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, | | 335 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
336 | CTLTYPE_INT, dlog_names[i].sname, | | 336 | CTLTYPE_INT, dlog_names[i].sname, |
337 | SYSCTL_DESCR(dlog_names[i].lname), | | 337 | SYSCTL_DESCR(dlog_names[i].lname), |
338 | NULL, 0, &(lfs_debug_log_subsys[i]), 0, | | 338 | NULL, 0, &(lfs_debug_log_subsys[i]), 0, |
339 | CTL_VFS, 5, LFS_DEBUGLOG, i, CTL_EOL); | | 339 | CTL_VFS, 5, LFS_DEBUGLOG, i, CTL_EOL); |
340 | } | | 340 | } |
341 | #endif | | 341 | #endif |
342 | } | | 342 | } |
343 | | | 343 | |
344 | static int | | 344 | static int |
345 | lfs_modcmd(modcmd_t cmd, void *arg) | | 345 | lfs_modcmd(modcmd_t cmd, void *arg) |
346 | { | | 346 | { |
347 | int error; | | 347 | int error; |
348 | | | 348 | |
349 | switch (cmd) { | | 349 | switch (cmd) { |
350 | case MODULE_CMD_INIT: | | 350 | case MODULE_CMD_INIT: |
351 | error = vfs_attach(&lfs_vfsops); | | 351 | error = vfs_attach(&lfs_vfsops); |
352 | if (error != 0) | | 352 | if (error != 0) |
353 | break; | | 353 | break; |
354 | lfs_sysctl_setup(lfs_sysctl_log); | | 354 | lfs_sysctl_setup(lfs_sysctl_log); |
355 | break; | | 355 | break; |
356 | case MODULE_CMD_FINI: | | 356 | case MODULE_CMD_FINI: |
357 | error = vfs_detach(&lfs_vfsops); | | 357 | error = vfs_detach(&lfs_vfsops); |
358 | if (error != 0) | | 358 | if (error != 0) |
359 | break; | | 359 | break; |
360 | sysctl_teardown(&lfs_sysctl_log); | | 360 | sysctl_teardown(&lfs_sysctl_log); |
361 | break; | | 361 | break; |
362 | default: | | 362 | default: |
363 | error = ENOTTY; | | 363 | error = ENOTTY; |
364 | break; | | 364 | break; |
365 | } | | 365 | } |
366 | | | 366 | |
367 | return (error); | | 367 | return (error); |
368 | } | | 368 | } |
369 | | | 369 | |
370 | /* | | 370 | /* |
371 | * XXX Same structure as FFS inodes? Should we share a common pool? | | 371 | * XXX Same structure as FFS inodes? Should we share a common pool? |
372 | */ | | 372 | */ |
373 | struct pool lfs_inode_pool; | | 373 | struct pool lfs_inode_pool; |
374 | struct pool lfs_dinode_pool; | | 374 | struct pool lfs_dinode_pool; |
375 | struct pool lfs_inoext_pool; | | 375 | struct pool lfs_inoext_pool; |
376 | struct pool lfs_lbnentry_pool; | | 376 | struct pool lfs_lbnentry_pool; |
377 | | | 377 | |
/*
 * The writer daemon.  UVM keeps track of how many dirty pages we are holding
 * in lfs_subsys_pages; the daemon flushes the filesystem when this value
 * crosses the (user-defined) threshold LFS_MAX_PAGES.
 */
383 | static void | | 383 | static void |
384 | lfs_writerd(void *arg) | | 384 | lfs_writerd(void *arg) |
385 | { | | 385 | { |
386 | struct mount *mp, *nmp; | | 386 | struct mount *mp, *nmp; |
387 | struct lfs *fs; | | 387 | struct lfs *fs; |
388 | int fsflags; | | 388 | int fsflags; |
389 | int loopcount; | | 389 | int loopcount; |
390 | | | 390 | |
391 | lfs_writer_daemon = curproc->p_pid; | | 391 | lfs_writer_daemon = curproc->p_pid; |
392 | | | 392 | |
393 | mutex_enter(&lfs_lock); | | 393 | mutex_enter(&lfs_lock); |
394 | for (;;) { | | 394 | for (;;) { |
395 | mtsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, | | 395 | mtsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, |
396 | &lfs_lock); | | 396 | &lfs_lock); |
397 | | | 397 | |
398 | /* | | 398 | /* |
399 | * Look through the list of LFSs to see if any of them | | 399 | * Look through the list of LFSs to see if any of them |
400 | * have requested pageouts. | | 400 | * have requested pageouts. |
401 | */ | | 401 | */ |
402 | mutex_enter(&mountlist_lock); | | 402 | mutex_enter(&mountlist_lock); |
403 | for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; | | 403 | for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; |
404 | mp = nmp) { | | 404 | mp = nmp) { |
405 | if (vfs_busy(mp, &nmp)) { | | 405 | if (vfs_busy(mp, &nmp)) { |
406 | continue; | | 406 | continue; |
407 | } | | 407 | } |
408 | if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, | | 408 | if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, |
409 | sizeof(mp->mnt_stat.f_fstypename)) == 0) { | | 409 | sizeof(mp->mnt_stat.f_fstypename)) == 0) { |
410 | fs = VFSTOUFS(mp)->um_lfs; | | 410 | fs = VFSTOUFS(mp)->um_lfs; |
411 | mutex_enter(&lfs_lock); | | 411 | mutex_enter(&lfs_lock); |
412 | fsflags = 0; | | 412 | fsflags = 0; |
413 | if ((fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || | | 413 | if ((fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || |
414 | lfs_dirvcount > LFS_MAX_DIROP) && | | 414 | lfs_dirvcount > LFS_MAX_DIROP) && |
415 | fs->lfs_dirops == 0) | | 415 | fs->lfs_dirops == 0) |
416 | fsflags |= SEGM_CKP; | | 416 | fsflags |= SEGM_CKP; |
417 | if (fs->lfs_pdflush) { | | 417 | if (fs->lfs_pdflush) { |
418 | DLOG((DLOG_FLUSH, "lfs_writerd: pdflush set\n")); | | 418 | DLOG((DLOG_FLUSH, "lfs_writerd: pdflush set\n")); |
419 | fs->lfs_pdflush = 0; | | 419 | fs->lfs_pdflush = 0; |
420 | lfs_flush_fs(fs, fsflags); | | 420 | lfs_flush_fs(fs, fsflags); |
421 | mutex_exit(&lfs_lock); | | 421 | mutex_exit(&lfs_lock); |
422 | } else if (!TAILQ_EMPTY(&fs->lfs_pchainhd)) { | | 422 | } else if (!TAILQ_EMPTY(&fs->lfs_pchainhd)) { |
423 | DLOG((DLOG_FLUSH, "lfs_writerd: pchain non-empty\n")); | | 423 | DLOG((DLOG_FLUSH, "lfs_writerd: pchain non-empty\n")); |
424 | mutex_exit(&lfs_lock); | | 424 | mutex_exit(&lfs_lock); |
425 | lfs_writer_enter(fs, "wrdirop"); | | 425 | lfs_writer_enter(fs, "wrdirop"); |
426 | lfs_flush_pchain(fs); | | 426 | lfs_flush_pchain(fs); |
427 | lfs_writer_leave(fs); | | 427 | lfs_writer_leave(fs); |
428 | } else | | 428 | } else |
429 | mutex_exit(&lfs_lock); | | 429 | mutex_exit(&lfs_lock); |
430 | } | | 430 | } |
431 | vfs_unbusy(mp, false, &nmp); | | 431 | vfs_unbusy(mp, false, &nmp); |
432 | } | | 432 | } |
433 | mutex_exit(&mountlist_lock); | | 433 | mutex_exit(&mountlist_lock); |
434 | | | 434 | |
435 | /* | | 435 | /* |
436 | * If global state wants a flush, flush everything. | | 436 | * If global state wants a flush, flush everything. |
437 | */ | | 437 | */ |
438 | mutex_enter(&lfs_lock); | | 438 | mutex_enter(&lfs_lock); |
439 | loopcount = 0; | | 439 | loopcount = 0; |
440 | if (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS || | | 440 | if (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS || |
441 | locked_queue_bytes > LFS_MAX_BYTES || | | 441 | locked_queue_bytes > LFS_MAX_BYTES || |
442 | lfs_subsys_pages > LFS_MAX_PAGES) { | | 442 | lfs_subsys_pages > LFS_MAX_PAGES) { |
443 | | | 443 | |
444 | if (lfs_do_flush) { | | 444 | if (lfs_do_flush) { |
445 | DLOG((DLOG_FLUSH, "daemon: lfs_do_flush\n")); | | 445 | DLOG((DLOG_FLUSH, "daemon: lfs_do_flush\n")); |
446 | } | | 446 | } |
447 | if (locked_queue_count > LFS_MAX_BUFS) { | | 447 | if (locked_queue_count > LFS_MAX_BUFS) { |
448 | DLOG((DLOG_FLUSH, "daemon: lqc = %d, max %d\n", | | 448 | DLOG((DLOG_FLUSH, "daemon: lqc = %d, max %d\n", |
449 | locked_queue_count, LFS_MAX_BUFS)); | | 449 | locked_queue_count, LFS_MAX_BUFS)); |
450 | } | | 450 | } |
451 | if (locked_queue_bytes > LFS_MAX_BYTES) { | | 451 | if (locked_queue_bytes > LFS_MAX_BYTES) { |
452 | DLOG((DLOG_FLUSH, "daemon: lqb = %ld, max %ld\n", | | 452 | DLOG((DLOG_FLUSH, "daemon: lqb = %ld, max %ld\n", |
453 | locked_queue_bytes, LFS_MAX_BYTES)); | | 453 | locked_queue_bytes, LFS_MAX_BYTES)); |
454 | } | | 454 | } |
455 | if (lfs_subsys_pages > LFS_MAX_PAGES) { | | 455 | if (lfs_subsys_pages > LFS_MAX_PAGES) { |
456 | DLOG((DLOG_FLUSH, "daemon: lssp = %d, max %d\n", | | 456 | DLOG((DLOG_FLUSH, "daemon: lssp = %d, max %d\n", |
457 | lfs_subsys_pages, LFS_MAX_PAGES)); | | 457 | lfs_subsys_pages, LFS_MAX_PAGES)); |
458 | } | | 458 | } |
459 | | | 459 | |
460 | lfs_flush(NULL, SEGM_WRITERD, 0); | | 460 | lfs_flush(NULL, SEGM_WRITERD, 0); |
461 | lfs_do_flush = 0; | | 461 | lfs_do_flush = 0; |
462 | } | | 462 | } |
463 | } | | 463 | } |
464 | /* NOTREACHED */ | | 464 | /* NOTREACHED */ |
465 | } | | 465 | } |
466 | | | 466 | |
467 | /* | | 467 | /* |
468 | * Initialize the filesystem, most work done by ufs_init. | | 468 | * Initialize the filesystem, most work done by ufs_init. |
469 | */ | | 469 | */ |
470 | void | | 470 | void |
471 | lfs_init(void) | | 471 | lfs_init(void) |
472 | { | | 472 | { |
473 | | | 473 | |
474 | malloc_type_attach(M_SEGMENT); | | 474 | malloc_type_attach(M_SEGMENT); |
475 | pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0, | | 475 | pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0, |
476 | "lfsinopl", &pool_allocator_nointr, IPL_NONE); | | 476 | "lfsinopl", &pool_allocator_nointr, IPL_NONE); |
477 | pool_init(&lfs_dinode_pool, sizeof(struct ufs1_dinode), 0, 0, 0, | | 477 | pool_init(&lfs_dinode_pool, sizeof(struct ufs1_dinode), 0, 0, 0, |
478 | "lfsdinopl", &pool_allocator_nointr, IPL_NONE); | | 478 | "lfsdinopl", &pool_allocator_nointr, IPL_NONE); |
479 | pool_init(&lfs_inoext_pool, sizeof(struct lfs_inode_ext), 8, 0, 0, | | 479 | pool_init(&lfs_inoext_pool, sizeof(struct lfs_inode_ext), 8, 0, 0, |
480 | "lfsinoextpl", &pool_allocator_nointr, IPL_NONE); | | 480 | "lfsinoextpl", &pool_allocator_nointr, IPL_NONE); |
481 | pool_init(&lfs_lbnentry_pool, sizeof(struct lbnentry), 0, 0, 0, | | 481 | pool_init(&lfs_lbnentry_pool, sizeof(struct lbnentry), 0, 0, 0, |
482 | "lfslbnpool", &pool_allocator_nointr, IPL_NONE); | | 482 | "lfslbnpool", &pool_allocator_nointr, IPL_NONE); |
483 | ufs_init(); | | 483 | ufs_init(); |
484 | | | 484 | |
485 | #ifdef DEBUG | | 485 | #ifdef DEBUG |
486 | memset(lfs_log, 0, sizeof(lfs_log)); | | 486 | memset(lfs_log, 0, sizeof(lfs_log)); |
487 | #endif | | 487 | #endif |
488 | mutex_init(&lfs_lock, MUTEX_DEFAULT, IPL_NONE); | | 488 | mutex_init(&lfs_lock, MUTEX_DEFAULT, IPL_NONE); |
489 | cv_init(&locked_queue_cv, "lfsbuf"); | | 489 | cv_init(&locked_queue_cv, "lfsbuf"); |
490 | cv_init(&lfs_writing_cv, "lfsflush"); | | 490 | cv_init(&lfs_writing_cv, "lfsflush"); |
491 | } | | 491 | } |
492 | | | 492 | |
/*
 * Re-initialisation hook: LFS keeps no state of its own to rebuild
 * here, so this simply forwards to ufs_reinit().
 */
void
lfs_reinit(void)
{

	ufs_reinit();
}
498 | | | 498 | |
499 | void | | 499 | void |
500 | lfs_done(void) | | 500 | lfs_done(void) |
501 | { | | 501 | { |
502 | ufs_done(); | | 502 | ufs_done(); |
503 | mutex_destroy(&lfs_lock); | | 503 | mutex_destroy(&lfs_lock); |
504 | cv_destroy(&locked_queue_cv); | | 504 | cv_destroy(&locked_queue_cv); |
505 | cv_destroy(&lfs_writing_cv); | | 505 | cv_destroy(&lfs_writing_cv); |
506 | pool_destroy(&lfs_inode_pool); | | 506 | pool_destroy(&lfs_inode_pool); |
507 | pool_destroy(&lfs_dinode_pool); | | 507 | pool_destroy(&lfs_dinode_pool); |
508 | pool_destroy(&lfs_inoext_pool); | | 508 | pool_destroy(&lfs_inoext_pool); |
509 | pool_destroy(&lfs_lbnentry_pool); | | 509 | pool_destroy(&lfs_lbnentry_pool); |
510 | malloc_type_detach(M_SEGMENT); | | 510 | malloc_type_detach(M_SEGMENT); |
511 | } | | 511 | } |
512 | | | 512 | |
513 | /* | | 513 | /* |
514 | * Called by main() when ufs is going to be mounted as root. | | 514 | * Called by main() when ufs is going to be mounted as root. |
515 | */ | | 515 | */ |
516 | int | | 516 | int |
517 | lfs_mountroot(void) | | 517 | lfs_mountroot(void) |
518 | { | | 518 | { |
519 | extern struct vnode *rootvp; | | 519 | extern struct vnode *rootvp; |
520 | struct mount *mp; | | 520 | struct mount *mp; |
521 | struct lwp *l = curlwp; | | 521 | struct lwp *l = curlwp; |
522 | int error; | | 522 | int error; |
523 | | | 523 | |
524 | if (device_class(root_device) != DV_DISK) | | 524 | if (device_class(root_device) != DV_DISK) |
525 | return (ENODEV); | | 525 | return (ENODEV); |
526 | | | 526 | |
527 | if (rootdev == NODEV) | | 527 | if (rootdev == NODEV) |
528 | return (ENODEV); | | 528 | return (ENODEV); |
529 | if ((error = vfs_rootmountalloc(MOUNT_LFS, "root_device", &mp))) { | | 529 | if ((error = vfs_rootmountalloc(MOUNT_LFS, "root_device", &mp))) { |
530 | vrele(rootvp); | | 530 | vrele(rootvp); |
531 | return (error); | | 531 | return (error); |
532 | } | | 532 | } |
533 | if ((error = lfs_mountfs(rootvp, mp, l))) { | | 533 | if ((error = lfs_mountfs(rootvp, mp, l))) { |
534 | vfs_unbusy(mp, false, NULL); | | 534 | vfs_unbusy(mp, false, NULL); |
535 | vfs_destroy(mp); | | 535 | vfs_destroy(mp); |
536 | return (error); | | 536 | return (error); |
537 | } | | 537 | } |
538 | mutex_enter(&mountlist_lock); | | 538 | mutex_enter(&mountlist_lock); |
539 | CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); | | 539 | CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); |
540 | mutex_exit(&mountlist_lock); | | 540 | mutex_exit(&mountlist_lock); |
541 | (void)lfs_statvfs(mp, &mp->mnt_stat); | | 541 | (void)lfs_statvfs(mp, &mp->mnt_stat); |
542 | vfs_unbusy(mp, false, NULL); | | 542 | vfs_unbusy(mp, false, NULL); |
543 | setrootfstime((time_t)(VFSTOUFS(mp)->um_lfs->lfs_tstamp)); | | 543 | setrootfstime((time_t)(VFSTOUFS(mp)->um_lfs->lfs_tstamp)); |
544 | return (0); | | 544 | return (0); |
545 | } | | 545 | } |
546 | | | 546 | |
547 | /* | | 547 | /* |
548 | * VFS Operations. | | 548 | * VFS Operations. |
549 | * | | 549 | * |
550 | * mount system call | | 550 | * mount system call |
551 | */ | | 551 | */ |
552 | int | | 552 | int |
553 | lfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) | | 553 | lfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) |
554 | { | | 554 | { |
555 | struct lwp *l = curlwp; | | 555 | struct lwp *l = curlwp; |
556 | struct vnode *devvp; | | 556 | struct vnode *devvp; |
557 | struct ufs_args *args = data; | | 557 | struct ufs_args *args = data; |
558 | struct ufsmount *ump = NULL; | | 558 | struct ufsmount *ump = NULL; |
559 | struct lfs *fs = NULL; /* LFS */ | | 559 | struct lfs *fs = NULL; /* LFS */ |
560 | int error = 0, update; | | 560 | int error = 0, update; |
561 | mode_t accessmode; | | 561 | mode_t accessmode; |
562 | | | 562 | |
563 | if (*data_len < sizeof *args) | | 563 | if (*data_len < sizeof *args) |
564 | return EINVAL; | | 564 | return EINVAL; |
565 | | | 565 | |
566 | if (mp->mnt_flag & MNT_GETARGS) { | | 566 | if (mp->mnt_flag & MNT_GETARGS) { |
567 | ump = VFSTOUFS(mp); | | 567 | ump = VFSTOUFS(mp); |
568 | if (ump == NULL) | | 568 | if (ump == NULL) |
569 | return EIO; | | 569 | return EIO; |
570 | args->fspec = NULL; | | 570 | args->fspec = NULL; |
571 | *data_len = sizeof *args; | | 571 | *data_len = sizeof *args; |
572 | return 0; | | 572 | return 0; |
573 | } | | 573 | } |
574 | | | 574 | |
575 | update = mp->mnt_flag & MNT_UPDATE; | | 575 | update = mp->mnt_flag & MNT_UPDATE; |
576 | | | 576 | |
577 | /* Check arguments */ | | 577 | /* Check arguments */ |
578 | if (args->fspec != NULL) { | | 578 | if (args->fspec != NULL) { |
579 | /* | | 579 | /* |
580 | * Look up the name and verify that it's sane. | | 580 | * Look up the name and verify that it's sane. |
581 | */ | | 581 | */ |
582 | error = namei_simple_user(args->fspec, | | 582 | error = namei_simple_user(args->fspec, |
583 | NSM_FOLLOW_NOEMULROOT, &devvp); | | 583 | NSM_FOLLOW_NOEMULROOT, &devvp); |
584 | if (error != 0) | | 584 | if (error != 0) |
585 | return (error); | | 585 | return (error); |
586 | | | 586 | |
587 | if (!update) { | | 587 | if (!update) { |
588 | /* | | 588 | /* |
589 | * Be sure this is a valid block device | | 589 | * Be sure this is a valid block device |
590 | */ | | 590 | */ |
591 | if (devvp->v_type != VBLK) | | 591 | if (devvp->v_type != VBLK) |
592 | error = ENOTBLK; | | 592 | error = ENOTBLK; |
593 | else if (bdevsw_lookup(devvp->v_rdev) == NULL) | | 593 | else if (bdevsw_lookup(devvp->v_rdev) == NULL) |
594 | error = ENXIO; | | 594 | error = ENXIO; |
595 | } else { | | 595 | } else { |
596 | /* | | 596 | /* |
597 | * Be sure we're still naming the same device | | 597 | * Be sure we're still naming the same device |
598 | * used for our initial mount | | 598 | * used for our initial mount |
599 | */ | | 599 | */ |
600 | ump = VFSTOUFS(mp); | | 600 | ump = VFSTOUFS(mp); |
601 | if (devvp != ump->um_devvp) | | 601 | if (devvp != ump->um_devvp) |
602 | error = EINVAL; | | 602 | error = EINVAL; |
603 | } | | 603 | } |
604 | } else { | | 604 | } else { |
605 | if (!update) { | | 605 | if (!update) { |
606 | /* New mounts must have a filename for the device */ | | 606 | /* New mounts must have a filename for the device */ |
607 | return (EINVAL); | | 607 | return (EINVAL); |
608 | } else { | | 608 | } else { |
609 | /* Use the extant mount */ | | 609 | /* Use the extant mount */ |
610 | ump = VFSTOUFS(mp); | | 610 | ump = VFSTOUFS(mp); |
611 | devvp = ump->um_devvp; | | 611 | devvp = ump->um_devvp; |
612 | vref(devvp); | | 612 | vref(devvp); |
613 | } | | 613 | } |
614 | } | | 614 | } |
615 | | | 615 | |
616 | | | 616 | |
617 | /* | | 617 | /* |
618 | * If mount by non-root, then verify that user has necessary | | 618 | * If mount by non-root, then verify that user has necessary |
619 | * permissions on the device. | | 619 | * permissions on the device. |
620 | */ | | 620 | */ |
621 | if (error == 0) { | | 621 | if (error == 0) { |
622 | accessmode = VREAD; | | 622 | accessmode = VREAD; |
623 | if (update ? | | 623 | if (update ? |
624 | (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : | | 624 | (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : |
625 | (mp->mnt_flag & MNT_RDONLY) == 0) | | 625 | (mp->mnt_flag & MNT_RDONLY) == 0) |
626 | accessmode |= VWRITE; | | 626 | accessmode |= VWRITE; |
627 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); | | 627 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); |
628 | error = genfs_can_mount(devvp, accessmode, l->l_cred); | | 628 | error = genfs_can_mount(devvp, accessmode, l->l_cred); |
629 | VOP_UNLOCK(devvp, 0); | | 629 | VOP_UNLOCK(devvp, 0); |
630 | } | | 630 | } |
631 | | | 631 | |
632 | if (error) { | | 632 | if (error) { |
633 | vrele(devvp); | | 633 | vrele(devvp); |
634 | return (error); | | 634 | return (error); |
635 | } | | 635 | } |
636 | | | 636 | |
637 | if (!update) { | | 637 | if (!update) { |
638 | int flags; | | 638 | int flags; |
639 | | | 639 | |
640 | if (mp->mnt_flag & MNT_RDONLY) | | 640 | if (mp->mnt_flag & MNT_RDONLY) |
641 | flags = FREAD; | | 641 | flags = FREAD; |
642 | else | | 642 | else |
643 | flags = FREAD|FWRITE; | | 643 | flags = FREAD|FWRITE; |
644 | error = VOP_OPEN(devvp, flags, FSCRED); | | 644 | error = VOP_OPEN(devvp, flags, FSCRED); |
645 | if (error) | | 645 | if (error) |
646 | goto fail; | | 646 | goto fail; |
647 | error = lfs_mountfs(devvp, mp, l); /* LFS */ | | 647 | error = lfs_mountfs(devvp, mp, l); /* LFS */ |
648 | if (error) { | | 648 | if (error) { |
649 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); | | 649 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); |
650 | (void)VOP_CLOSE(devvp, flags, NOCRED); | | 650 | (void)VOP_CLOSE(devvp, flags, NOCRED); |
651 | VOP_UNLOCK(devvp, 0); | | 651 | VOP_UNLOCK(devvp, 0); |
652 | goto fail; | | 652 | goto fail; |
653 | } | | 653 | } |
654 | | | 654 | |
655 | ump = VFSTOUFS(mp); | | 655 | ump = VFSTOUFS(mp); |
656 | fs = ump->um_lfs; | | 656 | fs = ump->um_lfs; |
657 | } else { | | 657 | } else { |
658 | /* | | 658 | /* |
659 | * Update the mount. | | 659 | * Update the mount. |
660 | */ | | 660 | */ |
661 | | | 661 | |
662 | /* | | 662 | /* |
663 | * The initial mount got a reference on this | | 663 | * The initial mount got a reference on this |
664 | * device, so drop the one obtained via | | 664 | * device, so drop the one obtained via |
665 | * namei(), above. | | 665 | * namei(), above. |
666 | */ | | 666 | */ |
667 | vrele(devvp); | | 667 | vrele(devvp); |
668 | | | 668 | |
669 | ump = VFSTOUFS(mp); | | 669 | ump = VFSTOUFS(mp); |
670 | fs = ump->um_lfs; | | 670 | fs = ump->um_lfs; |
671 | if (fs->lfs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { | | 671 | if (fs->lfs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { |
672 | /* | | 672 | /* |
673 | * Changing from read-only to read/write. | | 673 | * Changing from read-only to read/write. |
674 | * Note in the superblocks that we're writing. | | 674 | * Note in the superblocks that we're writing. |
675 | */ | | 675 | */ |
676 | fs->lfs_ronly = 0; | | 676 | fs->lfs_ronly = 0; |
677 | if (fs->lfs_pflags & LFS_PF_CLEAN) { | | 677 | if (fs->lfs_pflags & LFS_PF_CLEAN) { |
678 | fs->lfs_pflags &= ~LFS_PF_CLEAN; | | 678 | fs->lfs_pflags &= ~LFS_PF_CLEAN; |
679 | lfs_writesuper(fs, fs->lfs_sboffs[0]); | | 679 | lfs_writesuper(fs, fs->lfs_sboffs[0]); |
680 | lfs_writesuper(fs, fs->lfs_sboffs[1]); | | 680 | lfs_writesuper(fs, fs->lfs_sboffs[1]); |
681 | } | | 681 | } |
682 | } | | 682 | } |
683 | if (args->fspec == NULL) | | 683 | if (args->fspec == NULL) |
684 | return EINVAL; | | 684 | return EINVAL; |
685 | } | | 685 | } |
686 | | | 686 | |
687 | error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, | | 687 | error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, |
688 | UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); | | 688 | UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); |
689 | if (error == 0) | | 689 | if (error == 0) |
690 | (void)strncpy(fs->lfs_fsmnt, mp->mnt_stat.f_mntonname, | | 690 | (void)strncpy(fs->lfs_fsmnt, mp->mnt_stat.f_mntonname, |
691 | sizeof(fs->lfs_fsmnt)); | | 691 | sizeof(fs->lfs_fsmnt)); |
692 | return error; | | 692 | return error; |
693 | | | 693 | |
694 | fail: | | 694 | fail: |
695 | vrele(devvp); | | 695 | vrele(devvp); |
696 | return (error); | | 696 | return (error); |
697 | } | | 697 | } |
698 | | | 698 | |
699 | | | 699 | |
700 | /* | | 700 | /* |
701 | * Common code for mount and mountroot | | 701 | * Common code for mount and mountroot |
702 | * LFS specific | | 702 | * LFS specific |
703 | */ | | 703 | */ |
704 | int | | 704 | int |
705 | lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) | | 705 | lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) |
706 | { | | 706 | { |
707 | struct dlfs *tdfs, *dfs, *adfs; | | 707 | struct dlfs *tdfs, *dfs, *adfs; |
708 | struct lfs *fs; | | 708 | struct lfs *fs; |
709 | struct ufsmount *ump; | | 709 | struct ufsmount *ump; |
710 | struct vnode *vp; | | 710 | struct vnode *vp; |
711 | struct buf *bp, *abp; | | 711 | struct buf *bp, *abp; |
712 | struct partinfo dpart; | | 712 | struct partinfo dpart; |
713 | dev_t dev; | | 713 | dev_t dev; |
714 | int error, i, ronly, secsize, fsbsize; | | 714 | int error, i, ronly, secsize, fsbsize; |
715 | kauth_cred_t cred; | | 715 | kauth_cred_t cred; |
716 | CLEANERINFO *cip; | | 716 | CLEANERINFO *cip; |
717 | SEGUSE *sup; | | 717 | SEGUSE *sup; |
718 | daddr_t sb_addr; | | 718 | daddr_t sb_addr; |
719 | | | 719 | |
720 | cred = l ? l->l_cred : NOCRED; | | 720 | cred = l ? l->l_cred : NOCRED; |
721 | | | 721 | |
722 | /* | | 722 | /* |
723 | * Flush out any old buffers remaining from a previous use. | | 723 | * Flush out any old buffers remaining from a previous use. |
724 | */ | | 724 | */ |
725 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); | | 725 | vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); |
726 | error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); | | 726 | error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); |
727 | VOP_UNLOCK(devvp, 0); | | 727 | VOP_UNLOCK(devvp, 0); |
728 | if (error) | | 728 | if (error) |
729 | return (error); | | 729 | return (error); |
730 | | | 730 | |
731 | ronly = (mp->mnt_flag & MNT_RDONLY) != 0; | | 731 | ronly = (mp->mnt_flag & MNT_RDONLY) != 0; |
732 | if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0) | | 732 | if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0) |
733 | secsize = DEV_BSIZE; | | 733 | secsize = DEV_BSIZE; |
734 | else | | 734 | else |
735 | secsize = dpart.disklab->d_secsize; | | 735 | secsize = dpart.disklab->d_secsize; |
736 | | | 736 | |
737 | /* Don't free random space on error. */ | | 737 | /* Don't free random space on error. */ |
738 | bp = NULL; | | 738 | bp = NULL; |
739 | abp = NULL; | | 739 | abp = NULL; |
740 | ump = NULL; | | 740 | ump = NULL; |
741 | | | 741 | |
742 | sb_addr = LFS_LABELPAD / secsize; | | 742 | sb_addr = LFS_LABELPAD / secsize; |
743 | while (1) { | | 743 | while (1) { |
744 | /* Read in the superblock. */ | | 744 | /* Read in the superblock. */ |
745 | error = bread(devvp, sb_addr, LFS_SBPAD, cred, 0, &bp); | | 745 | error = bread(devvp, sb_addr, LFS_SBPAD, cred, 0, &bp); |
746 | if (error) | | 746 | if (error) |
747 | goto out; | | 747 | goto out; |
748 | dfs = (struct dlfs *)bp->b_data; | | 748 | dfs = (struct dlfs *)bp->b_data; |
749 | | | 749 | |
750 | /* Check the basics. */ | | 750 | /* Check the basics. */ |
751 | if (dfs->dlfs_magic != LFS_MAGIC || dfs->dlfs_bsize > MAXBSIZE || | | 751 | if (dfs->dlfs_magic != LFS_MAGIC || dfs->dlfs_bsize > MAXBSIZE || |
752 | dfs->dlfs_version > LFS_VERSION || | | 752 | dfs->dlfs_version > LFS_VERSION || |
753 | dfs->dlfs_bsize < sizeof(struct dlfs)) { | | 753 | dfs->dlfs_bsize < sizeof(struct dlfs)) { |
754 | DLOG((DLOG_MOUNT, "lfs_mountfs: primary superblock sanity failed\n")); | | 754 | DLOG((DLOG_MOUNT, "lfs_mountfs: primary superblock sanity failed\n")); |
755 | error = EINVAL; /* XXX needs translation */ | | 755 | error = EINVAL; /* XXX needs translation */ |
756 | goto out; | | 756 | goto out; |
757 | } | | 757 | } |
758 | if (dfs->dlfs_inodefmt > LFS_MAXINODEFMT) { | | 758 | if (dfs->dlfs_inodefmt > LFS_MAXINODEFMT) { |
759 | DLOG((DLOG_MOUNT, "lfs_mountfs: unknown inode format %d\n", | | 759 | DLOG((DLOG_MOUNT, "lfs_mountfs: unknown inode format %d\n", |
760 | dfs->dlfs_inodefmt)); | | 760 | dfs->dlfs_inodefmt)); |
761 | error = EINVAL; | | 761 | error = EINVAL; |
762 | goto out; | | 762 | goto out; |
763 | } | | 763 | } |
764 | | | 764 | |
765 | if (dfs->dlfs_version == 1) | | 765 | if (dfs->dlfs_version == 1) |
766 | fsbsize = secsize; | | 766 | fsbsize = secsize; |
767 | else { | | 767 | else { |
768 | fsbsize = 1 << (dfs->dlfs_bshift - dfs->dlfs_blktodb + | | 768 | fsbsize = 1 << (dfs->dlfs_bshift - dfs->dlfs_blktodb + |
769 | dfs->dlfs_fsbtodb); | | 769 | dfs->dlfs_fsbtodb); |
770 | /* | | 770 | /* |
771 | * Could be, if the frag size is large enough, that we | | 771 | * Could be, if the frag size is large enough, that we |
772 | * don't have the "real" primary superblock. If that's | | 772 | * don't have the "real" primary superblock. If that's |
773 | * the case, get the real one, and try again. | | 773 | * the case, get the real one, and try again. |
774 | */ | | 774 | */ |
775 | if (sb_addr != dfs->dlfs_sboffs[0] << | | 775 | if (sb_addr != dfs->dlfs_sboffs[0] << |
776 | dfs->dlfs_fsbtodb) { | | 776 | dfs->dlfs_fsbtodb) { |
777 | DLOG((DLOG_MOUNT, "lfs_mountfs: sb daddr" | | 777 | DLOG((DLOG_MOUNT, "lfs_mountfs: sb daddr" |
778 | " 0x%llx is not right, trying 0x%llx\n", | | 778 | " 0x%llx is not right, trying 0x%llx\n", |
779 | (long long)sb_addr, | | 779 | (long long)sb_addr, |
780 | (long long)(dfs->dlfs_sboffs[0] << | | 780 | (long long)(dfs->dlfs_sboffs[0] << |
781 | dfs->dlfs_fsbtodb))); | | 781 | dfs->dlfs_fsbtodb))); |
782 | sb_addr = dfs->dlfs_sboffs[0] << | | 782 | sb_addr = dfs->dlfs_sboffs[0] << |
783 | dfs->dlfs_fsbtodb; | | 783 | dfs->dlfs_fsbtodb; |
784 | brelse(bp, 0); | | 784 | brelse(bp, 0); |
785 | continue; | | 785 | continue; |
786 | } | | 786 | } |
787 | } | | 787 | } |
788 | break; | | 788 | break; |
789 | } | | 789 | } |
790 | | | 790 | |
791 | /* | | 791 | /* |
792 | * Check the second superblock to see which is newer; then mount | | 792 | * Check the second superblock to see which is newer; then mount |
793 | * using the older of the two. This is necessary to ensure that | | 793 | * using the older of the two. This is necessary to ensure that |
794 | * the filesystem is valid if it was not unmounted cleanly. | | 794 | * the filesystem is valid if it was not unmounted cleanly. |
795 | */ | | 795 | */ |
796 | | | 796 | |
797 | if (dfs->dlfs_sboffs[1] && | | 797 | if (dfs->dlfs_sboffs[1] && |
798 | dfs->dlfs_sboffs[1] - LFS_LABELPAD / fsbsize > LFS_SBPAD / fsbsize) | | 798 | dfs->dlfs_sboffs[1] - LFS_LABELPAD / fsbsize > LFS_SBPAD / fsbsize) |
799 | { | | 799 | { |
800 | error = bread(devvp, dfs->dlfs_sboffs[1] * (fsbsize / secsize), | | 800 | error = bread(devvp, dfs->dlfs_sboffs[1] * (fsbsize / secsize), |
801 | LFS_SBPAD, cred, 0, &abp); | | 801 | LFS_SBPAD, cred, 0, &abp); |
802 | if (error) | | 802 | if (error) |
803 | goto out; | | 803 | goto out; |
804 | adfs = (struct dlfs *)abp->b_data; | | 804 | adfs = (struct dlfs *)abp->b_data; |
805 | | | 805 | |
806 | if (dfs->dlfs_version == 1) { | | 806 | if (dfs->dlfs_version == 1) { |
807 | /* 1s resolution comparison */ | | 807 | /* 1s resolution comparison */ |
808 | if (adfs->dlfs_tstamp < dfs->dlfs_tstamp) | | 808 | if (adfs->dlfs_tstamp < dfs->dlfs_tstamp) |
809 | tdfs = adfs; | | 809 | tdfs = adfs; |
810 | else | | 810 | else |
811 | tdfs = dfs; | | 811 | tdfs = dfs; |
812 | } else { | | 812 | } else { |
813 | /* monotonic infinite-resolution comparison */ | | 813 | /* monotonic infinite-resolution comparison */ |
814 | if (adfs->dlfs_serial < dfs->dlfs_serial) | | 814 | if (adfs->dlfs_serial < dfs->dlfs_serial) |
815 | tdfs = adfs; | | 815 | tdfs = adfs; |
816 | else | | 816 | else |
817 | tdfs = dfs; | | 817 | tdfs = dfs; |
818 | } | | 818 | } |
819 | | | 819 | |
820 | /* Check the basics. */ | | 820 | /* Check the basics. */ |
821 | if (tdfs->dlfs_magic != LFS_MAGIC || | | 821 | if (tdfs->dlfs_magic != LFS_MAGIC || |
822 | tdfs->dlfs_bsize > MAXBSIZE || | | 822 | tdfs->dlfs_bsize > MAXBSIZE || |
823 | tdfs->dlfs_version > LFS_VERSION || | | 823 | tdfs->dlfs_version > LFS_VERSION || |
824 | tdfs->dlfs_bsize < sizeof(struct dlfs)) { | | 824 | tdfs->dlfs_bsize < sizeof(struct dlfs)) { |
825 | DLOG((DLOG_MOUNT, "lfs_mountfs: alt superblock" | | 825 | DLOG((DLOG_MOUNT, "lfs_mountfs: alt superblock" |
826 | " sanity failed\n")); | | 826 | " sanity failed\n")); |
827 | error = EINVAL; /* XXX needs translation */ | | 827 | error = EINVAL; /* XXX needs translation */ |
828 | goto out; | | 828 | goto out; |
829 | } | | 829 | } |
830 | } else { | | 830 | } else { |
831 | DLOG((DLOG_MOUNT, "lfs_mountfs: invalid alt superblock" | | 831 | DLOG((DLOG_MOUNT, "lfs_mountfs: invalid alt superblock" |
832 | " daddr=0x%x\n", dfs->dlfs_sboffs[1])); | | 832 | " daddr=0x%x\n", dfs->dlfs_sboffs[1])); |
833 | error = EINVAL; | | 833 | error = EINVAL; |
834 | goto out; | | 834 | goto out; |
835 | } | | 835 | } |
836 | | | 836 | |
837 | /* Allocate the mount structure, copy the superblock into it. */ | | 837 | /* Allocate the mount structure, copy the superblock into it. */ |
838 | fs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK | M_ZERO); | | 838 | fs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK | M_ZERO); |
839 | memcpy(&fs->lfs_dlfs, tdfs, sizeof(struct dlfs)); | | 839 | memcpy(&fs->lfs_dlfs, tdfs, sizeof(struct dlfs)); |
840 | | | 840 | |
841 | /* Compatibility */ | | 841 | /* Compatibility */ |
842 | if (fs->lfs_version < 2) { | | 842 | if (fs->lfs_version < 2) { |
843 | fs->lfs_sumsize = LFS_V1_SUMMARY_SIZE; | | 843 | fs->lfs_sumsize = LFS_V1_SUMMARY_SIZE; |
844 | fs->lfs_ibsize = fs->lfs_bsize; | | 844 | fs->lfs_ibsize = fs->lfs_bsize; |
845 | fs->lfs_start = fs->lfs_sboffs[0]; | | 845 | fs->lfs_start = fs->lfs_sboffs[0]; |
846 | fs->lfs_tstamp = fs->lfs_otstamp; | | 846 | fs->lfs_tstamp = fs->lfs_otstamp; |
847 | fs->lfs_fsbtodb = 0; | | 847 | fs->lfs_fsbtodb = 0; |
848 | } | | 848 | } |
849 | if (fs->lfs_resvseg == 0) | | 849 | if (fs->lfs_resvseg == 0) |
850 | fs->lfs_resvseg = MIN(fs->lfs_minfreeseg - 1, \ | | 850 | fs->lfs_resvseg = MIN(fs->lfs_minfreeseg - 1, \ |
851 | MAX(MIN_RESV_SEGS, fs->lfs_minfreeseg / 2 + 1)); | | 851 | MAX(MIN_RESV_SEGS, fs->lfs_minfreeseg / 2 + 1)); |
852 | | | 852 | |
853 | /* | | 853 | /* |
854 | * If we aren't going to be able to write meaningfully to this | | 854 | * If we aren't going to be able to write meaningfully to this |
855 | * filesystem, and were not mounted readonly, bomb out now. | | 855 | * filesystem, and were not mounted readonly, bomb out now. |
856 | */ | | 856 | */ |
857 | if (fsbtob(fs, LFS_NRESERVE(fs)) > LFS_MAX_BYTES && !ronly) { | | 857 | if (fsbtob(fs, LFS_NRESERVE(fs)) > LFS_MAX_BYTES && !ronly) { |
858 | DLOG((DLOG_MOUNT, "lfs_mount: to mount this filesystem read/write," | | 858 | DLOG((DLOG_MOUNT, "lfs_mount: to mount this filesystem read/write," |
859 | " we need BUFPAGES >= %lld\n", | | 859 | " we need BUFPAGES >= %lld\n", |
860 | (long long)((bufmem_hiwater / bufmem_lowater) * | | 860 | (long long)((bufmem_hiwater / bufmem_lowater) * |
861 | LFS_INVERSE_MAX_BYTES( | | 861 | LFS_INVERSE_MAX_BYTES( |
862 | fsbtob(fs, LFS_NRESERVE(fs))) >> PAGE_SHIFT))); | | 862 | fsbtob(fs, LFS_NRESERVE(fs))) >> PAGE_SHIFT))); |
863 | free(fs, M_UFSMNT); | | 863 | free(fs, M_UFSMNT); |
864 | error = EFBIG; /* XXX needs translation */ | | 864 | error = EFBIG; /* XXX needs translation */ |
865 | goto out; | | 865 | goto out; |
866 | } | | 866 | } |
867 | | | 867 | |
868 | /* Before rolling forward, lock so vget will sleep for other procs */ | | 868 | /* Before rolling forward, lock so vget will sleep for other procs */ |
869 | if (l != NULL) { | | 869 | if (l != NULL) { |
870 | fs->lfs_flags = LFS_NOTYET; | | 870 | fs->lfs_flags = LFS_NOTYET; |
871 | fs->lfs_rfpid = l->l_proc->p_pid; | | 871 | fs->lfs_rfpid = l->l_proc->p_pid; |
872 | } | | 872 | } |
873 | | | 873 | |
874 | ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); | | 874 | ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); |
875 | ump->um_lfs = fs; | | 875 | ump->um_lfs = fs; |
876 | ump->um_ops = &lfs_ufsops; | | 876 | ump->um_ops = &lfs_ufsops; |
877 | ump->um_fstype = UFS1; | | 877 | ump->um_fstype = UFS1; |
878 | if (sizeof(struct lfs) < LFS_SBPAD) { /* XXX why? */ | | 878 | if (sizeof(struct lfs) < LFS_SBPAD) { /* XXX why? */ |
879 | brelse(bp, BC_INVAL); | | 879 | brelse(bp, BC_INVAL); |
880 | brelse(abp, BC_INVAL); | | 880 | brelse(abp, BC_INVAL); |
881 | } else { | | 881 | } else { |
882 | brelse(bp, 0); | | 882 | brelse(bp, 0); |
883 | brelse(abp, 0); | | 883 | brelse(abp, 0); |
884 | } | | 884 | } |
885 | bp = NULL; | | 885 | bp = NULL; |
886 | abp = NULL; | | 886 | abp = NULL; |
887 | | | 887 | |
888 | | | 888 | |
889 | /* Set up the I/O information */ | | 889 | /* Set up the I/O information */ |
890 | fs->lfs_devbsize = secsize; | | 890 | fs->lfs_devbsize = secsize; |
891 | fs->lfs_iocount = 0; | | 891 | fs->lfs_iocount = 0; |
892 | fs->lfs_diropwait = 0; | | 892 | fs->lfs_diropwait = 0; |
893 | fs->lfs_activesb = 0; | | 893 | fs->lfs_activesb = 0; |
894 | fs->lfs_uinodes = 0; | | 894 | fs->lfs_uinodes = 0; |
895 | fs->lfs_ravail = 0; | | 895 | fs->lfs_ravail = 0; |
896 | fs->lfs_favail = 0; | | 896 | fs->lfs_favail = 0; |
897 | fs->lfs_sbactive = 0; | | 897 | fs->lfs_sbactive = 0; |
898 | | | 898 | |
899 | /* Set up the ifile and lock aflags */ | | 899 | /* Set up the ifile and lock aflags */ |
900 | fs->lfs_doifile = 0; | | 900 | fs->lfs_doifile = 0; |
901 | fs->lfs_writer = 0; | | 901 | fs->lfs_writer = 0; |
902 | fs->lfs_dirops = 0; | | 902 | fs->lfs_dirops = 0; |
903 | fs->lfs_nadirop = 0; | | 903 | fs->lfs_nadirop = 0; |
904 | fs->lfs_seglock = 0; | | 904 | fs->lfs_seglock = 0; |
905 | fs->lfs_pdflush = 0; | | 905 | fs->lfs_pdflush = 0; |
906 | fs->lfs_sleepers = 0; | | 906 | fs->lfs_sleepers = 0; |
907 | fs->lfs_pages = 0; | | 907 | fs->lfs_pages = 0; |
908 | rw_init(&fs->lfs_fraglock); | | 908 | rw_init(&fs->lfs_fraglock); |
909 | rw_init(&fs->lfs_iflock); | | 909 | rw_init(&fs->lfs_iflock); |
910 | cv_init(&fs->lfs_stopcv, "lfsstop"); | | 910 | cv_init(&fs->lfs_stopcv, "lfsstop"); |
911 | | | 911 | |
912 | /* Set the file system readonly/modify bits. */ | | 912 | /* Set the file system readonly/modify bits. */ |
913 | fs->lfs_ronly = ronly; | | 913 | fs->lfs_ronly = ronly; |
914 | if (ronly == 0) | | 914 | if (ronly == 0) |
915 | fs->lfs_fmod = 1; | | 915 | fs->lfs_fmod = 1; |
916 | | | 916 | |
917 | /* Initialize the mount structure. */ | | 917 | /* Initialize the mount structure. */ |
918 | dev = devvp->v_rdev; | | 918 | dev = devvp->v_rdev; |
919 | mp->mnt_data = ump; | | 919 | mp->mnt_data = ump; |
920 | mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; | | 920 | mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; |
921 | mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_LFS); | | 921 | mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_LFS); |
922 | mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; | | 922 | mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; |
923 | mp->mnt_stat.f_namemax = LFS_MAXNAMLEN; | | 923 | mp->mnt_stat.f_namemax = LFS_MAXNAMLEN; |
924 | mp->mnt_stat.f_iosize = fs->lfs_bsize; | | 924 | mp->mnt_stat.f_iosize = fs->lfs_bsize; |
925 | mp->mnt_flag |= MNT_LOCAL; | | 925 | mp->mnt_flag |= MNT_LOCAL; |
926 | mp->mnt_fs_bshift = fs->lfs_bshift; | | 926 | mp->mnt_fs_bshift = fs->lfs_bshift; |
927 | ump->um_flags = 0; | | 927 | ump->um_flags = 0; |
928 | ump->um_mountp = mp; | | 928 | ump->um_mountp = mp; |
929 | ump->um_dev = dev; | | 929 | ump->um_dev = dev; |
930 | ump->um_devvp = devvp; | | 930 | ump->um_devvp = devvp; |
931 | ump->um_bptrtodb = fs->lfs_fsbtodb; | | 931 | ump->um_bptrtodb = fs->lfs_fsbtodb; |
932 | ump->um_seqinc = fragstofsb(fs, fs->lfs_frag); | | 932 | ump->um_seqinc = fragstofsb(fs, fs->lfs_frag); |
933 | ump->um_nindir = fs->lfs_nindir; | | 933 | ump->um_nindir = fs->lfs_nindir; |
934 | ump->um_lognindir = ffs(fs->lfs_nindir) - 1; | | 934 | ump->um_lognindir = ffs(fs->lfs_nindir) - 1; |
935 | for (i = 0; i < MAXQUOTAS; i++) | | 935 | for (i = 0; i < MAXQUOTAS; i++) |
936 | ump->um_quotas[i] = NULLVP; | | 936 | ump->um_quotas[i] = NULLVP; |
937 | ump->um_maxsymlinklen = fs->lfs_maxsymlinklen; | | 937 | ump->um_maxsymlinklen = fs->lfs_maxsymlinklen; |
938 | ump->um_dirblksiz = DIRBLKSIZ; | | 938 | ump->um_dirblksiz = DIRBLKSIZ; |
939 | ump->um_maxfilesize = fs->lfs_maxfilesize; | | 939 | ump->um_maxfilesize = fs->lfs_maxfilesize; |
940 | if (ump->um_maxsymlinklen > 0) | | 940 | if (ump->um_maxsymlinklen > 0) |
941 | mp->mnt_iflag |= IMNT_DTYPE; | | 941 | mp->mnt_iflag |= IMNT_DTYPE; |
942 | devvp->v_specmountpoint = mp; | | 942 | devvp->v_specmountpoint = mp; |
943 | | | 943 | |
944 | /* Set up reserved memory for pageout */ | | 944 | /* Set up reserved memory for pageout */ |
945 | lfs_setup_resblks(fs); | | 945 | lfs_setup_resblks(fs); |
946 | /* Set up vdirop tailq */ | | 946 | /* Set up vdirop tailq */ |
947 | TAILQ_INIT(&fs->lfs_dchainhd); | | 947 | TAILQ_INIT(&fs->lfs_dchainhd); |
948 | /* and paging tailq */ | | 948 | /* and paging tailq */ |
949 | TAILQ_INIT(&fs->lfs_pchainhd); | | 949 | TAILQ_INIT(&fs->lfs_pchainhd); |
950 | /* and delayed segment accounting for truncation list */ | | 950 | /* and delayed segment accounting for truncation list */ |
951 | LIST_INIT(&fs->lfs_segdhd); | | 951 | LIST_INIT(&fs->lfs_segdhd); |
952 | | | 952 | |
953 | /* | | 953 | /* |
954 | * We use the ifile vnode for almost every operation. Instead of | | 954 | * We use the ifile vnode for almost every operation. Instead of |
955 | * retrieving it from the hash table each time we retrieve it here, | | 955 | * retrieving it from the hash table each time we retrieve it here, |
956 | * artificially increment the reference count and keep a pointer | | 956 | * artificially increment the reference count and keep a pointer |
957 | * to it in the incore copy of the superblock. | | 957 | * to it in the incore copy of the superblock. |
958 | */ | | 958 | */ |
959 | if ((error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) != 0) { | | 959 | if ((error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) != 0) { |
960 | DLOG((DLOG_MOUNT, "lfs_mountfs: ifile vget failed, error=%d\n", error)); | | 960 | DLOG((DLOG_MOUNT, "lfs_mountfs: ifile vget failed, error=%d\n", error)); |
961 | goto out; | | 961 | goto out; |
962 | } | | 962 | } |
963 | fs->lfs_ivnode = vp; | | 963 | fs->lfs_ivnode = vp; |
964 | VREF(vp); | | 964 | VREF(vp); |
965 | | | 965 | |
966 | /* Set up inode bitmap and order free list */ | | 966 | /* Set up inode bitmap and order free list */ |
967 | lfs_order_freelist(fs); | | 967 | lfs_order_freelist(fs); |
968 | | | 968 | |
969 | /* Set up segment usage flags for the autocleaner. */ | | 969 | /* Set up segment usage flags for the autocleaner. */ |
970 | fs->lfs_nactive = 0; | | 970 | fs->lfs_nactive = 0; |
971 | fs->lfs_suflags = (u_int32_t **)malloc(2 * sizeof(u_int32_t *), | | 971 | fs->lfs_suflags = (u_int32_t **)malloc(2 * sizeof(u_int32_t *), |
972 | M_SEGMENT, M_WAITOK); | | 972 | M_SEGMENT, M_WAITOK); |
973 | fs->lfs_suflags[0] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), | | 973 | fs->lfs_suflags[0] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), |
974 | M_SEGMENT, M_WAITOK); | | 974 | M_SEGMENT, M_WAITOK); |
975 | fs->lfs_suflags[1] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), | | 975 | fs->lfs_suflags[1] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), |
976 | M_SEGMENT, M_WAITOK); | | 976 | M_SEGMENT, M_WAITOK); |
977 | memset(fs->lfs_suflags[1], 0, fs->lfs_nseg * sizeof(u_int32_t)); | | 977 | memset(fs->lfs_suflags[1], 0, fs->lfs_nseg * sizeof(u_int32_t)); |
978 | for (i = 0; i < fs->lfs_nseg; i++) { | | 978 | for (i = 0; i < fs->lfs_nseg; i++) { |
979 | int changed; | | 979 | int changed; |
980 | | | 980 | |
981 | LFS_SEGENTRY(sup, fs, i, bp); | | 981 | LFS_SEGENTRY(sup, fs, i, bp); |
982 | changed = 0; | | 982 | changed = 0; |
983 | if (!ronly) { | | 983 | if (!ronly) { |
984 | if (sup->su_nbytes == 0 && | | 984 | if (sup->su_nbytes == 0 && |
985 | !(sup->su_flags & SEGUSE_EMPTY)) { | | 985 | !(sup->su_flags & SEGUSE_EMPTY)) { |
986 | sup->su_flags |= SEGUSE_EMPTY; | | 986 | sup->su_flags |= SEGUSE_EMPTY; |
987 | ++changed; | | 987 | ++changed; |
988 | } else if (!(sup->su_nbytes == 0) && | | 988 | } else if (!(sup->su_nbytes == 0) && |
989 | (sup->su_flags & SEGUSE_EMPTY)) { | | 989 | (sup->su_flags & SEGUSE_EMPTY)) { |
990 | sup->su_flags &= ~SEGUSE_EMPTY; | | 990 | sup->su_flags &= ~SEGUSE_EMPTY; |
991 | ++changed; | | 991 | ++changed; |
992 | } | | 992 | } |
993 | if (sup->su_flags & (SEGUSE_ACTIVE|SEGUSE_INVAL)) { | | 993 | if (sup->su_flags & (SEGUSE_ACTIVE|SEGUSE_INVAL)) { |
994 | sup->su_flags &= ~(SEGUSE_ACTIVE|SEGUSE_INVAL); | | 994 | sup->su_flags &= ~(SEGUSE_ACTIVE|SEGUSE_INVAL); |
995 | ++changed; | | 995 | ++changed; |
996 | } | | 996 | } |
997 | } | | 997 | } |
998 | fs->lfs_suflags[0][i] = sup->su_flags; | | 998 | fs->lfs_suflags[0][i] = sup->su_flags; |
999 | if (changed) | | 999 | if (changed) |
1000 | LFS_WRITESEGENTRY(sup, fs, i, bp); | | 1000 | LFS_WRITESEGENTRY(sup, fs, i, bp); |
1001 | else | | 1001 | else |
1002 | brelse(bp, 0); | | 1002 | brelse(bp, 0); |
1003 | } | | 1003 | } |
1004 | | | 1004 | |
1005 | #ifdef LFS_KERNEL_RFW | | 1005 | #ifdef LFS_KERNEL_RFW |
1006 | lfs_roll_forward(fs, mp, l); | | 1006 | lfs_roll_forward(fs, mp, l); |
1007 | #endif | | 1007 | #endif |
1008 | | | 1008 | |
1009 | /* If writing, sb is not clean; record in case of immediate crash */ | | 1009 | /* If writing, sb is not clean; record in case of immediate crash */ |
1010 | if (!fs->lfs_ronly) { | | 1010 | if (!fs->lfs_ronly) { |
1011 | fs->lfs_pflags &= ~LFS_PF_CLEAN; | | 1011 | fs->lfs_pflags &= ~LFS_PF_CLEAN; |
1012 | lfs_writesuper(fs, fs->lfs_sboffs[0]); | | 1012 | lfs_writesuper(fs, fs->lfs_sboffs[0]); |
1013 | lfs_writesuper(fs, fs->lfs_sboffs[1]); | | 1013 | lfs_writesuper(fs, fs->lfs_sboffs[1]); |
1014 | } | | 1014 | } |
1015 | | | 1015 | |
1016 | /* Allow vget now that roll-forward is complete */ | | 1016 | /* Allow vget now that roll-forward is complete */ |
1017 | fs->lfs_flags &= ~(LFS_NOTYET); | | 1017 | fs->lfs_flags &= ~(LFS_NOTYET); |
1018 | wakeup(&fs->lfs_flags); | | 1018 | wakeup(&fs->lfs_flags); |
1019 | | | 1019 | |
1020 | /* | | 1020 | /* |
1021 | * Initialize the ifile cleaner info with information from | | 1021 | * Initialize the ifile cleaner info with information from |
1022 | * the superblock. | | 1022 | * the superblock. |
1023 | */ | | 1023 | */ |
1024 | LFS_CLEANERINFO(cip, fs, bp); | | 1024 | LFS_CLEANERINFO(cip, fs, bp); |
1025 | cip->clean = fs->lfs_nclean; | | 1025 | cip->clean = fs->lfs_nclean; |
1026 | cip->dirty = fs->lfs_nseg - fs->lfs_nclean; | | 1026 | cip->dirty = fs->lfs_nseg - fs->lfs_nclean; |
1027 | cip->avail = fs->lfs_avail; | | 1027 | cip->avail = fs->lfs_avail; |
1028 | cip->bfree = fs->lfs_bfree; | | 1028 | cip->bfree = fs->lfs_bfree; |
1029 | (void) LFS_BWRITE_LOG(bp); /* Ifile */ | | 1029 | (void) LFS_BWRITE_LOG(bp); /* Ifile */ |
1030 | | | 1030 | |
1031 | /* | | 1031 | /* |
1032 | * Mark the current segment as ACTIVE, since we're going to | | 1032 | * Mark the current segment as ACTIVE, since we're going to |
1033 | * be writing to it. | | 1033 | * be writing to it. |
1034 | */ | | 1034 | */ |
1035 | LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); | | 1035 | LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); |
1036 | sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; | | 1036 | sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; |
1037 | fs->lfs_nactive++; | | 1037 | fs->lfs_nactive++; |
1038 | LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); /* Ifile */ | | 1038 | LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); /* Ifile */ |
1039 | | | 1039 | |
1040 | /* Now that roll-forward is done, unlock the Ifile */ | | 1040 | /* Now that roll-forward is done, unlock the Ifile */ |
1041 | vput(vp); | | 1041 | vput(vp); |
1042 | | | 1042 | |
1043 | /* Start the pagedaemon-anticipating daemon */ | | 1043 | /* Start the pagedaemon-anticipating daemon */ |
1044 | if (lfs_writer_daemon == 0 && kthread_create(PRI_BIO, 0, NULL, | | 1044 | if (lfs_writer_daemon == 0 && kthread_create(PRI_BIO, 0, NULL, |
1045 | lfs_writerd, NULL, NULL, "lfs_writer") != 0) | | 1045 | lfs_writerd, NULL, NULL, "lfs_writer") != 0) |
1046 | panic("fork lfs_writer"); | | 1046 | panic("fork lfs_writer"); |
1047 | | | 1047 | |
1048 | printf("WARNING: the log-structured file system is experimental\n" | | 1048 | printf("WARNING: the log-structured file system is experimental\n" |
1049 | "WARNING: it may cause system crashes and/or corrupt data\n"); | | 1049 | "WARNING: it may cause system crashes and/or corrupt data\n"); |
1050 | | | 1050 | |
1051 | return (0); | | 1051 | return (0); |
1052 | | | 1052 | |
1053 | out: | | 1053 | out: |
1054 | if (bp) | | 1054 | if (bp) |
1055 | brelse(bp, 0); | | 1055 | brelse(bp, 0); |
1056 | if (abp) | | 1056 | if (abp) |
1057 | brelse(abp, 0); | | 1057 | brelse(abp, 0); |
1058 | if (ump) { | | 1058 | if (ump) { |
1059 | free(ump->um_lfs, M_UFSMNT); | | 1059 | free(ump->um_lfs, M_UFSMNT); |
1060 | free(ump, M_UFSMNT); | | 1060 | free(ump, M_UFSMNT); |
1061 | mp->mnt_data = NULL; | | 1061 | mp->mnt_data = NULL; |
1062 | } | | 1062 | } |
1063 | | | 1063 | |
1064 | return (error); | | 1064 | return (error); |
1065 | } | | 1065 | } |
1066 | | | 1066 | |
1067 | /* | | 1067 | /* |
1068 | * unmount system call | | 1068 | * unmount system call |
1069 | */ | | 1069 | */ |
1070 | int | | 1070 | int |
1071 | lfs_unmount(struct mount *mp, int mntflags) | | 1071 | lfs_unmount(struct mount *mp, int mntflags) |
1072 | { | | 1072 | { |
1073 | struct lwp *l = curlwp; | | 1073 | struct lwp *l = curlwp; |
1074 | struct ufsmount *ump; | | 1074 | struct ufsmount *ump; |
1075 | struct lfs *fs; | | 1075 | struct lfs *fs; |
1076 | int error, flags, ronly; | | 1076 | int error, flags, ronly; |
1077 | vnode_t *vp; | | 1077 | vnode_t *vp; |
1078 | | | 1078 | |
1079 | flags = 0; | | 1079 | flags = 0; |
1080 | if (mntflags & MNT_FORCE) | | 1080 | if (mntflags & MNT_FORCE) |
1081 | flags |= FORCECLOSE; | | 1081 | flags |= FORCECLOSE; |
1082 | | | 1082 | |
1083 | ump = VFSTOUFS(mp); | | 1083 | ump = VFSTOUFS(mp); |
1084 | fs = ump->um_lfs; | | 1084 | fs = ump->um_lfs; |
1085 | | | 1085 | |
1086 | /* Two checkpoints */ | | 1086 | /* Two checkpoints */ |
1087 | lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); | | 1087 | lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); |
1088 | lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); | | 1088 | lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); |
1089 | | | 1089 | |
1090 | /* wake up the cleaner so it can die */ | | 1090 | /* wake up the cleaner so it can die */ |
1091 | lfs_wakeup_cleaner(fs); | | 1091 | lfs_wakeup_cleaner(fs); |
1092 | mutex_enter(&lfs_lock); | | 1092 | mutex_enter(&lfs_lock); |
1093 | while (fs->lfs_sleepers) | | 1093 | while (fs->lfs_sleepers) |
1094 | mtsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, | | 1094 | mtsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, |
1095 | &lfs_lock); | | 1095 | &lfs_lock); |
1096 | mutex_exit(&lfs_lock); | | 1096 | mutex_exit(&lfs_lock); |
1097 | | | 1097 | |
1098 | #ifdef QUOTA | | 1098 | #ifdef QUOTA |
1099 | if (mp->mnt_flag & MNT_QUOTA) { | | 1099 | if (mp->mnt_flag & MNT_QUOTA) { |
1100 | int i; | | 1100 | int i; |
1101 | error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags); | | 1101 | error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags); |
1102 | if (error) | | 1102 | if (error) |
1103 | return (error); | | 1103 | return (error); |
1104 | for (i = 0; i < MAXQUOTAS; i++) { | | 1104 | for (i = 0; i < MAXQUOTAS; i++) { |
1105 | if (ump->um_quotas[i] == NULLVP) | | 1105 | if (ump->um_quotas[i] == NULLVP) |
1106 | continue; | | 1106 | continue; |
1107 | quotaoff(l, mp, i); | | 1107 | quotaoff(l, mp, i); |
1108 | } | | 1108 | } |
1109 | /* | | 1109 | /* |
1110 | * Here we fall through to vflush again to ensure | | 1110 | * Here we fall through to vflush again to ensure |
1111 | * that we have gotten rid of all the system vnodes. | | 1111 | * that we have gotten rid of all the system vnodes. |
1112 | */ | | 1112 | */ |
1113 | } | | 1113 | } |
1114 | #endif | | 1114 | #endif |
1115 | if ((error = vflush(mp, fs->lfs_ivnode, flags)) != 0) | | 1115 | if ((error = vflush(mp, fs->lfs_ivnode, flags)) != 0) |
1116 | return (error); | | 1116 | return (error); |
1117 | if ((error = VFS_SYNC(mp, 1, l->l_cred)) != 0) | | 1117 | if ((error = VFS_SYNC(mp, 1, l->l_cred)) != 0) |
1118 | return (error); | | 1118 | return (error); |
1119 | vp = fs->lfs_ivnode; | | 1119 | vp = fs->lfs_ivnode; |
1120 | mutex_enter(&vp->v_interlock); | | 1120 | mutex_enter(&vp->v_interlock); |
1121 | if (LIST_FIRST(&vp->v_dirtyblkhd)) | | 1121 | if (LIST_FIRST(&vp->v_dirtyblkhd)) |
1122 | panic("lfs_unmount: still dirty blocks on ifile vnode"); | | 1122 | panic("lfs_unmount: still dirty blocks on ifile vnode"); |
1123 | mutex_exit(&vp->v_interlock); | | 1123 | mutex_exit(&vp->v_interlock); |
1124 | | | 1124 | |
1125 | /* Explicitly write the superblock, to update serial and pflags */ | | 1125 | /* Explicitly write the superblock, to update serial and pflags */ |
1126 | fs->lfs_pflags |= LFS_PF_CLEAN; | | 1126 | fs->lfs_pflags |= LFS_PF_CLEAN; |
1127 | lfs_writesuper(fs, fs->lfs_sboffs[0]); | | 1127 | lfs_writesuper(fs, fs->lfs_sboffs[0]); |
1128 | lfs_writesuper(fs, fs->lfs_sboffs[1]); | | 1128 | lfs_writesuper(fs, fs->lfs_sboffs[1]); |
1129 | mutex_enter(&lfs_lock); | | 1129 | mutex_enter(&lfs_lock); |
1130 | while (fs->lfs_iocount) | | 1130 | while (fs->lfs_iocount) |
1131 | mtsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, | | 1131 | mtsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, |
1132 | &lfs_lock); | | 1132 | &lfs_lock); |
1133 | mutex_exit(&lfs_lock); | | 1133 | mutex_exit(&lfs_lock); |
1134 | | | 1134 | |
1135 | /* Finish with the Ifile, now that we're done with it */ | | 1135 | /* Finish with the Ifile, now that we're done with it */ |
1136 | vgone(fs->lfs_ivnode); | | 1136 | vgone(fs->lfs_ivnode); |
1137 | | | 1137 | |
1138 | ronly = !fs->lfs_ronly; | | 1138 | ronly = !fs->lfs_ronly; |
1139 | if (ump->um_devvp->v_type != VBAD) | | 1139 | if (ump->um_devvp->v_type != VBAD) |
1140 | ump->um_devvp->v_specmountpoint = NULL; | | 1140 | ump->um_devvp->v_specmountpoint = NULL; |
1141 | vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); | | 1141 | vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); |
1142 | error = VOP_CLOSE(ump->um_devvp, | | 1142 | error = VOP_CLOSE(ump->um_devvp, |
1143 | ronly ? FREAD : FREAD|FWRITE, NOCRED); | | 1143 | ronly ? FREAD : FREAD|FWRITE, NOCRED); |
1144 | vput(ump->um_devvp); | | 1144 | vput(ump->um_devvp); |
1145 | | | 1145 | |
1146 | /* Complain about page leakage */ | | 1146 | /* Complain about page leakage */ |
1147 | if (fs->lfs_pages > 0) | | 1147 | if (fs->lfs_pages > 0) |
1148 | printf("lfs_unmount: still claim %d pages (%d in subsystem)\n", | | 1148 | printf("lfs_unmount: still claim %d pages (%d in subsystem)\n", |
1149 | fs->lfs_pages, lfs_subsys_pages); | | 1149 | fs->lfs_pages, lfs_subsys_pages); |
1150 | | | 1150 | |
1151 | /* Free per-mount data structures */ | | 1151 | /* Free per-mount data structures */ |
1152 | free(fs->lfs_ino_bitmap, M_SEGMENT); | | 1152 | free(fs->lfs_ino_bitmap, M_SEGMENT); |
1153 | free(fs->lfs_suflags[0], M_SEGMENT); | | 1153 | free(fs->lfs_suflags[0], M_SEGMENT); |
1154 | free(fs->lfs_suflags[1], M_SEGMENT); | | 1154 | free(fs->lfs_suflags[1], M_SEGMENT); |
1155 | free(fs->lfs_suflags, M_SEGMENT); | | 1155 | free(fs->lfs_suflags, M_SEGMENT); |
1156 | lfs_free_resblks(fs); | | 1156 | lfs_free_resblks(fs); |
1157 | cv_destroy(&fs->lfs_stopcv); | | 1157 | cv_destroy(&fs->lfs_stopcv); |
1158 | rw_destroy(&fs->lfs_fraglock); | | 1158 | rw_destroy(&fs->lfs_fraglock); |
1159 | rw_destroy(&fs->lfs_iflock); | | 1159 | rw_destroy(&fs->lfs_iflock); |
1160 | free(fs, M_UFSMNT); | | 1160 | free(fs, M_UFSMNT); |
1161 | free(ump, M_UFSMNT); | | 1161 | free(ump, M_UFSMNT); |
1162 | | | 1162 | |
1163 | mp->mnt_data = NULL; | | 1163 | mp->mnt_data = NULL; |
1164 | mp->mnt_flag &= ~MNT_LOCAL; | | 1164 | mp->mnt_flag &= ~MNT_LOCAL; |
1165 | return (error); | | 1165 | return (error); |
1166 | } | | 1166 | } |
1167 | | | 1167 | |
1168 | /* | | 1168 | /* |
1169 | * Get file system statistics. | | 1169 | * Get file system statistics. |
1170 | * | | 1170 | * |
1171 | * NB: We don't lock to access the superblock here, because it's not | | 1171 | * NB: We don't lock to access the superblock here, because it's not |
1172 | * really that important if we get it wrong. | | 1172 | * really that important if we get it wrong. |
1173 | */ | | 1173 | */ |
1174 | int | | 1174 | int |
1175 | lfs_statvfs(struct mount *mp, struct statvfs *sbp) | | 1175 | lfs_statvfs(struct mount *mp, struct statvfs *sbp) |
1176 | { | | 1176 | { |
1177 | struct lfs *fs; | | 1177 | struct lfs *fs; |
1178 | struct ufsmount *ump; | | 1178 | struct ufsmount *ump; |
1179 | | | 1179 | |
1180 | ump = VFSTOUFS(mp); | | 1180 | ump = VFSTOUFS(mp); |
1181 | fs = ump->um_lfs; | | 1181 | fs = ump->um_lfs; |
1182 | if (fs->lfs_magic != LFS_MAGIC) | | 1182 | if (fs->lfs_magic != LFS_MAGIC) |
1183 | panic("lfs_statvfs: magic"); | | 1183 | panic("lfs_statvfs: magic"); |
1184 | | | 1184 | |
1185 | sbp->f_bsize = fs->lfs_bsize; | | 1185 | sbp->f_bsize = fs->lfs_bsize; |
1186 | sbp->f_frsize = fs->lfs_fsize; | | 1186 | sbp->f_frsize = fs->lfs_fsize; |
1187 | sbp->f_iosize = fs->lfs_bsize; | | 1187 | sbp->f_iosize = fs->lfs_bsize; |
1188 | sbp->f_blocks = fsbtofrags(fs, LFS_EST_NONMETA(fs) - VTOI(fs->lfs_ivnode)->i_lfs_effnblks); | | 1188 | sbp->f_blocks = fsbtofrags(fs, LFS_EST_NONMETA(fs) - VTOI(fs->lfs_ivnode)->i_lfs_effnblks); |
1189 | | | 1189 | |
1190 | sbp->f_bfree = fsbtofrags(fs, LFS_EST_BFREE(fs)); | | 1190 | sbp->f_bfree = fsbtofrags(fs, LFS_EST_BFREE(fs)); |
1191 | KASSERT(sbp->f_bfree <= fs->lfs_dsize); | | 1191 | KASSERT(sbp->f_bfree <= fs->lfs_dsize); |
1192 | #if 0 | | 1192 | #if 0 |
1193 | if (sbp->f_bfree < 0) | | 1193 | if (sbp->f_bfree < 0) |
1194 | sbp->f_bfree = 0; | | 1194 | sbp->f_bfree = 0; |
1195 | #endif | | 1195 | #endif |
1196 | | | 1196 | |
1197 | sbp->f_bresvd = fsbtofrags(fs, LFS_EST_RSVD(fs)); | | 1197 | sbp->f_bresvd = fsbtofrags(fs, LFS_EST_RSVD(fs)); |
1198 | if (sbp->f_bfree > sbp->f_bresvd) | | 1198 | if (sbp->f_bfree > sbp->f_bresvd) |
1199 | sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; | | 1199 | sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; |
1200 | else | | 1200 | else |
1201 | sbp->f_bavail = 0; | | 1201 | sbp->f_bavail = 0; |
1202 | | | 1202 | |
1203 | sbp->f_files = fs->lfs_bfree / btofsb(fs, fs->lfs_ibsize) * INOPB(fs); | | 1203 | sbp->f_files = fs->lfs_bfree / btofsb(fs, fs->lfs_ibsize) * INOPB(fs); |
1204 | sbp->f_ffree = sbp->f_files - fs->lfs_nfiles; | | 1204 | sbp->f_ffree = sbp->f_files - fs->lfs_nfiles; |
1205 | sbp->f_favail = sbp->f_ffree; | | 1205 | sbp->f_favail = sbp->f_ffree; |
1206 | sbp->f_fresvd = 0; | | 1206 | sbp->f_fresvd = 0; |
1207 | copy_statvfs_info(sbp, mp); | | 1207 | copy_statvfs_info(sbp, mp); |
1208 | return (0); | | 1208 | return (0); |
1209 | } | | 1209 | } |
1210 | | | 1210 | |
1211 | /* | | 1211 | /* |
1212 | * Go through the disk queues to initiate sandbagged IO; | | 1212 | * Go through the disk queues to initiate sandbagged IO; |
1213 | * go through the inodes to write those that have been modified; | | 1213 | * go through the inodes to write those that have been modified; |
1214 | * initiate the writing of the super block if it has been modified. | | 1214 | * initiate the writing of the super block if it has been modified. |
1215 | * | | 1215 | * |
1216 | * Note: we are always called with the filesystem marked `MPBUSY'. | | 1216 | * Note: we are always called with the filesystem marked `MPBUSY'. |
1217 | */ | | 1217 | */ |
1218 | int | | 1218 | int |
1219 | lfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) | | 1219 | lfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) |
1220 | { | | 1220 | { |
1221 | int error; | | 1221 | int error; |
1222 | struct lfs *fs; | | 1222 | struct lfs *fs; |
1223 | | | 1223 | |
1224 | fs = VFSTOUFS(mp)->um_lfs; | | 1224 | fs = VFSTOUFS(mp)->um_lfs; |
1225 | if (fs->lfs_ronly) | | 1225 | if (fs->lfs_ronly) |
1226 | return 0; | | 1226 | return 0; |
1227 | | | 1227 | |
1228 | /* Snapshots should not hose the syncer */ | | 1228 | /* Snapshots should not hose the syncer */ |
1229 | /* | | 1229 | /* |
1230 | * XXX Sync can block here anyway, since we don't have a very | | 1230 | * XXX Sync can block here anyway, since we don't have a very |
1231 | * XXX good idea of how much data is pending. If it's more | | 1231 | * XXX good idea of how much data is pending. If it's more |
1232 | * XXX than a segment and lfs_nextseg is close to the end of | | 1232 | * XXX than a segment and lfs_nextseg is close to the end of |
1233 | * XXX the log, we'll likely block. | | 1233 | * XXX the log, we'll likely block. |
1234 | */ | | 1234 | */ |
1235 | mutex_enter(&lfs_lock); | | 1235 | mutex_enter(&lfs_lock); |
1236 | if (fs->lfs_nowrap && fs->lfs_nextseg < fs->lfs_curseg) { | | 1236 | if (fs->lfs_nowrap && fs->lfs_nextseg < fs->lfs_curseg) { |
1237 | mutex_exit(&lfs_lock); | | 1237 | mutex_exit(&lfs_lock); |
1238 | return 0; | | 1238 | return 0; |
1239 | } | | 1239 | } |
1240 | mutex_exit(&lfs_lock); | | 1240 | mutex_exit(&lfs_lock); |
1241 | | | 1241 | |
1242 | lfs_writer_enter(fs, "lfs_dirops"); | | 1242 | lfs_writer_enter(fs, "lfs_dirops"); |
1243 | | | 1243 | |
1244 | /* All syncs must be checkpoints until roll-forward is implemented. */ | | 1244 | /* All syncs must be checkpoints until roll-forward is implemented. */ |
1245 | DLOG((DLOG_FLUSH, "lfs_sync at 0x%x\n", fs->lfs_offset)); | | 1245 | DLOG((DLOG_FLUSH, "lfs_sync at 0x%x\n", fs->lfs_offset)); |
1246 | error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0)); | | 1246 | error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0)); |
1247 | lfs_writer_leave(fs); | | 1247 | lfs_writer_leave(fs); |
1248 | #ifdef QUOTA | | 1248 | #ifdef QUOTA |
1249 | qsync(mp); | | 1249 | qsync(mp); |
1250 | #endif | | 1250 | #endif |
1251 | return (error); | | 1251 | return (error); |
1252 | } | | 1252 | } |
1253 | | | 1253 | |
1254 | extern kmutex_t ufs_hashlock; | | 1254 | extern kmutex_t ufs_hashlock; |
1255 | | | 1255 | |
1256 | /* | | 1256 | /* |
1257 | * Look up an LFS dinode number to find its incore vnode. If not already | | 1257 | * Look up an LFS dinode number to find its incore vnode. If not already |
1258 | * in core, read it in from the specified device. Return the inode locked. | | 1258 | * in core, read it in from the specified device. Return the inode locked. |
1259 | * Detection and handling of mount points must be done by the calling routine. | | 1259 | * Detection and handling of mount points must be done by the calling routine. |
1260 | */ | | 1260 | */ |
1261 | int | | 1261 | int |
1262 | lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) | | 1262 | lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) |
1263 | { | | 1263 | { |
1264 | struct lfs *fs; | | 1264 | struct lfs *fs; |
1265 | struct ufs1_dinode *dip; | | 1265 | struct ufs1_dinode *dip; |
1266 | struct inode *ip; | | 1266 | struct inode *ip; |
1267 | struct buf *bp; | | 1267 | struct buf *bp; |
1268 | struct ifile *ifp; | | 1268 | struct ifile *ifp; |
1269 | struct vnode *vp; | | 1269 | struct vnode *vp; |
1270 | struct ufsmount *ump; | | 1270 | struct ufsmount *ump; |
1271 | daddr_t daddr; | | 1271 | daddr_t daddr; |
1272 | dev_t dev; | | 1272 | dev_t dev; |
1273 | int error, retries; | | 1273 | int error, retries; |
1274 | struct timespec ts; | | 1274 | struct timespec ts; |
1275 | | | 1275 | |
1276 | memset(&ts, 0, sizeof ts); /* XXX gcc */ | | 1276 | memset(&ts, 0, sizeof ts); /* XXX gcc */ |
1277 | | | 1277 | |
1278 | ump = VFSTOUFS(mp); | | 1278 | ump = VFSTOUFS(mp); |
1279 | dev = ump->um_dev; | | 1279 | dev = ump->um_dev; |
1280 | fs = ump->um_lfs; | | 1280 | fs = ump->um_lfs; |
1281 | | | 1281 | |
1282 | /* | | 1282 | /* |
1283 | * If the filesystem is not completely mounted yet, suspend | | 1283 | * If the filesystem is not completely mounted yet, suspend |
1284 | * any access requests (wait for roll-forward to complete). | | 1284 | * any access requests (wait for roll-forward to complete). |
1285 | */ | | 1285 | */ |
1286 | mutex_enter(&lfs_lock); | | 1286 | mutex_enter(&lfs_lock); |
1287 | while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid) | | 1287 | while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid) |
1288 | mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, | | 1288 | mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, |
1289 | &lfs_lock); | | 1289 | &lfs_lock); |
1290 | mutex_exit(&lfs_lock); | | 1290 | mutex_exit(&lfs_lock); |
1291 | | | 1291 | |
1292 | retry: | | 1292 | retry: |
1293 | if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) | | 1293 | if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) |
1294 | return (0); | | 1294 | return (0); |
1295 | | | 1295 | |
1296 | if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) { | | 1296 | if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) { |
1297 | *vpp = NULL; | | 1297 | *vpp = NULL; |
1298 | return (error); | | 1298 | return (error); |
1299 | } | | 1299 | } |
1300 | | | 1300 | |
1301 | mutex_enter(&ufs_hashlock); | | 1301 | mutex_enter(&ufs_hashlock); |
1302 | if (ufs_ihashget(dev, ino, 0) != NULL) { | | 1302 | if (ufs_ihashget(dev, ino, 0) != NULL) { |
1303 | mutex_exit(&ufs_hashlock); | | 1303 | mutex_exit(&ufs_hashlock); |
1304 | ungetnewvnode(vp); | | 1304 | ungetnewvnode(vp); |
1305 | goto retry; | | 1305 | goto retry; |
1306 | } | | 1306 | } |
1307 | | | 1307 | |
1308 | /* Translate the inode number to a disk address. */ | | 1308 | /* Translate the inode number to a disk address. */ |
1309 | if (ino == LFS_IFILE_INUM) | | 1309 | if (ino == LFS_IFILE_INUM) |
1310 | daddr = fs->lfs_idaddr; | | 1310 | daddr = fs->lfs_idaddr; |
1311 | else { | | 1311 | else { |
1312 | /* XXX bounds-check this too */ | | 1312 | /* XXX bounds-check this too */ |
1313 | LFS_IENTRY(ifp, fs, ino, bp); | | 1313 | LFS_IENTRY(ifp, fs, ino, bp); |
1314 | daddr = ifp->if_daddr; | | 1314 | daddr = ifp->if_daddr; |
1315 | if (fs->lfs_version > 1) { | | 1315 | if (fs->lfs_version > 1) { |
1316 | ts.tv_sec = ifp->if_atime_sec; | | 1316 | ts.tv_sec = ifp->if_atime_sec; |
1317 | ts.tv_nsec = ifp->if_atime_nsec; | | 1317 | ts.tv_nsec = ifp->if_atime_nsec; |
1318 | } | | 1318 | } |
1319 | | | 1319 | |
1320 | brelse(bp, 0); | | 1320 | brelse(bp, 0); |
1321 | if (daddr == LFS_UNUSED_DADDR) { | | 1321 | if (daddr == LFS_UNUSED_DADDR) { |
1322 | *vpp = NULLVP; | | 1322 | *vpp = NULLVP; |
1323 | mutex_exit(&ufs_hashlock); | | 1323 | mutex_exit(&ufs_hashlock); |
1324 | ungetnewvnode(vp); | | 1324 | ungetnewvnode(vp); |
1325 | return (ENOENT); | | 1325 | return (ENOENT); |
1326 | } | | 1326 | } |
1327 | } | | 1327 | } |
1328 | | | 1328 | |
1329 | /* Allocate/init new vnode/inode. */ | | 1329 | /* Allocate/init new vnode/inode. */ |
1330 | lfs_vcreate(mp, ino, vp); | | 1330 | lfs_vcreate(mp, ino, vp); |
1331 | | | 1331 | |
1332 | /* | | 1332 | /* |
1333 | * Put it onto its hash chain and lock it so that other requests for | | 1333 | * Put it onto its hash chain and lock it so that other requests for |
1334 | * this inode will block if they arrive while we are sleeping waiting | | 1334 | * this inode will block if they arrive while we are sleeping waiting |
1335 | * for old data structures to be purged or for the contents of the | | 1335 | * for old data structures to be purged or for the contents of the |
1336 | * disk portion of this inode to be read. | | 1336 | * disk portion of this inode to be read. |
1337 | */ | | 1337 | */ |
1338 | ip = VTOI(vp); | | 1338 | ip = VTOI(vp); |
1339 | ufs_ihashins(ip); | | 1339 | ufs_ihashins(ip); |
1340 | mutex_exit(&ufs_hashlock); | | 1340 | mutex_exit(&ufs_hashlock); |
1341 | | | 1341 | |
1342 | /* | | 1342 | /* |
1343 | * XXX | | 1343 | * XXX |
1344 | * This may not need to be here, logically it should go down with | | 1344 | * This may not need to be here, logically it should go down with |
1345 | * the i_devvp initialization. | | 1345 | * the i_devvp initialization. |
1346 | * Ask Kirk. | | 1346 | * Ask Kirk. |
1347 | */ | | 1347 | */ |
1348 | ip->i_lfs = ump->um_lfs; | | 1348 | ip->i_lfs = ump->um_lfs; |
1349 | | | 1349 | |
1350 | /* Read in the disk contents for the inode, copy into the inode. */ | | 1350 | /* Read in the disk contents for the inode, copy into the inode. */ |
1351 | retries = 0; | | 1351 | retries = 0; |
1352 | again: | | 1352 | again: |
1353 | error = bread(ump->um_devvp, fsbtodb(fs, daddr), | | 1353 | error = bread(ump->um_devvp, fsbtodb(fs, daddr), |
1354 | (fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_ibsize), | | 1354 | (fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_ibsize), |
1355 | NOCRED, 0, &bp); | | 1355 | NOCRED, 0, &bp); |
1356 | if (error) { | | 1356 | if (error) { |
1357 | /* | | 1357 | /* |
1358 | * The inode does not contain anything useful, so it would | | 1358 | * The inode does not contain anything useful, so it would |
1359 | * be misleading to leave it on its hash chain. With mode | | 1359 | * be misleading to leave it on its hash chain. With mode |
1360 | * still zero, it will be unlinked and returned to the free | | 1360 | * still zero, it will be unlinked and returned to the free |
1361 | * list by vput(). | | 1361 | * list by vput(). |
1362 | */ | | 1362 | */ |
1363 | vput(vp); | | 1363 | vput(vp); |
1364 | brelse(bp, 0); | | 1364 | brelse(bp, 0); |
1365 | *vpp = NULL; | | 1365 | *vpp = NULL; |
1366 | return (error); | | 1366 | return (error); |
1367 | } | | 1367 | } |
1368 | | | 1368 | |
1369 | dip = lfs_ifind(fs, ino, bp); | | 1369 | dip = lfs_ifind(fs, ino, bp); |
1370 | if (dip == NULL) { | | 1370 | if (dip == NULL) { |
1371 | /* Assume write has not completed yet; try again */ | | 1371 | /* Assume write has not completed yet; try again */ |
1372 | brelse(bp, BC_INVAL); | | 1372 | brelse(bp, BC_INVAL); |
1373 | ++retries; | | 1373 | ++retries; |
1374 | if (retries > LFS_IFIND_RETRIES) { | | 1374 | if (retries > LFS_IFIND_RETRIES) { |
1375 | #ifdef DEBUG | | 1375 | #ifdef DEBUG |
1376 | /* If the seglock is held look at the bpp to see | | 1376 | /* If the seglock is held look at the bpp to see |
1377 | what is there anyway */ | | 1377 | what is there anyway */ |
1378 | mutex_enter(&lfs_lock); | | 1378 | mutex_enter(&lfs_lock); |
1379 | if (fs->lfs_seglock > 0) { | | 1379 | if (fs->lfs_seglock > 0) { |
1380 | struct buf **bpp; | | 1380 | struct buf **bpp; |
1381 | struct ufs1_dinode *dp; | | 1381 | struct ufs1_dinode *dp; |
1382 | int i; | | 1382 | int i; |
1383 | | | 1383 | |
1384 | for (bpp = fs->lfs_sp->bpp; | | 1384 | for (bpp = fs->lfs_sp->bpp; |
1385 | bpp != fs->lfs_sp->cbpp; ++bpp) { | | 1385 | bpp != fs->lfs_sp->cbpp; ++bpp) { |
1386 | if ((*bpp)->b_vp == fs->lfs_ivnode && | | 1386 | if ((*bpp)->b_vp == fs->lfs_ivnode && |
1387 | bpp != fs->lfs_sp->bpp) { | | 1387 | bpp != fs->lfs_sp->bpp) { |
1388 | /* Inode block */ | | 1388 | /* Inode block */ |
1389 | printf("lfs_vget: block 0x%" PRIx64 ": ", | | 1389 | printf("lfs_vget: block 0x%" PRIx64 ": ", |
1390 | (*bpp)->b_blkno); | | 1390 | (*bpp)->b_blkno); |
1391 | dp = (struct ufs1_dinode *)(*bpp)->b_data; | | 1391 | dp = (struct ufs1_dinode *)(*bpp)->b_data; |
1392 | for (i = 0; i < INOPB(fs); i++) | | 1392 | for (i = 0; i < INOPB(fs); i++) |
1393 | if (dp[i].di_u.inumber) | | 1393 | if (dp[i].di_u.inumber) |
1394 | printf("%d ", dp[i].di_u.inumber); | | 1394 | printf("%d ", dp[i].di_u.inumber); |
1395 | printf("\n"); | | 1395 | printf("\n"); |
1396 | } | | 1396 | } |
1397 | } | | 1397 | } |
1398 | } | | 1398 | } |
1399 | mutex_exit(&lfs_lock); | | 1399 | mutex_exit(&lfs_lock); |
1400 | #endif /* DEBUG */ | | 1400 | #endif /* DEBUG */ |
1401 | panic("lfs_vget: dinode not found"); | | 1401 | panic("lfs_vget: dinode not found"); |
1402 | } | | 1402 | } |
1403 | mutex_enter(&lfs_lock); | | 1403 | mutex_enter(&lfs_lock); |
1404 | if (fs->lfs_iocount) { | | 1404 | if (fs->lfs_iocount) { |
1405 | DLOG((DLOG_VNODE, "lfs_vget: dinode %d not found, retrying...\n", ino)); | | 1405 | DLOG((DLOG_VNODE, "lfs_vget: dinode %d not found, retrying...\n", ino)); |
1406 | (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, | | 1406 | (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, |
1407 | "lfs ifind", 1, &lfs_lock); | | 1407 | "lfs ifind", 1, &lfs_lock); |
1408 | } else | | 1408 | } else |
1409 | retries = LFS_IFIND_RETRIES; | | 1409 | retries = LFS_IFIND_RETRIES; |
1410 | mutex_exit(&lfs_lock); | | 1410 | mutex_exit(&lfs_lock); |
1411 | goto again; | | 1411 | goto again; |
1412 | } | | 1412 | } |
1413 | *ip->i_din.ffs1_din = *dip; | | 1413 | *ip->i_din.ffs1_din = *dip; |
1414 | brelse(bp, 0); | | 1414 | brelse(bp, 0); |
1415 | | | 1415 | |
1416 | if (fs->lfs_version > 1) { | | 1416 | if (fs->lfs_version > 1) { |
1417 | ip->i_ffs1_atime = ts.tv_sec; | | 1417 | ip->i_ffs1_atime = ts.tv_sec; |
1418 | ip->i_ffs1_atimensec = ts.tv_nsec; | | 1418 | ip->i_ffs1_atimensec = ts.tv_nsec; |
1419 | } | | 1419 | } |
1420 | | | 1420 | |
1421 | lfs_vinit(mp, &vp); | | 1421 | lfs_vinit(mp, &vp); |
1422 | | | 1422 | |
1423 | *vpp = vp; | | 1423 | *vpp = vp; |
1424 | | | 1424 | |
1425 | KASSERT(VOP_ISLOCKED(vp)); | | 1425 | KASSERT(VOP_ISLOCKED(vp)); |
1426 | | | 1426 | |
1427 | return (0); | | 1427 | return (0); |
1428 | } | | 1428 | } |
1429 | | | 1429 | |
1430 | /* | | 1430 | /* |
1431 | * File handle to vnode | | 1431 | * File handle to vnode |
1432 | */ | | 1432 | */ |
1433 | int | | 1433 | int |
1434 | lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) | | 1434 | lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) |
1435 | { | | 1435 | { |
1436 | struct lfid lfh; | | 1436 | struct lfid lfh; |
1437 | struct buf *bp; | | 1437 | struct buf *bp; |
1438 | IFILE *ifp; | | 1438 | IFILE *ifp; |
1439 | int32_t daddr; | | 1439 | int32_t daddr; |
1440 | struct lfs *fs; | | 1440 | struct lfs *fs; |
1441 | vnode_t *vp; | | 1441 | vnode_t *vp; |
1442 | | | 1442 | |
1443 | if (fhp->fid_len != sizeof(struct lfid)) | | 1443 | if (fhp->fid_len != sizeof(struct lfid)) |
1444 | return EINVAL; | | 1444 | return EINVAL; |
1445 | | | 1445 | |
1446 | memcpy(&lfh, fhp, sizeof(lfh)); | | 1446 | memcpy(&lfh, fhp, sizeof(lfh)); |
1447 | if (lfh.lfid_ino < LFS_IFILE_INUM) | | 1447 | if (lfh.lfid_ino < LFS_IFILE_INUM) |
1448 | return ESTALE; | | 1448 | return ESTALE; |
1449 | | | 1449 | |
1450 | fs = VFSTOUFS(mp)->um_lfs; | | 1450 | fs = VFSTOUFS(mp)->um_lfs; |
1451 | if (lfh.lfid_ident != fs->lfs_ident) | | 1451 | if (lfh.lfid_ident != fs->lfs_ident) |
1452 | return ESTALE; | | 1452 | return ESTALE; |
1453 | | | 1453 | |
1454 | if (lfh.lfid_ino > | | 1454 | if (lfh.lfid_ino > |
1455 | ((VTOI(fs->lfs_ivnode)->i_ffs1_size >> fs->lfs_bshift) - | | 1455 | ((VTOI(fs->lfs_ivnode)->i_ffs1_size >> fs->lfs_bshift) - |
1456 | fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb) | | 1456 | fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb) |
1457 | return ESTALE; | | 1457 | return ESTALE; |
1458 | | | 1458 | |
1459 | mutex_enter(&ufs_ihash_lock); | | 1459 | mutex_enter(&ufs_ihash_lock); |
1460 | vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino); | | 1460 | vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino); |
1461 | mutex_exit(&ufs_ihash_lock); | | 1461 | mutex_exit(&ufs_ihash_lock); |
1462 | if (vp == NULL) { | | 1462 | if (vp == NULL) { |
1463 | LFS_IENTRY(ifp, fs, lfh.lfid_ino, bp); | | 1463 | LFS_IENTRY(ifp, fs, lfh.lfid_ino, bp); |
1464 | daddr = ifp->if_daddr; | | 1464 | daddr = ifp->if_daddr; |
1465 | brelse(bp, 0); | | 1465 | brelse(bp, 0); |
1466 | if (daddr == LFS_UNUSED_DADDR) | | 1466 | if (daddr == LFS_UNUSED_DADDR) |
1467 | return ESTALE; | | 1467 | return ESTALE; |
1468 | } | | 1468 | } |
1469 | | | 1469 | |
1470 | return (ufs_fhtovp(mp, &lfh.lfid_ufid, vpp)); | | 1470 | return (ufs_fhtovp(mp, &lfh.lfid_ufid, vpp)); |
1471 | } | | 1471 | } |
1472 | | | 1472 | |
1473 | /* | | 1473 | /* |
1474 | * Vnode pointer to File handle | | 1474 | * Vnode pointer to File handle |
1475 | */ | | 1475 | */ |
1476 | /* ARGSUSED */ | | 1476 | /* ARGSUSED */ |
1477 | int | | 1477 | int |
1478 | lfs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) | | 1478 | lfs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) |
1479 | { | | 1479 | { |
1480 | struct inode *ip; | | 1480 | struct inode *ip; |
1481 | struct lfid lfh; | | 1481 | struct lfid lfh; |
1482 | | | 1482 | |
1483 | if (*fh_size < sizeof(struct lfid)) { | | 1483 | if (*fh_size < sizeof(struct lfid)) { |
1484 | *fh_size = sizeof(struct lfid); | | 1484 | *fh_size = sizeof(struct lfid); |
1485 | return E2BIG; | | 1485 | return E2BIG; |
1486 | } | | 1486 | } |
1487 | *fh_size = sizeof(struct lfid); | | 1487 | *fh_size = sizeof(struct lfid); |
1488 | ip = VTOI(vp); | | 1488 | ip = VTOI(vp); |
1489 | memset(&lfh, 0, sizeof(lfh)); | | 1489 | memset(&lfh, 0, sizeof(lfh)); |
1490 | lfh.lfid_len = sizeof(struct lfid); | | 1490 | lfh.lfid_len = sizeof(struct lfid); |
1491 | lfh.lfid_ino = ip->i_number; | | 1491 | lfh.lfid_ino = ip->i_number; |
1492 | lfh.lfid_gen = ip->i_gen; | | 1492 | lfh.lfid_gen = ip->i_gen; |
1493 | lfh.lfid_ident = ip->i_lfs->lfs_ident; | | 1493 | lfh.lfid_ident = ip->i_lfs->lfs_ident; |
1494 | memcpy(fhp, &lfh, sizeof(lfh)); | | 1494 | memcpy(fhp, &lfh, sizeof(lfh)); |
1495 | return (0); | | 1495 | return (0); |
1496 | } | | 1496 | } |
1497 | | | 1497 | |
1498 | /* | | 1498 | /* |
1499 | * ufs_bmaparray callback function for writing. | | 1499 | * ufs_bmaparray callback function for writing. |
1500 | * | | 1500 | * |
1501 | * Since blocks will be written to the new segment anyway, | | 1501 | * Since blocks will be written to the new segment anyway, |
1502 | * we don't care about current daddr of them. | | 1502 | * we don't care about current daddr of them. |
1503 | */ | | 1503 | */ |
1504 | static bool | | 1504 | static bool |
1505 | lfs_issequential_hole(const struct ufsmount *ump, | | 1505 | lfs_issequential_hole(const struct ufsmount *ump, |
1506 | daddr_t daddr0, daddr_t daddr1) | | 1506 | daddr_t daddr0, daddr_t daddr1) |
1507 | { | | 1507 | { |
1508 | daddr0 = (daddr_t)((int32_t)daddr0); /* XXX ondisk32 */ | | 1508 | daddr0 = (daddr_t)((int32_t)daddr0); /* XXX ondisk32 */ |
1509 | daddr1 = (daddr_t)((int32_t)daddr1); /* XXX ondisk32 */ | | 1509 | daddr1 = (daddr_t)((int32_t)daddr1); /* XXX ondisk32 */ |
1510 | | | 1510 | |
1511 | KASSERT(daddr0 == UNWRITTEN || | | 1511 | KASSERT(daddr0 == UNWRITTEN || |
1512 | (0 <= daddr0 && daddr0 <= LFS_MAX_DADDR)); | | 1512 | (0 <= daddr0 && daddr0 <= LFS_MAX_DADDR)); |
1513 | KASSERT(daddr1 == UNWRITTEN || | | 1513 | KASSERT(daddr1 == UNWRITTEN || |
1514 | (0 <= daddr1 && daddr1 <= LFS_MAX_DADDR)); | | 1514 | (0 <= daddr1 && daddr1 <= LFS_MAX_DADDR)); |
1515 | | | 1515 | |
1516 | /* NOTE: all we want to know here is 'hole or not'. */ | | 1516 | /* NOTE: all we want to know here is 'hole or not'. */ |
1517 | /* NOTE: UNASSIGNED is converted to 0 by ufs_bmaparray. */ | | 1517 | /* NOTE: UNASSIGNED is converted to 0 by ufs_bmaparray. */ |
1518 | | | 1518 | |
1519 | /* | | 1519 | /* |
1520 | * treat UNWRITTENs and all resident blocks as 'contiguous' | | 1520 | * treat UNWRITTENs and all resident blocks as 'contiguous' |
1521 | */ | | 1521 | */ |
1522 | if (daddr0 != 0 && daddr1 != 0) | | 1522 | if (daddr0 != 0 && daddr1 != 0) |
1523 | return true; | | 1523 | return true; |
1524 | | | 1524 | |
1525 | /* | | 1525 | /* |
1526 | * both are in hole? | | 1526 | * both are in hole? |
1527 | */ | | 1527 | */ |
1528 | if (daddr0 == 0 && daddr1 == 0) | | 1528 | if (daddr0 == 0 && daddr1 == 0) |
1529 | return true; /* all holes are 'contiguous' for us. */ | | 1529 | return true; /* all holes are 'contiguous' for us. */ |
1530 | | | 1530 | |
1531 | return false; | | 1531 | return false; |
1532 | } | | 1532 | } |
1533 | | | 1533 | |
/*
 * lfs_gop_write functions exactly like genfs_gop_write, except that
 * (1) it requires the seglock to be held by its caller, and sp->fip
 *     to be properly initialized (it will return without re-initializing
 *     sp->fip, and without calling lfs_writeseg).
 * (2) it uses the remaining space in the segment, rather than VOP_BMAP,
 *     to determine how large a block it can write at once (though it does
 *     still use VOP_BMAP to find holes in the file);
 * (3) it calls lfs_gatherblock instead of VOP_STRATEGY on its blocks
 *     (leaving lfs_writeseg to deal with the cluster blocks, so we might
 *     now have clusters of clusters, ick.)
 *
 * Arguments:
 *	vp	vnode owning the pages (must not be the Ifile vnode)
 *	pgs	array of pages to write; pgs[0] supplies the start offset
 *	npages	number of entries in pgs
 *	flags	only logged here
 *
 * Returns 0 once the pages have been handed to VOP_BWRITE and
 * lfs_gatherblock; an error from ufs_bmaparray is passed to
 * nestiobuf_done() rather than returned.  Returns EAGAIN, after
 * unbusying and reactivating the pages, when the write cannot be done
 * now (see the "tryagain" label).
 */
static int
lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
    int flags)
{
	/* haveeof: nonzero once eof is valid, for the tryagain diagnostics */
	int i, error, run, haveeof = 0;
	int fs_bshift;
	vaddr_t kva;
	off_t eof, offset, startoffset = 0;
	/* skipbytes: bytes of the request that end up not being written */
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	/* mbp: master buffer covering the whole request; bp: per-run buffer */
	struct buf *mbp, *bp;
	struct vnode *devvp = VTOI(vp)->i_devvp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	struct segment *sp = fs->lfs_sp;
	UVMHIST_FUNC("lfs_gop_write"); UVMHIST_CALLED(ubchist);

	ASSERT_SEGLOCK(fs);

	/* The Ifile lives in the buffer cache */
	KASSERT(vp != fs->lfs_ivnode);

	/*
	 * We don't want to fill the disk before the cleaner has a chance
	 * to make room for us.  If we're in danger of doing that, fail
	 * with EAGAIN.  The caller will have to notice this, unlock
	 * so the cleaner can run, relock and try again.
	 *
	 * We must write everything, however, if our vnode is being
	 * reclaimed.
	 */
	if (LFS_STARVED_FOR_SEGS(fs) && vp != fs->lfs_flushvp)
		goto tryagain;

	/*
	 * Sometimes things slip past the filters in lfs_putpages,
	 * and the pagedaemon tries to write pages---problem is
	 * that the pagedaemon never acquires the segment lock.
	 *
	 * Alternatively, pages that were clean when we called
	 * genfs_putpages may have become dirty in the meantime.  In this
	 * case the segment header is not properly set up for blocks
	 * to be added to it.
	 *
	 * Unbusy and unclean the pages, and put them on the ACTIVE
	 * queue under the hypothesis that they couldn't have got here
	 * unless they were modified *quite* recently.
	 *
	 * XXXUBC that last statement is an oversimplification of course.
	 */
	if (!LFS_SEGLOCK_HELD(fs) ||
	    (ip->i_lfs_iflags & LFSI_NO_GOP_WRITE) ||
	    (pgs[0]->offset & fs->lfs_bmask) != 0) {
		goto tryagain;
	}

	UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
	    vp, pgs, npages, flags);

	GOP_SIZE(vp, vp->v_size, &eof, 0);
	haveeof = 1;

	/* Shift used below to convert byte offsets to block numbers for bmap */
	if (vp->v_type == VREG)
		fs_bshift = vp->v_mount->mnt_fs_bshift;
	else
		fs_bshift = DEV_BSHIFT;
	error = 0;
	pg = pgs[0];
	startoffset = pg->offset;
	KASSERT(eof >= 0);

	/* Nothing to write at or past eof; otherwise clip the request at eof */
	if (startoffset >= eof) {
		goto tryagain;
	} else
		bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;

	KASSERT(bytes != 0);

	/* Swap PG_DELWRI for PG_PAGEOUT */
	for (i = 0; i < npages; i++) {
		if (pgs[i]->flags & PG_DELWRI) {
			KASSERT(!(pgs[i]->flags & PG_PAGEOUT));
			pgs[i]->flags &= ~PG_DELWRI;
			pgs[i]->flags |= PG_PAGEOUT;
			uvm_pageout_start(1);
			mutex_enter(&uvm_pageqlock);
			uvm_pageunwire(pgs[i]);
			mutex_exit(&uvm_pageqlock);
		}
	}

	/*
	 * Check to make sure we're starting on a block boundary.
	 * We'll check later to make sure we always write entire
	 * blocks (or fragments).
	 */
	if (startoffset & fs->lfs_bmask)
		printf("%" PRId64 " & %" PRId64 " = %" PRId64 "\n",
		    startoffset, fs->lfs_bmask,
		    startoffset & fs->lfs_bmask);
	KASSERT((startoffset & fs->lfs_bmask) == 0);
	if (bytes & fs->lfs_ffmask) {
		printf("lfs_gop_write: asked to write %ld bytes\n", (long)bytes);
		panic("lfs_gop_write: non-integer blocks");
	}

	/*
	 * We could deadlock here on pager_map with UVMPAGER_MAPIN_WAITOK.
	 * If we would, write what we have and try again.  If we don't
	 * have anything to write, we'll have to sleep.
	 *
	 * (WAITOK is only passed on the first attempt when the segment
	 * summary records no finfo yet, i.e. ss_nfinfo < 1.)
	 */
	if ((kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
	    (((SEGSUM *)(sp->segsum))->ss_nfinfo < 1 ?
	    UVMPAGER_MAPIN_WAITOK : 0))) == 0x0) {
		DLOG((DLOG_PAGE, "lfs_gop_write: forcing write\n"));
#if 0
		      " with nfinfo=%d at offset 0x%x\n",
		      (int)((SEGSUM *)(sp->segsum))->ss_nfinfo,
		      (unsigned)fs->lfs_offset));
#endif
		/* Flush the partial segment, then resume with a fresh finfo */
		lfs_updatemeta(sp);
		lfs_release_finfo(fs);
		(void) lfs_writeseg(fs, sp);

		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);

		/*
		 * Having given up all of the pager_map we were holding,
		 * we can now wait for aiodoned to reclaim it for us
		 * without fear of deadlock.
		 */
		kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
		    UVMPAGER_MAPIN_WAITOK);
	}

	mutex_enter(&vp->v_interlock);
	vp->v_numoutput += 2;	/* one for biodone, one for aiodone */
	mutex_exit(&vp->v_interlock);

	/* Master buffer describing the whole mapped request */
	mbp = getiobuf(NULL, true);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_cflags = BC_BUSY|BC_AGE;
	mbp->b_iodone = uvm_aio_biodone;

	/*
	 * Walk the request one run at a time: each pass handles iobytes
	 * bytes starting at offset, until nothing is left in bytes.
	 */
	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = ufs_bmaparray(vp, lbn, &blkno, NULL, NULL, &run,
		    lfs_issequential_hole);
		if (error) {
			/* Give up on the remainder; count it all as skipped */
			UVMHIST_LOG(ubchist, "ufs_bmaparray() -> %d",
			    error,0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		/* Length of this run, limited to what is left of the request */
		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		/* A block number of -1 is skipped rather than written */
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/*
		 * Discover how much we can really pack into this buffer.
		 */
		/* If no room in the current segment, finish it up */
		if (sp->sum_bytes_left < sizeof(int32_t) ||
		    sp->seg_bytes_left < (1 << fs->lfs_bshift)) {
			int vers;

			lfs_updatemeta(sp);
			/* Save the version so the new finfo carries the same one */
			vers = sp->fip->fi_version;
			lfs_release_finfo(fs);
			(void) lfs_writeseg(fs, sp);

			lfs_acquire_finfo(fs, ip->i_number, vers);
		}
		/* Check both for space in segment and space in segsum */
		iobytes = MIN(iobytes, (sp->seg_bytes_left >> fs_bshift)
		    << fs_bshift);
		iobytes = MIN(iobytes, (sp->sum_bytes_left / sizeof(int32_t))
		    << fs_bshift);
		KASSERT(iobytes > 0);

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
			/* correct overcount if there is no second buffer */
			mutex_enter(&vp->v_interlock);
			--vp->v_numoutput;
			mutex_exit(&vp->v_interlock);
		} else {
			bp = getiobuf(NULL, true);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			nestiobuf_setup(mbp, bp, offset - pg->offset, iobytes);
			/*
			 * LFS doesn't like async I/O here, dies with
			 * an assert in lfs_bwrite().  Is that assert
			 * valid?  I retained non-async behaviour when
			 * I converted this to use nestiobuf --pooka
			 */
			bp->b_flags &= ~B_ASYNC;
			/*
			 * LFS uses VOP_BWRITE instead of VOP_STRATEGY.
			 * Therefore biodone doesn't get called for
			 * the buffer.  Therefore decrement the output
			 * counter that nestiobuf_setup() incremented.
			 */
			mutex_enter(&vp->v_interlock);
			vp->v_numoutput--;
			mutex_exit(&vp->v_interlock);
		}

		/* XXX This is silly ... is this necessary? */
		mutex_enter(&bufcache_lock);
		mutex_enter(&vp->v_interlock);
		bgetvp(vp, bp);
		mutex_exit(&vp->v_interlock);
		mutex_exit(&bufcache_lock);

		bp->b_lblkno = lblkno(fs, offset);
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}
		VOP_BWRITE(bp);
		/* Keep calling lfs_gatherblock until it returns zero */
		while (lfs_gatherblock(sp, bp, NULL))
			continue;
	}

	/* Report the skipped bytes and any bmap error against the master buf */
	nestiobuf_done(mbp, skipbytes, error);
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
	}
	UVMHIST_LOG(ubchist, "returning 0", 0,0,0,0);
	return (0);

    tryagain:
	/*
	 * We can't write the pages, for whatever reason.
	 * Clean up after ourselves, and make the caller try again.
	 */
	mutex_enter(&vp->v_interlock);

	/* Tell why we're here, if we know */
	if (ip->i_lfs_iflags & LFSI_NO_GOP_WRITE) {
		DLOG((DLOG_PAGE, "lfs_gop_write: clean pages dirtied\n"));
	} else if ((pgs[0]->offset & fs->lfs_bmask) != 0) {
		DLOG((DLOG_PAGE, "lfs_gop_write: not on block boundary\n"));
	} else if (haveeof && startoffset >= eof) {
		DLOG((DLOG_PAGE, "lfs_gop_write: ino %d start 0x%" PRIx64
		    " eof 0x%" PRIx64 " npages=%d\n", VTOI(vp)->i_number,
		    pgs[0]->offset, eof, npages));
	} else if (LFS_STARVED_FOR_SEGS(fs)) {
		DLOG((DLOG_PAGE, "lfs_gop_write: avail too low\n"));
	} else {
		DLOG((DLOG_PAGE, "lfs_gop_write: seglock not held\n"));
	}

	/*
	 * Undo the pageout bookkeeping for every page, activate it and
	 * clear PG_CLEAN, PG_DELWRI, PG_PAGEOUT and PG_RELEASED.
	 */
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		pg = pgs[i];

		if (pg->flags & PG_PAGEOUT)
			uvm_pageout_done(1);
		if (pg->flags & PG_DELWRI) {
			uvm_pageunwire(pg);
		}
		uvm_pageactivate(pg);
		pg->flags &= ~(PG_CLEAN|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
		DLOG((DLOG_PAGE, "pg[%d] = %p (vp %p off %" PRIx64 ")\n", i, pg,
		    vp, pg->offset));
		DLOG((DLOG_PAGE, "pg[%d]->flags = %x\n", i, pg->flags));
		DLOG((DLOG_PAGE, "pg[%d]->pqflags = %x\n", i, pg->pqflags));
		DLOG((DLOG_PAGE, "pg[%d]->uanon = %p\n", i, pg->uanon));
		DLOG((DLOG_PAGE, "pg[%d]->uobject = %p\n", i, pg->uobject));
		DLOG((DLOG_PAGE, "pg[%d]->wire_count = %d\n", i,
		    pg->wire_count));
		DLOG((DLOG_PAGE, "pg[%d]->loan_count = %d\n", i,
		    pg->loan_count));
	}
	/* uvm_page_unbusy takes care of PG_BUSY, PG_WANTED */
	uvm_page_unbusy(pgs, npages);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&vp->v_interlock);
	return EAGAIN;
}
1835 | | | 1844 | |
1836 | /* | | 1845 | /* |
1837 | * finish vnode/inode initialization. | | 1846 | * finish vnode/inode initialization. |
1838 | * used by lfs_vget and lfs_fastvget. | | 1847 | * used by lfs_vget and lfs_fastvget. |
1839 | */ | | 1848 | */ |
1840 | void | | 1849 | void |
1841 | lfs_vinit(struct mount *mp, struct vnode **vpp) | | 1850 | lfs_vinit(struct mount *mp, struct vnode **vpp) |
1842 | { | | 1851 | { |
1843 | struct vnode *vp = *vpp; | | 1852 | struct vnode *vp = *vpp; |
1844 | struct inode *ip = VTOI(vp); | | 1853 | struct inode *ip = VTOI(vp); |
1845 | struct ufsmount *ump = VFSTOUFS(mp); | | 1854 | struct ufsmount *ump = VFSTOUFS(mp); |
1846 | struct lfs *fs = ump->um_lfs; | | 1855 | struct lfs *fs = ump->um_lfs; |
1847 | int i; | | 1856 | int i; |
1848 | | | 1857 | |
1849 | ip->i_mode = ip->i_ffs1_mode; | | 1858 | ip->i_mode = ip->i_ffs1_mode; |
1850 | ip->i_nlink = ip->i_ffs1_nlink; | | 1859 | ip->i_nlink = ip->i_ffs1_nlink; |
1851 | ip->i_lfs_osize = ip->i_size = ip->i_ffs1_size; | | 1860 | ip->i_lfs_osize = ip->i_size = ip->i_ffs1_size; |
1852 | ip->i_flags = ip->i_ffs1_flags; | | 1861 | ip->i_flags = ip->i_ffs1_flags; |
1853 | ip->i_gen = ip->i_ffs1_gen; | | 1862 | ip->i_gen = ip->i_ffs1_gen; |
1854 | ip->i_uid = ip->i_ffs1_uid; | | 1863 | ip->i_uid = ip->i_ffs1_uid; |
1855 | ip->i_gid = ip->i_ffs1_gid; | | 1864 | ip->i_gid = ip->i_ffs1_gid; |
1856 | | | 1865 | |
1857 | ip->i_lfs_effnblks = ip->i_ffs1_blocks; | | 1866 | ip->i_lfs_effnblks = ip->i_ffs1_blocks; |
1858 | ip->i_lfs_odnlink = ip->i_ffs1_nlink; | | 1867 | ip->i_lfs_odnlink = ip->i_ffs1_nlink; |
1859 | | | 1868 | |
1860 | /* | | 1869 | /* |
1861 | * Initialize the vnode from the inode, check for aliases. In all | | 1870 | * Initialize the vnode from the inode, check for aliases. In all |
1862 | * cases re-init ip, the underlying vnode/inode may have changed. | | 1871 | * cases re-init ip, the underlying vnode/inode may have changed. |
1863 | */ | | 1872 | */ |
1864 | ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp); | | 1873 | ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp); |
1865 | ip = VTOI(vp); | | 1874 | ip = VTOI(vp); |
1866 | | | 1875 | |
1867 | memset(ip->i_lfs_fragsize, 0, NDADDR * sizeof(*ip->i_lfs_fragsize)); | | 1876 | memset(ip->i_lfs_fragsize, 0, NDADDR * sizeof(*ip->i_lfs_fragsize)); |
1868 | if (vp->v_type != VLNK || ip->i_size >= ip->i_ump->um_maxsymlinklen) { | | 1877 | if (vp->v_type != VLNK || ip->i_size >= ip->i_ump->um_maxsymlinklen) { |
1869 | #ifdef DEBUG | | 1878 | #ifdef DEBUG |
1870 | for (i = (ip->i_size + fs->lfs_bsize - 1) >> fs->lfs_bshift; | | 1879 | for (i = (ip->i_size + fs->lfs_bsize - 1) >> fs->lfs_bshift; |
1871 | i < NDADDR; i++) { | | 1880 | i < NDADDR; i++) { |
1872 | if ((vp->v_type == VBLK || vp->v_type == VCHR) && | | 1881 | if ((vp->v_type == VBLK || vp->v_type == VCHR) && |
1873 | i == 0) | | 1882 | i == 0) |
1874 | continue; | | 1883 | continue; |
1875 | if (ip->i_ffs1_db[i] != 0) { | | 1884 | if (ip->i_ffs1_db[i] != 0) { |
1876 | inconsistent: | | 1885 | inconsistent: |
1877 | lfs_dump_dinode(ip->i_din.ffs1_din); | | 1886 | lfs_dump_dinode(ip->i_din.ffs1_din); |
1878 | panic("inconsistent inode"); | | 1887 | panic("inconsistent inode"); |
1879 | } | | 1888 | } |
1880 | } | | 1889 | } |
1881 | for ( ; i < NDADDR + NIADDR; i++) { | | 1890 | for ( ; i < NDADDR + NIADDR; i++) { |
1882 | if (ip->i_ffs1_ib[i - NDADDR] != 0) { | | 1891 | if (ip->i_ffs1_ib[i - NDADDR] != 0) { |
1883 | goto inconsistent; | | 1892 | goto inconsistent; |
1884 | } | | 1893 | } |
1885 | } | | 1894 | } |
1886 | #endif /* DEBUG */ | | 1895 | #endif /* DEBUG */ |
1887 | for (i = 0; i < NDADDR; i++) | | 1896 | for (i = 0; i < NDADDR; i++) |
1888 | if (ip->i_ffs1_db[i] != 0) | | 1897 | if (ip->i_ffs1_db[i] != 0) |
1889 | ip->i_lfs_fragsize[i] = blksize(fs, ip, i); | | 1898 | ip->i_lfs_fragsize[i] = blksize(fs, ip, i); |
1890 | } | | 1899 | } |
1891 | | | 1900 | |
1892 | #ifdef DIAGNOSTIC | | 1901 | #ifdef DIAGNOSTIC |
1893 | if (vp->v_type == VNON) { | | 1902 | if (vp->v_type == VNON) { |
1894 | # ifdef DEBUG | | 1903 | # ifdef DEBUG |
1895 | lfs_dump_dinode(ip->i_din.ffs1_din); | | 1904 | lfs_dump_dinode(ip->i_din.ffs1_din); |
1896 | # endif | | 1905 | # endif |
1897 | panic("lfs_vinit: ino %llu is type VNON! (ifmt=%o)\n", | | 1906 | panic("lfs_vinit: ino %llu is type VNON! (ifmt=%o)\n", |
1898 | (unsigned long long)ip->i_number, | | 1907 | (unsigned long long)ip->i_number, |
1899 | (ip->i_mode & IFMT) >> 12); | | 1908 | (ip->i_mode & IFMT) >> 12); |
1900 | } | | 1909 | } |
1901 | #endif /* DIAGNOSTIC */ | | 1910 | #endif /* DIAGNOSTIC */ |
1902 | | | 1911 | |
1903 | /* | | 1912 | /* |
1904 | * Finish inode initialization now that aliasing has been resolved. | | 1913 | * Finish inode initialization now that aliasing has been resolved. |
1905 | */ | | 1914 | */ |
1906 | | | 1915 | |
1907 | ip->i_devvp = ump->um_devvp; | | 1916 | ip->i_devvp = ump->um_devvp; |
1908 | VREF(ip->i_devvp); | | 1917 | VREF(ip->i_devvp); |
1909 | genfs_node_init(vp, &lfs_genfsops); | | 1918 | genfs_node_init(vp, &lfs_genfsops); |
1910 | uvm_vnp_setsize(vp, ip->i_size); | | 1919 | uvm_vnp_setsize(vp, ip->i_size); |
1911 | | | 1920 | |
1912 | /* Initialize hiblk from file size */ | | 1921 | /* Initialize hiblk from file size */ |
1913 | ip->i_lfs_hiblk = lblkno(ip->i_lfs, ip->i_size + ip->i_lfs->lfs_bsize - 1) - 1; | | 1922 | ip->i_lfs_hiblk = lblkno(ip->i_lfs, ip->i_size + ip->i_lfs->lfs_bsize - 1) - 1; |
1914 | | | 1923 | |
1915 | *vpp = vp; | | 1924 | *vpp = vp; |
1916 | } | | 1925 | } |
1917 | | | 1926 | |
1918 | /* | | 1927 | /* |
1919 | * Resize the filesystem to contain the specified number of segments. | | 1928 | * Resize the filesystem to contain the specified number of segments. |
1920 | */ | | 1929 | */ |
1921 | int | | 1930 | int |
1922 | lfs_resize_fs(struct lfs *fs, int newnsegs) | | 1931 | lfs_resize_fs(struct lfs *fs, int newnsegs) |
1923 | { | | 1932 | { |
1924 | SEGUSE *sup; | | 1933 | SEGUSE *sup; |
1925 | struct buf *bp, *obp; | | 1934 | struct buf *bp, *obp; |
1926 | daddr_t olast, nlast, ilast, noff, start, end; | | 1935 | daddr_t olast, nlast, ilast, noff, start, end; |
1927 | struct vnode *ivp; | | 1936 | struct vnode *ivp; |
1928 | struct inode *ip; | | 1937 | struct inode *ip; |
1929 | int error, badnews, inc, oldnsegs; | | 1938 | int error, badnews, inc, oldnsegs; |
1930 | int sbbytes, csbbytes, gain, cgain; | | 1939 | int sbbytes, csbbytes, gain, cgain; |
1931 | int i; | | 1940 | int i; |
1932 | | | 1941 | |
1933 | /* Only support v2 and up */ | | 1942 | /* Only support v2 and up */ |
1934 | if (fs->lfs_version < 2) | | 1943 | if (fs->lfs_version < 2) |
1935 | return EOPNOTSUPP; | | 1944 | return EOPNOTSUPP; |
1936 | | | 1945 | |
1937 | /* If we're doing nothing, do it fast */ | | 1946 | /* If we're doing nothing, do it fast */ |
1938 | oldnsegs = fs->lfs_nseg; | | 1947 | oldnsegs = fs->lfs_nseg; |
1939 | if (newnsegs == oldnsegs) | | 1948 | if (newnsegs == oldnsegs) |
1940 | return 0; | | 1949 | return 0; |
1941 | | | 1950 | |
1942 | /* We always have to have two superblocks */ | | 1951 | /* We always have to have two superblocks */ |
1943 | if (newnsegs <= dtosn(fs, fs->lfs_sboffs[1])) | | 1952 | if (newnsegs <= dtosn(fs, fs->lfs_sboffs[1])) |
1944 | return EFBIG; | | 1953 | return EFBIG; |
1945 | | | 1954 | |
1946 | ivp = fs->lfs_ivnode; | | 1955 | ivp = fs->lfs_ivnode; |
1947 | ip = VTOI(ivp); | | 1956 | ip = VTOI(ivp); |
1948 | error = 0; | | 1957 | error = 0; |
1949 | | | 1958 | |
1950 | /* Take the segment lock so no one else calls lfs_newseg() */ | | 1959 | /* Take the segment lock so no one else calls lfs_newseg() */ |
1951 | lfs_seglock(fs, SEGM_PROT); | | 1960 | lfs_seglock(fs, SEGM_PROT); |
1952 | | | 1961 | |
1953 | /* | | 1962 | /* |
1954 | * Make sure the segments we're going to be losing, if any, | | 1963 | * Make sure the segments we're going to be losing, if any, |
1955 | * are in fact empty. We hold the seglock, so their status | | 1964 | * are in fact empty. We hold the seglock, so their status |
1956 | * cannot change underneath us. Count the superblocks we lose, | | 1965 | * cannot change underneath us. Count the superblocks we lose, |
1957 | * while we're at it. | | 1966 | * while we're at it. |
1958 | */ | | 1967 | */ |
1959 | sbbytes = csbbytes = 0; | | 1968 | sbbytes = csbbytes = 0; |
1960 | cgain = 0; | | 1969 | cgain = 0; |
1961 | for (i = newnsegs; i < oldnsegs; i++) { | | 1970 | for (i = newnsegs; i < oldnsegs; i++) { |
1962 | LFS_SEGENTRY(sup, fs, i, bp); | | 1971 | LFS_SEGENTRY(sup, fs, i, bp); |
1963 | badnews = sup->su_nbytes || !(sup->su_flags & SEGUSE_INVAL); | | 1972 | badnews = sup->su_nbytes || !(sup->su_flags & SEGUSE_INVAL); |
1964 | if (sup->su_flags & SEGUSE_SUPERBLOCK) | | 1973 | if (sup->su_flags & SEGUSE_SUPERBLOCK) |
1965 | sbbytes += LFS_SBPAD; | | 1974 | sbbytes += LFS_SBPAD; |
1966 | if (!(sup->su_flags & SEGUSE_DIRTY)) { | | 1975 | if (!(sup->su_flags & SEGUSE_DIRTY)) { |
1967 | ++cgain; | | 1976 | ++cgain; |
1968 | if (sup->su_flags & SEGUSE_SUPERBLOCK) | | 1977 | if (sup->su_flags & SEGUSE_SUPERBLOCK) |
1969 | csbbytes += LFS_SBPAD; | | 1978 | csbbytes += LFS_SBPAD; |
1970 | } | | 1979 | } |
1971 | brelse(bp, 0); | | 1980 | brelse(bp, 0); |
1972 | if (badnews) { | | 1981 | if (badnews) { |
1973 | error = EBUSY; | | 1982 | error = EBUSY; |
1974 | goto out; | | 1983 | goto out; |
1975 | } | | 1984 | } |
1976 | } | | 1985 | } |
1977 | | | 1986 | |
1978 | /* Note old and new segment table endpoints, and old ifile size */ | | 1987 | /* Note old and new segment table endpoints, and old ifile size */ |
1979 | olast = fs->lfs_cleansz + fs->lfs_segtabsz; | | 1988 | olast = fs->lfs_cleansz + fs->lfs_segtabsz; |
1980 | nlast = howmany(newnsegs, fs->lfs_sepb) + fs->lfs_cleansz; | | 1989 | nlast = howmany(newnsegs, fs->lfs_sepb) + fs->lfs_cleansz; |
1981 | ilast = ivp->v_size >> fs->lfs_bshift; | | 1990 | ilast = ivp->v_size >> fs->lfs_bshift; |
1982 | noff = nlast - olast; | | 1991 | noff = nlast - olast; |
1983 | | | 1992 | |
1984 | /* | | 1993 | /* |
1985 | * Make sure no one can use the Ifile while we change it around. | | 1994 | * Make sure no one can use the Ifile while we change it around. |
1986 | * Even after taking the iflock we need to make sure no one still | | 1995 | * Even after taking the iflock we need to make sure no one still |
1987 | * is holding Ifile buffers, so we get each one, to drain them. | | 1996 | * is holding Ifile buffers, so we get each one, to drain them. |
1988 | * (XXX this could be done better.) | | 1997 | * (XXX this could be done better.) |
1989 | */ | | 1998 | */ |
1990 | rw_enter(&fs->lfs_iflock, RW_WRITER); | | 1999 | rw_enter(&fs->lfs_iflock, RW_WRITER); |
1991 | vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY); | | 2000 | vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY); |
1992 | for (i = 0; i < ilast; i++) { | | 2001 | for (i = 0; i < ilast; i++) { |
1993 | bread(ivp, i, fs->lfs_bsize, NOCRED, 0, &bp); | | 2002 | bread(ivp, i, fs->lfs_bsize, NOCRED, 0, &bp); |
1994 | brelse(bp, 0); | | 2003 | brelse(bp, 0); |
1995 | } | | 2004 | } |
1996 | | | 2005 | |
1997 | /* Allocate new Ifile blocks */ | | 2006 | /* Allocate new Ifile blocks */ |
1998 | for (i = ilast; i < ilast + noff; i++) { | | 2007 | for (i = ilast; i < ilast + noff; i++) { |
1999 | if (lfs_balloc(ivp, i * fs->lfs_bsize, fs->lfs_bsize, NOCRED, 0, | | 2008 | if (lfs_balloc(ivp, i * fs->lfs_bsize, fs->lfs_bsize, NOCRED, 0, |
2000 | &bp) != 0) | | 2009 | &bp) != 0) |
2001 | panic("balloc extending ifile"); | | 2010 | panic("balloc extending ifile"); |
2002 | memset(bp->b_data, 0, fs->lfs_bsize); | | 2011 | memset(bp->b_data, 0, fs->lfs_bsize); |
2003 | VOP_BWRITE(bp); | | 2012 | VOP_BWRITE(bp); |
2004 | } | | 2013 | } |
2005 | | | 2014 | |
2006 | /* Register new ifile size */ | | 2015 | /* Register new ifile size */ |
2007 | ip->i_size += noff * fs->lfs_bsize; | | 2016 | ip->i_size += noff * fs->lfs_bsize; |
2008 | ip->i_ffs1_size = ip->i_size; | | 2017 | ip->i_ffs1_size = ip->i_size; |
2009 | uvm_vnp_setsize(ivp, ip->i_size); | | 2018 | uvm_vnp_setsize(ivp, ip->i_size); |
2010 | | | 2019 | |
2011 | /* Copy the inode table to its new position */ | | 2020 | /* Copy the inode table to its new position */ |
2012 | if (noff != 0) { | | 2021 | if (noff != 0) { |
2013 | if (noff < 0) { | | 2022 | if (noff < 0) { |
2014 | start = nlast; | | 2023 | start = nlast; |
2015 | end = ilast + noff; | | 2024 | end = ilast + noff; |
2016 | inc = 1; | | 2025 | inc = 1; |
2017 | } else { | | 2026 | } else { |
2018 | start = ilast + noff - 1; | | 2027 | start = ilast + noff - 1; |
2019 | end = nlast - 1; | | 2028 | end = nlast - 1; |
2020 | inc = -1; | | 2029 | inc = -1; |
2021 | } | | 2030 | } |
2022 | for (i = start; i != end; i += inc) { | | 2031 | for (i = start; i != end; i += inc) { |
2023 | if (bread(ivp, i, fs->lfs_bsize, NOCRED, | | 2032 | if (bread(ivp, i, fs->lfs_bsize, NOCRED, |
2024 | B_MODIFY, &bp) != 0) | | 2033 | B_MODIFY, &bp) != 0) |
2025 | panic("resize: bread dst blk failed"); | | 2034 | panic("resize: bread dst blk failed"); |
2026 | if (bread(ivp, i - noff, fs->lfs_bsize, | | 2035 | if (bread(ivp, i - noff, fs->lfs_bsize, |
2027 | NOCRED, 0, &obp)) | | 2036 | NOCRED, 0, &obp)) |
2028 | panic("resize: bread src blk failed"); | | 2037 | panic("resize: bread src blk failed"); |
2029 | memcpy(bp->b_data, obp->b_data, fs->lfs_bsize); | | 2038 | memcpy(bp->b_data, obp->b_data, fs->lfs_bsize); |
2030 | VOP_BWRITE(bp); | | 2039 | VOP_BWRITE(bp); |
2031 | brelse(obp, 0); | | 2040 | brelse(obp, 0); |
2032 | } | | 2041 | } |
2033 | } | | 2042 | } |
2034 | | | 2043 | |
2035 | /* If we are expanding, write the new empty SEGUSE entries */ | | 2044 | /* If we are expanding, write the new empty SEGUSE entries */ |
2036 | if (newnsegs > oldnsegs) { | | 2045 | if (newnsegs > oldnsegs) { |
2037 | for (i = oldnsegs; i < newnsegs; i++) { | | 2046 | for (i = oldnsegs; i < newnsegs; i++) { |
2038 | if ((error = bread(ivp, i / fs->lfs_sepb + | | 2047 | if ((error = bread(ivp, i / fs->lfs_sepb + |
2039 | fs->lfs_cleansz, fs->lfs_bsize, | | 2048 | fs->lfs_cleansz, fs->lfs_bsize, |
2040 | NOCRED, B_MODIFY, &bp)) != 0) | | 2049 | NOCRED, B_MODIFY, &bp)) != 0) |
2041 | panic("lfs: ifile read: %d", error); | | 2050 | panic("lfs: ifile read: %d", error); |
2042 | while ((i + 1) % fs->lfs_sepb && i < newnsegs) { | | 2051 | while ((i + 1) % fs->lfs_sepb && i < newnsegs) { |
2043 | sup = &((SEGUSE *)bp->b_data)[i % fs->lfs_sepb]; | | 2052 | sup = &((SEGUSE *)bp->b_data)[i % fs->lfs_sepb]; |
2044 | memset(sup, 0, sizeof(*sup)); | | 2053 | memset(sup, 0, sizeof(*sup)); |
2045 | i++; | | 2054 | i++; |
2046 | } | | 2055 | } |
2047 | VOP_BWRITE(bp); | | 2056 | VOP_BWRITE(bp); |
2048 | } | | 2057 | } |
2049 | } | | 2058 | } |
2050 | | | 2059 | |
2051 | /* Zero out unused superblock offsets */ | | 2060 | /* Zero out unused superblock offsets */ |
2052 | for (i = 2; i < LFS_MAXNUMSB; i++) | | 2061 | for (i = 2; i < LFS_MAXNUMSB; i++) |
2053 | if (dtosn(fs, fs->lfs_sboffs[i]) >= newnsegs) | | 2062 | if (dtosn(fs, fs->lfs_sboffs[i]) >= newnsegs) |
2054 | fs->lfs_sboffs[i] = 0x0; | | 2063 | fs->lfs_sboffs[i] = 0x0; |
2055 | | | 2064 | |
2056 | /* | | 2065 | /* |
2057 | * Correct superblock entries that depend on fs size. | | 2066 | * Correct superblock entries that depend on fs size. |
2058 | * The computations of these are as follows: | | 2067 | * The computations of these are as follows: |
2059 | * | | 2068 | * |
2060 | * size = segtod(fs, nseg) | | 2069 | * size = segtod(fs, nseg) |
2061 | * dsize = segtod(fs, nseg - minfreeseg) - btofsb(#super * LFS_SBPAD) | | 2070 | * dsize = segtod(fs, nseg - minfreeseg) - btofsb(#super * LFS_SBPAD) |
2062 | * bfree = dsize - btofsb(fs, bsize * nseg / 2) - blocks_actually_used | | 2071 | * bfree = dsize - btofsb(fs, bsize * nseg / 2) - blocks_actually_used |
2063 | * avail = segtod(fs, nclean) - btofsb(#clean_super * LFS_SBPAD) | | 2072 | * avail = segtod(fs, nclean) - btofsb(#clean_super * LFS_SBPAD) |
2064 | * + (segtod(fs, 1) - (offset - curseg)) | | 2073 | * + (segtod(fs, 1) - (offset - curseg)) |
2065 | * - segtod(fs, minfreeseg - (minfreeseg / 2)) | | 2074 | * - segtod(fs, minfreeseg - (minfreeseg / 2)) |
2066 | * | | 2075 | * |
2067 | * XXX - we should probably adjust minfreeseg as well. | | 2076 | * XXX - we should probably adjust minfreeseg as well. |
2068 | */ | | 2077 | */ |
2069 | gain = (newnsegs - oldnsegs); | | 2078 | gain = (newnsegs - oldnsegs); |
2070 | fs->lfs_nseg = newnsegs; | | 2079 | fs->lfs_nseg = newnsegs; |
2071 | fs->lfs_segtabsz = nlast - fs->lfs_cleansz; | | 2080 | fs->lfs_segtabsz = nlast - fs->lfs_cleansz; |
2072 | fs->lfs_size += gain * btofsb(fs, fs->lfs_ssize); | | 2081 | fs->lfs_size += gain * btofsb(fs, fs->lfs_ssize); |
2073 | fs->lfs_dsize += gain * btofsb(fs, fs->lfs_ssize) - btofsb(fs, sbbytes); | | 2082 | fs->lfs_dsize += gain * btofsb(fs, fs->lfs_ssize) - btofsb(fs, sbbytes); |
2074 | fs->lfs_bfree += gain * btofsb(fs, fs->lfs_ssize) - btofsb(fs, sbbytes) | | 2083 | fs->lfs_bfree += gain * btofsb(fs, fs->lfs_ssize) - btofsb(fs, sbbytes) |
2075 | - gain * btofsb(fs, fs->lfs_bsize / 2); | | 2084 | - gain * btofsb(fs, fs->lfs_bsize / 2); |
2076 | if (gain > 0) { | | 2085 | if (gain > 0) { |
2077 | fs->lfs_nclean += gain; | | 2086 | fs->lfs_nclean += gain; |
2078 | fs->lfs_avail += gain * btofsb(fs, fs->lfs_ssize); | | 2087 | fs->lfs_avail += gain * btofsb(fs, fs->lfs_ssize); |
2079 | } else { | | 2088 | } else { |
2080 | fs->lfs_nclean -= cgain; | | 2089 | fs->lfs_nclean -= cgain; |
2081 | fs->lfs_avail -= cgain * btofsb(fs, fs->lfs_ssize) - | | 2090 | fs->lfs_avail -= cgain * btofsb(fs, fs->lfs_ssize) - |
2082 | btofsb(fs, csbbytes); | | 2091 | btofsb(fs, csbbytes); |
2083 | } | | 2092 | } |
2084 | | | 2093 | |
2085 | /* Resize segment flag cache */ | | 2094 | /* Resize segment flag cache */ |
2086 | fs->lfs_suflags[0] = (u_int32_t *)realloc(fs->lfs_suflags[0], | | 2095 | fs->lfs_suflags[0] = (u_int32_t *)realloc(fs->lfs_suflags[0], |
2087 | fs->lfs_nseg * sizeof(u_int32_t), | | 2096 | fs->lfs_nseg * sizeof(u_int32_t), |
2088 | M_SEGMENT, M_WAITOK); | | 2097 | M_SEGMENT, M_WAITOK); |
2089 | fs->lfs_suflags[1] = (u_int32_t *)realloc(fs->lfs_suflags[1], | | 2098 | fs->lfs_suflags[1] = (u_int32_t *)realloc(fs->lfs_suflags[1], |
2090 | fs->lfs_nseg * sizeof(u_int32_t), | | 2099 | fs->lfs_nseg * sizeof(u_int32_t), |
2091 | M_SEGMENT, M_WAITOK); | | 2100 | M_SEGMENT, M_WAITOK); |
2092 | for (i = oldnsegs; i < newnsegs; i++) | | 2101 | for (i = oldnsegs; i < newnsegs; i++) |
2093 | fs->lfs_suflags[0][i] = fs->lfs_suflags[1][i] = 0x0; | | 2102 | fs->lfs_suflags[0][i] = fs->lfs_suflags[1][i] = 0x0; |
2094 | | | 2103 | |
2095 | /* Truncate Ifile if necessary */ | | 2104 | /* Truncate Ifile if necessary */ |
2096 | if (noff < 0) | | 2105 | if (noff < 0) |
2097 | lfs_truncate(ivp, ivp->v_size + (noff << fs->lfs_bshift), 0, | | 2106 | lfs_truncate(ivp, ivp->v_size + (noff << fs->lfs_bshift), 0, |
2098 | NOCRED); | | 2107 | NOCRED); |
2099 | | | 2108 | |
2100 | /* Update cleaner info so the cleaner can die */ | | 2109 | /* Update cleaner info so the cleaner can die */ |
2101 | bread(ivp, 0, fs->lfs_bsize, NOCRED, B_MODIFY, &bp); | | 2110 | bread(ivp, 0, fs->lfs_bsize, NOCRED, B_MODIFY, &bp); |
2102 | ((CLEANERINFO *)bp->b_data)->clean = fs->lfs_nclean; | | 2111 | ((CLEANERINFO *)bp->b_data)->clean = fs->lfs_nclean; |
2103 | ((CLEANERINFO *)bp->b_data)->dirty = fs->lfs_nseg - fs->lfs_nclean; | | 2112 | ((CLEANERINFO *)bp->b_data)->dirty = fs->lfs_nseg - fs->lfs_nclean; |
2104 | VOP_BWRITE(bp); | | 2113 | VOP_BWRITE(bp); |
2105 | | | 2114 | |
2106 | /* Let Ifile accesses proceed */ | | 2115 | /* Let Ifile accesses proceed */ |
2107 | VOP_UNLOCK(ivp, 0); | | 2116 | VOP_UNLOCK(ivp, 0); |
2108 | rw_exit(&fs->lfs_iflock); | | 2117 | rw_exit(&fs->lfs_iflock); |
2109 | | | 2118 | |
2110 | out: | | 2119 | out: |
2111 | lfs_segunlock(fs); | | 2120 | lfs_segunlock(fs); |
2112 | return error; | | 2121 | return error; |
2113 | } | | 2122 | } |