| @@ -1,937 +1,940 @@ | | | @@ -1,937 +1,940 @@ |
1 | /* $NetBSD: lfs_pages.c,v 1.14 2017/06/10 05:29:36 maya Exp $ */ | | 1 | /* $NetBSD: lfs_pages.c,v 1.15 2017/08/19 14:22:49 maya Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Konrad E. Schroder <perseant@hhhh.org>. | | 8 | * by Konrad E. Schroder <perseant@hhhh.org>. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | /* | | 31 | /* |
32 | * Copyright (c) 1986, 1989, 1991, 1993, 1995 | | 32 | * Copyright (c) 1986, 1989, 1991, 1993, 1995 |
33 | * The Regents of the University of California. All rights reserved. | | 33 | * The Regents of the University of California. All rights reserved. |
34 | * | | 34 | * |
35 | * Redistribution and use in source and binary forms, with or without | | 35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions | | 36 | * modification, are permitted provided that the following conditions |
37 | * are met: | | 37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright | | 38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. | | 39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright | | 40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the | | 41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. | | 42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. Neither the name of the University nor the names of its contributors | | 43 | * 3. Neither the name of the University nor the names of its contributors |
44 | * may be used to endorse or promote products derived from this software | | 44 | * may be used to endorse or promote products derived from this software |
45 | * without specific prior written permission. | | 45 | * without specific prior written permission. |
46 | * | | 46 | * |
47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
57 | * SUCH DAMAGE. | | 57 | * SUCH DAMAGE. |
58 | * | | 58 | * |
59 | * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 | | 59 | * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 |
60 | */ | | 60 | */ |
61 | | | 61 | |
62 | #include <sys/cdefs.h> | | 62 | #include <sys/cdefs.h> |
63 | __KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.14 2017/06/10 05:29:36 maya Exp $"); | | 63 | __KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.15 2017/08/19 14:22:49 maya Exp $"); |
64 | | | 64 | |
65 | #ifdef _KERNEL_OPT | | 65 | #ifdef _KERNEL_OPT |
66 | #include "opt_compat_netbsd.h" | | 66 | #include "opt_compat_netbsd.h" |
67 | #include "opt_uvm_page_trkown.h" | | 67 | #include "opt_uvm_page_trkown.h" |
68 | #endif | | 68 | #endif |
69 | | | 69 | |
70 | #include <sys/param.h> | | 70 | #include <sys/param.h> |
71 | #include <sys/systm.h> | | 71 | #include <sys/systm.h> |
72 | #include <sys/namei.h> | | 72 | #include <sys/namei.h> |
73 | #include <sys/resourcevar.h> | | 73 | #include <sys/resourcevar.h> |
74 | #include <sys/kernel.h> | | 74 | #include <sys/kernel.h> |
75 | #include <sys/file.h> | | 75 | #include <sys/file.h> |
76 | #include <sys/stat.h> | | 76 | #include <sys/stat.h> |
77 | #include <sys/buf.h> | | 77 | #include <sys/buf.h> |
78 | #include <sys/proc.h> | | 78 | #include <sys/proc.h> |
79 | #include <sys/mount.h> | | 79 | #include <sys/mount.h> |
80 | #include <sys/vnode.h> | | 80 | #include <sys/vnode.h> |
81 | #include <sys/pool.h> | | 81 | #include <sys/pool.h> |
82 | #include <sys/signalvar.h> | | 82 | #include <sys/signalvar.h> |
83 | #include <sys/kauth.h> | | 83 | #include <sys/kauth.h> |
84 | #include <sys/syslog.h> | | 84 | #include <sys/syslog.h> |
85 | #include <sys/fstrans.h> | | 85 | #include <sys/fstrans.h> |
86 | | | 86 | |
87 | #include <miscfs/fifofs/fifo.h> | | 87 | #include <miscfs/fifofs/fifo.h> |
88 | #include <miscfs/genfs/genfs.h> | | 88 | #include <miscfs/genfs/genfs.h> |
89 | #include <miscfs/specfs/specdev.h> | | 89 | #include <miscfs/specfs/specdev.h> |
90 | | | 90 | |
91 | #include <ufs/lfs/ulfs_inode.h> | | 91 | #include <ufs/lfs/ulfs_inode.h> |
92 | #include <ufs/lfs/ulfsmount.h> | | 92 | #include <ufs/lfs/ulfsmount.h> |
93 | #include <ufs/lfs/ulfs_bswap.h> | | 93 | #include <ufs/lfs/ulfs_bswap.h> |
94 | #include <ufs/lfs/ulfs_extern.h> | | 94 | #include <ufs/lfs/ulfs_extern.h> |
95 | | | 95 | |
96 | #include <uvm/uvm.h> | | 96 | #include <uvm/uvm.h> |
97 | #include <uvm/uvm_pmap.h> | | 97 | #include <uvm/uvm_pmap.h> |
98 | #include <uvm/uvm_stat.h> | | 98 | #include <uvm/uvm_stat.h> |
99 | #include <uvm/uvm_pager.h> | | 99 | #include <uvm/uvm_pager.h> |
100 | | | 100 | |
101 | #include <ufs/lfs/lfs.h> | | 101 | #include <ufs/lfs/lfs.h> |
102 | #include <ufs/lfs/lfs_accessors.h> | | 102 | #include <ufs/lfs/lfs_accessors.h> |
103 | #include <ufs/lfs/lfs_kernel.h> | | 103 | #include <ufs/lfs/lfs_kernel.h> |
104 | #include <ufs/lfs/lfs_extern.h> | | 104 | #include <ufs/lfs/lfs_extern.h> |
105 | | | 105 | |
106 | extern kcondvar_t lfs_writerd_cv; | | 106 | extern kcondvar_t lfs_writerd_cv; |
107 | | | 107 | |
108 | static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); | | 108 | static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); |
109 | | | 109 | |
110 | int | | 110 | int |
111 | lfs_getpages(void *v) | | 111 | lfs_getpages(void *v) |
112 | { | | 112 | { |
113 | struct vop_getpages_args /* { | | 113 | struct vop_getpages_args /* { |
114 | struct vnode *a_vp; | | 114 | struct vnode *a_vp; |
115 | voff_t a_offset; | | 115 | voff_t a_offset; |
116 | struct vm_page **a_m; | | 116 | struct vm_page **a_m; |
117 | int *a_count; | | 117 | int *a_count; |
118 | int a_centeridx; | | 118 | int a_centeridx; |
119 | vm_prot_t a_access_type; | | 119 | vm_prot_t a_access_type; |
120 | int a_advice; | | 120 | int a_advice; |
121 | int a_flags; | | 121 | int a_flags; |
122 | } */ *ap = v; | | 122 | } */ *ap = v; |
123 | | | 123 | |
124 | if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && | | 124 | if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && |
125 | (ap->a_access_type & VM_PROT_WRITE) != 0) { | | 125 | (ap->a_access_type & VM_PROT_WRITE) != 0) { |
126 | return EPERM; | | 126 | return EPERM; |
127 | } | | 127 | } |
128 | if ((ap->a_access_type & VM_PROT_WRITE) != 0) { | | 128 | if ((ap->a_access_type & VM_PROT_WRITE) != 0) { |
129 | mutex_enter(&lfs_lock); | | 129 | mutex_enter(&lfs_lock); |
130 | LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); | | 130 | LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); |
131 | mutex_exit(&lfs_lock); | | 131 | mutex_exit(&lfs_lock); |
132 | } | | 132 | } |
133 | | | 133 | |
134 | /* | | 134 | /* |
135 | * we're relying on the fact that genfs_getpages() always read in | | 135 | * we're relying on the fact that genfs_getpages() always read in |
136 | * entire filesystem blocks. | | 136 | * entire filesystem blocks. |
137 | */ | | 137 | */ |
138 | return genfs_getpages(v); | | 138 | return genfs_getpages(v); |
139 | } | | 139 | } |
140 | | | 140 | |
141 | /* | | 141 | /* |
142 | * Wait for a page to become unbusy, possibly printing diagnostic messages | | 142 | * Wait for a page to become unbusy, possibly printing diagnostic messages |
143 | * as well. | | 143 | * as well. |
144 | * | | 144 | * |
145 | * Called with vp->v_interlock held; return with it held. | | 145 | * Called with vp->v_interlock held; return with it held. |
146 | */ | | 146 | */ |
147 | static void | | 147 | static void |
148 | wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) | | 148 | wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) |
149 | { | | 149 | { |
150 | KASSERT(mutex_owned(vp->v_interlock)); | | 150 | KASSERT(mutex_owned(vp->v_interlock)); |
151 | if ((pg->flags & PG_BUSY) == 0) | | 151 | if ((pg->flags & PG_BUSY) == 0) |
152 | return; /* Nothing to wait for! */ | | 152 | return; /* Nothing to wait for! */ |
153 | | | 153 | |
154 | #if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) | | 154 | #if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) |
155 | static struct vm_page *lastpg; | | 155 | static struct vm_page *lastpg; |
156 | | | 156 | |
157 | if (label != NULL && pg != lastpg) { | | 157 | if (label != NULL && pg != lastpg) { |
158 | if (pg->owner_tag) { | | 158 | if (pg->owner_tag) { |
159 | printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", | | 159 | printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", |
160 | curproc->p_pid, curlwp->l_lid, label, | | 160 | curproc->p_pid, curlwp->l_lid, label, |
161 | pg, pg->owner, pg->lowner, pg->owner_tag); | | 161 | pg, pg->owner, pg->lowner, pg->owner_tag); |
162 | } else { | | 162 | } else { |
163 | printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", | | 163 | printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", |
164 | curproc->p_pid, curlwp->l_lid, label, pg); | | 164 | curproc->p_pid, curlwp->l_lid, label, pg); |
165 | } | | 165 | } |
166 | } | | 166 | } |
167 | lastpg = pg; | | 167 | lastpg = pg; |
168 | #endif | | 168 | #endif |
169 | | | 169 | |
170 | pg->flags |= PG_WANTED; | | 170 | pg->flags |= PG_WANTED; |
171 | UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0); | | 171 | UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0); |
172 | mutex_enter(vp->v_interlock); | | 172 | mutex_enter(vp->v_interlock); |
173 | } | | 173 | } |
174 | | | 174 | |
175 | /* | | 175 | /* |
176 | * This routine is called by lfs_putpages() when it can't complete the | | 176 | * This routine is called by lfs_putpages() when it can't complete the |
177 | * write because a page is busy. This means that either (1) someone, | | 177 | * write because a page is busy. This means that either (1) someone, |
178 | * possibly the pagedaemon, is looking at this page, and will give it up | | 178 | * possibly the pagedaemon, is looking at this page, and will give it up |
179 | * presently; or (2) we ourselves are holding the page busy in the | | 179 | * presently; or (2) we ourselves are holding the page busy in the |
180 | * process of being written (either gathered or actually on its way to | | 180 | * process of being written (either gathered or actually on its way to |
181 | * disk). We don't need to give up the segment lock, but we might need | | 181 | * disk). We don't need to give up the segment lock, but we might need |
182 | * to call lfs_writeseg() to expedite the page's journey to disk. | | 182 | * to call lfs_writeseg() to expedite the page's journey to disk. |
183 | * | | 183 | * |
184 | * Called with vp->v_interlock held; return with it held. | | 184 | * Called with vp->v_interlock held; return with it held. |
185 | */ | | 185 | */ |
186 | /* #define BUSYWAIT */ | | 186 | /* #define BUSYWAIT */ |
187 | static void | | 187 | static void |
188 | write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, | | 188 | write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, |
189 | int seglocked, const char *label) | | 189 | int seglocked, const char *label) |
190 | { | | 190 | { |
191 | KASSERT(mutex_owned(vp->v_interlock)); | | 191 | KASSERT(mutex_owned(vp->v_interlock)); |
192 | #ifndef BUSYWAIT | | 192 | #ifndef BUSYWAIT |
193 | struct inode *ip = VTOI(vp); | | 193 | struct inode *ip = VTOI(vp); |
194 | struct segment *sp = fs->lfs_sp; | | 194 | struct segment *sp = fs->lfs_sp; |
195 | int count = 0; | | 195 | int count = 0; |
196 | | | 196 | |
197 | if (pg == NULL) | | 197 | if (pg == NULL) |
198 | return; | | 198 | return; |
199 | | | 199 | |
200 | while (pg->flags & PG_BUSY && | | 200 | while (pg->flags & PG_BUSY && |
201 | pg->uobject == &vp->v_uobj) { | | 201 | pg->uobject == &vp->v_uobj) { |
202 | mutex_exit(vp->v_interlock); | | 202 | mutex_exit(vp->v_interlock); |
203 | if (sp->cbpp - sp->bpp > 1) { | | 203 | if (sp->cbpp - sp->bpp > 1) { |
204 | /* Write gathered pages */ | | 204 | /* Write gathered pages */ |
205 | lfs_updatemeta(sp); | | 205 | lfs_updatemeta(sp); |
206 | lfs_release_finfo(fs); | | 206 | lfs_release_finfo(fs); |
207 | (void) lfs_writeseg(fs, sp); | | 207 | (void) lfs_writeseg(fs, sp); |
208 | | | 208 | |
209 | /* | | 209 | /* |
210 | * Reinitialize FIP | | 210 | * Reinitialize FIP |
211 | */ | | 211 | */ |
212 | KASSERT(sp->vp == vp); | | 212 | KASSERT(sp->vp == vp); |
213 | lfs_acquire_finfo(fs, ip->i_number, | | 213 | lfs_acquire_finfo(fs, ip->i_number, |
214 | ip->i_gen); | | 214 | ip->i_gen); |
215 | } | | 215 | } |
216 | ++count; | | 216 | ++count; |
217 | mutex_enter(vp->v_interlock); | | 217 | mutex_enter(vp->v_interlock); |
218 | wait_for_page(vp, pg, label); | | 218 | wait_for_page(vp, pg, label); |
219 | } | | 219 | } |
220 | if (label != NULL && count > 1) { | | 220 | if (label != NULL && count > 1) { |
221 | DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n", | | 221 | DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n", |
222 | curproc->p_pid, label, (count > 0 ? "looping, " : ""), | | 222 | curproc->p_pid, label, (count > 0 ? "looping, " : ""), |
223 | count)); | | 223 | count)); |
224 | } | | 224 | } |
225 | #else | | 225 | #else |
226 | preempt(1); | | 226 | preempt(1); |
227 | #endif | | 227 | #endif |
228 | KASSERT(mutex_owned(vp->v_interlock)); | | 228 | KASSERT(mutex_owned(vp->v_interlock)); |
229 | } | | 229 | } |
230 | | | 230 | |
231 | /* | | 231 | /* |
232 | * Make sure that for all pages in every block in the given range, | | 232 | * Make sure that for all pages in every block in the given range, |
233 | * either all are dirty or all are clean. If any of the pages | | 233 | * either all are dirty or all are clean. If any of the pages |
234 | * we've seen so far are dirty, put the vnode on the paging chain, | | 234 | * we've seen so far are dirty, put the vnode on the paging chain, |
235 | * and mark it IN_PAGING. | | 235 | * and mark it IN_PAGING. |
236 | * | | 236 | * |
237 | * If checkfirst != 0, don't check all the pages but return at the | | 237 | * If checkfirst != 0, don't check all the pages but return at the |
238 | * first dirty page. | | 238 | * first dirty page. |
239 | */ | | 239 | */ |
240 | static int | | 240 | static int |
241 | check_dirty(struct lfs *fs, struct vnode *vp, | | 241 | check_dirty(struct lfs *fs, struct vnode *vp, |
242 | off_t startoffset, off_t endoffset, off_t blkeof, | | 242 | off_t startoffset, off_t endoffset, off_t blkeof, |
243 | int flags, int checkfirst, struct vm_page **pgp) | | 243 | int flags, int checkfirst, struct vm_page **pgp) |
244 | { | | 244 | { |
245 | int by_list; | | 245 | int by_list; |
246 | struct vm_page *curpg = NULL; /* XXX: gcc */ | | 246 | struct vm_page *curpg = NULL; /* XXX: gcc */ |
247 | struct vm_page *pgs[MAXBSIZE / MIN_PAGE_SIZE], *pg; | | 247 | struct vm_page *pgs[MAXBSIZE / MIN_PAGE_SIZE], *pg; |
248 | off_t soff = 0; /* XXX: gcc */ | | 248 | off_t soff = 0; /* XXX: gcc */ |
249 | voff_t off; | | 249 | voff_t off; |
250 | int i; | | 250 | int i; |
251 | int nonexistent; | | 251 | int nonexistent; |
252 | int any_dirty; /* number of dirty pages */ | | 252 | int any_dirty; /* number of dirty pages */ |
253 | int dirty; /* number of dirty pages in a block */ | | 253 | int dirty; /* number of dirty pages in a block */ |
254 | int tdirty; | | 254 | int tdirty; |
255 | int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT; | | 255 | int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
256 | int pagedaemon = (curlwp == uvm.pagedaemon_lwp); | | 256 | int pagedaemon = (curlwp == uvm.pagedaemon_lwp); |
257 | | | 257 | |
258 | KASSERT(mutex_owned(vp->v_interlock)); | | 258 | KASSERT(mutex_owned(vp->v_interlock)); |
259 | ASSERT_MAYBE_SEGLOCK(fs); | | 259 | ASSERT_MAYBE_SEGLOCK(fs); |
260 | top: | | 260 | top: |
261 | by_list = (vp->v_uobj.uo_npages <= | | 261 | by_list = (vp->v_uobj.uo_npages <= |
262 | ((endoffset - startoffset) >> PAGE_SHIFT) * | | 262 | ((endoffset - startoffset) >> PAGE_SHIFT) * |
263 | UVM_PAGE_TREE_PENALTY); | | 263 | UVM_PAGE_TREE_PENALTY); |
264 | any_dirty = 0; | | 264 | any_dirty = 0; |
265 | | | 265 | |
266 | if (by_list) { | | 266 | if (by_list) { |
267 | curpg = TAILQ_FIRST(&vp->v_uobj.memq); | | 267 | curpg = TAILQ_FIRST(&vp->v_uobj.memq); |
268 | } else { | | 268 | } else { |
269 | soff = startoffset; | | 269 | soff = startoffset; |
270 | } | | 270 | } |
271 | while (by_list || soff < MIN(blkeof, endoffset)) { | | 271 | while (by_list || soff < MIN(blkeof, endoffset)) { |
272 | if (by_list) { | | 272 | if (by_list) { |
273 | /* | | 273 | /* |
274 | * Find the first page in a block. Skip | | 274 | * Find the first page in a block. Skip |
275 | * blocks outside our area of interest or beyond | | 275 | * blocks outside our area of interest or beyond |
276 | * the end of file. | | 276 | * the end of file. |
277 | */ | | 277 | */ |
278 | KASSERT(curpg == NULL | | 278 | KASSERT(curpg == NULL |
279 | || (curpg->flags & PG_MARKER) == 0); | | 279 | || (curpg->flags & PG_MARKER) == 0); |
280 | if (pages_per_block > 1) { | | 280 | if (pages_per_block > 1) { |
281 | while (curpg && | | 281 | while (curpg && |
282 | ((curpg->offset & lfs_sb_getbmask(fs)) || | | 282 | ((curpg->offset & lfs_sb_getbmask(fs)) || |
283 | curpg->offset >= vp->v_size || | | 283 | curpg->offset >= vp->v_size || |
284 | curpg->offset >= endoffset)) { | | 284 | curpg->offset >= endoffset)) { |
285 | curpg = TAILQ_NEXT(curpg, listq.queue); | | 285 | curpg = TAILQ_NEXT(curpg, listq.queue); |
286 | KASSERT(curpg == NULL || | | 286 | KASSERT(curpg == NULL || |
287 | (curpg->flags & PG_MARKER) == 0); | | 287 | (curpg->flags & PG_MARKER) == 0); |
288 | } | | 288 | } |
289 | } | | 289 | } |
290 | if (curpg == NULL) | | 290 | if (curpg == NULL) |
291 | break; | | 291 | break; |
292 | soff = curpg->offset; | | 292 | soff = curpg->offset; |
293 | } | | 293 | } |
294 | | | 294 | |
295 | /* | | 295 | /* |
296 | * Mark all pages in extended range busy; find out if any | | 296 | * Mark all pages in extended range busy; find out if any |
297 | * of them are dirty. | | 297 | * of them are dirty. |
298 | */ | | 298 | */ |
299 | nonexistent = dirty = 0; | | 299 | nonexistent = dirty = 0; |
300 | for (i = 0; i == 0 || i < pages_per_block; i++) { | | 300 | for (i = 0; i == 0 || i < pages_per_block; i++) { |
301 | KASSERT(mutex_owned(vp->v_interlock)); | | 301 | KASSERT(mutex_owned(vp->v_interlock)); |
302 | if (by_list && pages_per_block <= 1) { | | 302 | if (by_list && pages_per_block <= 1) { |
303 | pgs[i] = pg = curpg; | | 303 | pgs[i] = pg = curpg; |
304 | } else { | | 304 | } else { |
305 | off = soff + (i << PAGE_SHIFT); | | 305 | off = soff + (i << PAGE_SHIFT); |
306 | pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); | | 306 | pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); |
307 | if (pg == NULL) { | | 307 | if (pg == NULL) { |
308 | ++nonexistent; | | 308 | ++nonexistent; |
309 | continue; | | 309 | continue; |
310 | } | | 310 | } |
311 | } | | 311 | } |
312 | KASSERT(pg != NULL); | | 312 | KASSERT(pg != NULL); |
313 | | | 313 | |
314 | /* | | 314 | /* |
315 | * If we're holding the segment lock, we can deadlock | | 315 | * If we're holding the segment lock, we can deadlock |
316 | * against a process that has our page and is waiting | | 316 | * against a process that has our page and is waiting |
317 | * for the cleaner, while the cleaner waits for the | | 317 | * for the cleaner, while the cleaner waits for the |
318 | * segment lock. Just bail in that case. | | 318 | * segment lock. Just bail in that case. |
319 | */ | | 319 | */ |
320 | if ((pg->flags & PG_BUSY) && | | 320 | if ((pg->flags & PG_BUSY) && |
321 | (pagedaemon || LFS_SEGLOCK_HELD(fs))) { | | 321 | (pagedaemon || LFS_SEGLOCK_HELD(fs))) { |
322 | if (i > 0) | | 322 | if (i > 0) |
323 | uvm_page_unbusy(pgs, i); | | 323 | uvm_page_unbusy(pgs, i); |
324 | DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); | | 324 | DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); |
325 | if (pgp) | | 325 | if (pgp) |
326 | *pgp = pg; | | 326 | *pgp = pg; |
327 | KASSERT(mutex_owned(vp->v_interlock)); | | 327 | KASSERT(mutex_owned(vp->v_interlock)); |
328 | return -1; | | 328 | return -1; |
329 | } | | 329 | } |
330 | | | 330 | |
331 | while (pg->flags & PG_BUSY) { | | 331 | while (pg->flags & PG_BUSY) { |
332 | wait_for_page(vp, pg, NULL); | | 332 | wait_for_page(vp, pg, NULL); |
333 | KASSERT(mutex_owned(vp->v_interlock)); | | 333 | KASSERT(mutex_owned(vp->v_interlock)); |
334 | if (i > 0) | | 334 | if (i > 0) |
335 | uvm_page_unbusy(pgs, i); | | 335 | uvm_page_unbusy(pgs, i); |
336 | KASSERT(mutex_owned(vp->v_interlock)); | | 336 | KASSERT(mutex_owned(vp->v_interlock)); |
337 | goto top; | | 337 | goto top; |
338 | } | | 338 | } |
339 | pg->flags |= PG_BUSY; | | 339 | pg->flags |= PG_BUSY; |
340 | UVM_PAGE_OWN(pg, "lfs_putpages"); | | 340 | UVM_PAGE_OWN(pg, "lfs_putpages"); |
341 | | | 341 | |
342 | pmap_page_protect(pg, VM_PROT_NONE); | | 342 | pmap_page_protect(pg, VM_PROT_NONE); |
343 | tdirty = (pmap_clear_modify(pg) || | | 343 | tdirty = (pmap_clear_modify(pg) || |
344 | (pg->flags & PG_CLEAN) == 0); | | 344 | (pg->flags & PG_CLEAN) == 0); |
345 | dirty += tdirty; | | 345 | dirty += tdirty; |
346 | } | | 346 | } |
347 | if (pages_per_block > 0 && nonexistent >= pages_per_block) { | | 347 | if (pages_per_block > 0 && nonexistent >= pages_per_block) { |
348 | if (by_list) { | | 348 | if (by_list) { |
349 | curpg = TAILQ_NEXT(curpg, listq.queue); | | 349 | curpg = TAILQ_NEXT(curpg, listq.queue); |
350 | } else { | | 350 | } else { |
351 | soff += lfs_sb_getbsize(fs); | | 351 | soff += lfs_sb_getbsize(fs); |
352 | } | | 352 | } |
353 | continue; | | 353 | continue; |
354 | } | | 354 | } |
355 | | | 355 | |
356 | any_dirty += dirty; | | 356 | any_dirty += dirty; |
357 | KASSERT(nonexistent == 0); | | 357 | KASSERT(nonexistent == 0); |
358 | KASSERT(mutex_owned(vp->v_interlock)); | | 358 | KASSERT(mutex_owned(vp->v_interlock)); |
359 | | | 359 | |
360 | /* | | 360 | /* |
361 | * If any are dirty make all dirty; unbusy them, | | 361 | * If any are dirty make all dirty; unbusy them, |
362 | * but if we were asked to clean, wire them so that | | 362 | * but if we were asked to clean, wire them so that |
363 | * the pagedaemon doesn't bother us about them while | | 363 | * the pagedaemon doesn't bother us about them while |
364 | * they're on their way to disk. | | 364 | * they're on their way to disk. |
365 | */ | | 365 | */ |
366 | for (i = 0; i == 0 || i < pages_per_block; i++) { | | 366 | for (i = 0; i == 0 || i < pages_per_block; i++) { |
367 | KASSERT(mutex_owned(vp->v_interlock)); | | 367 | KASSERT(mutex_owned(vp->v_interlock)); |
368 | pg = pgs[i]; | | 368 | pg = pgs[i]; |
369 | KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); | | 369 | KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); |
370 | KASSERT(pg->flags & PG_BUSY); | | 370 | KASSERT(pg->flags & PG_BUSY); |
371 | if (dirty) { | | 371 | if (dirty) { |
372 | pg->flags &= ~PG_CLEAN; | | 372 | pg->flags &= ~PG_CLEAN; |
373 | if (flags & PGO_FREE) { | | 373 | if (flags & PGO_FREE) { |
374 | /* | | 374 | /* |
375 | * Wire the page so that | | 375 | * Wire the page so that |
376 | * pdaemon doesn't see it again. | | 376 | * pdaemon doesn't see it again. |
377 | */ | | 377 | */ |
378 | mutex_enter(&uvm_pageqlock); | | 378 | mutex_enter(&uvm_pageqlock); |
379 | uvm_pagewire(pg); | | 379 | uvm_pagewire(pg); |
380 | mutex_exit(&uvm_pageqlock); | | 380 | mutex_exit(&uvm_pageqlock); |
381 | | | 381 | |
382 | /* Suspended write flag */ | | 382 | /* Suspended write flag */ |
383 | pg->flags |= PG_DELWRI; | | 383 | pg->flags |= PG_DELWRI; |
384 | } | | 384 | } |
385 | } | | 385 | } |
386 | if (pg->flags & PG_WANTED) | | 386 | if (pg->flags & PG_WANTED) |
387 | wakeup(pg); | | 387 | wakeup(pg); |
388 | pg->flags &= ~(PG_WANTED|PG_BUSY); | | 388 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
389 | UVM_PAGE_OWN(pg, NULL); | | 389 | UVM_PAGE_OWN(pg, NULL); |
390 | } | | 390 | } |
391 | | | 391 | |
392 | if (checkfirst && any_dirty) | | 392 | if (checkfirst && any_dirty) |
393 | break; | | 393 | break; |
394 | | | 394 | |
395 | if (by_list) { | | 395 | if (by_list) { |
396 | curpg = TAILQ_NEXT(curpg, listq.queue); | | 396 | curpg = TAILQ_NEXT(curpg, listq.queue); |
397 | } else { | | 397 | } else { |
398 | soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs)); | | 398 | soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs)); |
399 | } | | 399 | } |
400 | } | | 400 | } |
401 | | | 401 | |
402 | KASSERT(mutex_owned(vp->v_interlock)); | | 402 | KASSERT(mutex_owned(vp->v_interlock)); |
403 | return any_dirty; | | 403 | return any_dirty; |
404 | } | | 404 | } |
405 | | | 405 | |
406 | /* | | 406 | /* |
407 | * lfs_putpages functions like genfs_putpages except that | | 407 | * lfs_putpages functions like genfs_putpages except that |
408 | * | | 408 | * |
409 | * (1) It needs to bounds-check the incoming requests to ensure that | | 409 | * (1) It needs to bounds-check the incoming requests to ensure that |
410 | * they are block-aligned; if they are not, expand the range and | | 410 | * they are block-aligned; if they are not, expand the range and |
411 | * do the right thing in case, e.g., the requested range is clean | | 411 | * do the right thing in case, e.g., the requested range is clean |
412 | * but the expanded range is dirty. | | 412 | * but the expanded range is dirty. |
413 | * | | 413 | * |
414 | * (2) It needs to explicitly send blocks to be written when it is done. | | 414 | * (2) It needs to explicitly send blocks to be written when it is done. |
415 | * If VOP_PUTPAGES is called without the seglock held, we simply take | | 415 | * If VOP_PUTPAGES is called without the seglock held, we simply take |
416 | * the seglock and let lfs_segunlock wait for us. | | 416 | * the seglock and let lfs_segunlock wait for us. |
417 | * XXX There might be a bad situation if we have to flush a vnode while | | 417 | * XXX There might be a bad situation if we have to flush a vnode while |
418 | * XXX lfs_markv is in operation. As of this writing we panic in this | | 418 | * XXX lfs_markv is in operation. As of this writing we panic in this |
419 | * XXX case. | | 419 | * XXX case. |
420 | * | | 420 | * |
421 | * Assumptions: | | 421 | * Assumptions: |
422 | * | | 422 | * |
423 | * (1) The caller does not hold any pages in this vnode busy. If it does, | | 423 | * (1) The caller does not hold any pages in this vnode busy. If it does, |
424 | * there is a danger that when we expand the page range and busy the | | 424 | * there is a danger that when we expand the page range and busy the |
425 | * pages we will deadlock. | | 425 | * pages we will deadlock. |
426 | * | | 426 | * |
427 | * (2) We are called with vp->v_interlock held; we must return with it | | 427 | * (2) We are called with vp->v_interlock held; we must return with it |
428 | * released. | | 428 | * released. |
429 | * | | 429 | * |
430 | * (3) We don't absolutely have to free pages right away, provided that | | 430 | * (3) We don't absolutely have to free pages right away, provided that |
431 | * the request does not have PGO_SYNCIO. When the pagedaemon gives | | 431 | * the request does not have PGO_SYNCIO. When the pagedaemon gives |
432 | * us a request with PGO_FREE, we take the pages out of the paging | | 432 | * us a request with PGO_FREE, we take the pages out of the paging |
433 | * queue and wake up the writer, which will handle freeing them for us. | | 433 | * queue and wake up the writer, which will handle freeing them for us. |
434 | * | | 434 | * |
435 | * We ensure that for any filesystem block, all pages for that | | 435 | * We ensure that for any filesystem block, all pages for that |
436 | * block are either resident or not, even if those pages are higher | | 436 | * block are either resident or not, even if those pages are higher |
437 | * than EOF; that means that we will be getting requests to free | | 437 | * than EOF; that means that we will be getting requests to free |
438 | * "unused" pages above EOF all the time, and should ignore them. | | 438 | * "unused" pages above EOF all the time, and should ignore them. |
439 | * | | 439 | * |
440 | * (4) If we are called with PGO_LOCKED, the finfo array we are to write | | 440 | * (4) If we are called with PGO_LOCKED, the finfo array we are to write |
441 | * into has been set up for us by lfs_writefile. If not, we will | | 441 | * into has been set up for us by lfs_writefile. If not, we will |
442 | * have to handle allocating and/or freeing an finfo entry. | | 442 | * have to handle allocating and/or freeing an finfo entry. |
443 | * | | 443 | * |
444 | * XXX note that we're (ab)using PGO_LOCKED as "seglock held". | | 444 | * XXX note that we're (ab)using PGO_LOCKED as "seglock held". |
445 | */ | | 445 | */ |
446 | | | 446 | |
447 | /* How many times to loop before we should start to worry */ | | 447 | /* How many times to loop before we should start to worry */ |
448 | #define TOOMANY 4 | | 448 | #define TOOMANY 4 |
449 | | | 449 | |
450 | int | | 450 | int |
451 | lfs_putpages(void *v) | | 451 | lfs_putpages(void *v) |
452 | { | | 452 | { |
453 | int error; | | 453 | int error; |
454 | struct vop_putpages_args /* { | | 454 | struct vop_putpages_args /* { |
455 | struct vnode *a_vp; | | 455 | struct vnode *a_vp; |
456 | voff_t a_offlo; | | 456 | voff_t a_offlo; |
457 | voff_t a_offhi; | | 457 | voff_t a_offhi; |
458 | int a_flags; | | 458 | int a_flags; |
459 | } */ *ap = v; | | 459 | } */ *ap = v; |
460 | struct vnode *vp; | | 460 | struct vnode *vp; |
461 | struct inode *ip; | | 461 | struct inode *ip; |
462 | struct lfs *fs; | | 462 | struct lfs *fs; |
463 | struct segment *sp; | | 463 | struct segment *sp; |
464 | off_t origoffset, startoffset, endoffset, origendoffset, blkeof; | | 464 | off_t origoffset, startoffset, endoffset, origendoffset, blkeof; |
465 | off_t off, max_endoffset; | | 465 | off_t off, max_endoffset; |
466 | bool seglocked, sync, pagedaemon, reclaim; | | 466 | bool seglocked, sync, pagedaemon, reclaim; |
467 | struct vm_page *pg, *busypg; | | 467 | struct vm_page *pg, *busypg; |
468 | UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); | | 468 | UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); |
469 | struct mount *trans_mp; | | 469 | struct mount *trans_mp; |
470 | int oreclaim = 0; | | 470 | int oreclaim = 0; |
471 | int donewriting = 0; | | 471 | int donewriting = 0; |
472 | #ifdef DEBUG | | 472 | #ifdef DEBUG |
473 | int debug_n_again, debug_n_dirtyclean; | | 473 | int debug_n_again, debug_n_dirtyclean; |
474 | #endif | | 474 | #endif |
475 | | | 475 | |
476 | vp = ap->a_vp; | | 476 | vp = ap->a_vp; |
477 | ip = VTOI(vp); | | 477 | ip = VTOI(vp); |
478 | fs = ip->i_lfs; | | 478 | fs = ip->i_lfs; |
479 | sync = (ap->a_flags & PGO_SYNCIO) != 0; | | 479 | sync = (ap->a_flags & PGO_SYNCIO) != 0; |
480 | reclaim = (ap->a_flags & PGO_RECLAIM) != 0; | | 480 | reclaim = (ap->a_flags & PGO_RECLAIM) != 0; |
481 | pagedaemon = (curlwp == uvm.pagedaemon_lwp); | | 481 | pagedaemon = (curlwp == uvm.pagedaemon_lwp); |
482 | trans_mp = NULL; | | 482 | trans_mp = NULL; |
483 | | | 483 | |
484 | KASSERT(mutex_owned(vp->v_interlock)); | | 484 | KASSERT(mutex_owned(vp->v_interlock)); |
485 | | | 485 | |
486 | /* Putpages does nothing for metadata. */ | | 486 | /* Putpages does nothing for metadata. */ |
487 | if (vp == fs->lfs_ivnode || vp->v_type != VREG) { | | 487 | if (vp == fs->lfs_ivnode || vp->v_type != VREG) { |
488 | mutex_exit(vp->v_interlock); | | 488 | mutex_exit(vp->v_interlock); |
489 | return 0; | | 489 | return 0; |
490 | } | | 490 | } |
491 | | | 491 | |
492 | retry: | | 492 | retry: |
493 | /* | | 493 | /* |
494 | * If there are no pages, don't do anything. | | 494 | * If there are no pages, don't do anything. |
495 | */ | | 495 | */ |
496 | if (vp->v_uobj.uo_npages == 0) { | | 496 | if (vp->v_uobj.uo_npages == 0) { |
497 | if (TAILQ_EMPTY(&vp->v_uobj.memq) && | | 497 | if (TAILQ_EMPTY(&vp->v_uobj.memq) && |
498 | (vp->v_iflag & VI_ONWORKLST) && | | 498 | (vp->v_iflag & VI_ONWORKLST) && |
499 | LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { | | 499 | LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { |
500 | vp->v_iflag &= ~VI_WRMAPDIRTY; | | 500 | vp->v_iflag &= ~VI_WRMAPDIRTY; |
501 | vn_syncer_remove_from_worklist(vp); | | 501 | vn_syncer_remove_from_worklist(vp); |
502 | } | | 502 | } |
503 | if (trans_mp) | | 503 | if (trans_mp) |
504 | fstrans_done(trans_mp); | | 504 | fstrans_done(trans_mp); |
505 | mutex_exit(vp->v_interlock); | | 505 | mutex_exit(vp->v_interlock); |
506 | | | 506 | |
507 | /* Remove us from paging queue, if we were on it */ | | 507 | /* Remove us from paging queue, if we were on it */ |
508 | mutex_enter(&lfs_lock); | | 508 | mutex_enter(&lfs_lock); |
509 | if (ip->i_state & IN_PAGING) { | | 509 | if (ip->i_state & IN_PAGING) { |
510 | ip->i_state &= ~IN_PAGING; | | 510 | ip->i_state &= ~IN_PAGING; |
511 | TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); | | 511 | TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); |
512 | } | | 512 | } |
513 | mutex_exit(&lfs_lock); | | 513 | mutex_exit(&lfs_lock); |
514 | | | 514 | |
515 | KASSERT(!mutex_owned(vp->v_interlock)); | | 515 | KASSERT(!mutex_owned(vp->v_interlock)); |
516 | return 0; | | 516 | return 0; |
517 | } | | 517 | } |
518 | | | 518 | |
519 | blkeof = lfs_blkroundup(fs, ip->i_size); | | 519 | blkeof = lfs_blkroundup(fs, ip->i_size); |
520 | | | 520 | |
521 | /* | | 521 | /* |
522 | * Ignore requests to free pages past EOF but in the same block | | 522 | * Ignore requests to free pages past EOF but in the same block |
523 | * as EOF, unless the vnode is being reclaimed or the request | | 523 | * as EOF, unless the vnode is being reclaimed or the request |
524 | * is synchronous. (If the request is sync, it comes from | | 524 | * is synchronous. (If the request is sync, it comes from |
525 | * lfs_truncate.) | | 525 | * lfs_truncate.) |
526 | * | | 526 | * |
527 | * To avoid being flooded with this request, make these pages | | 527 | * To avoid being flooded with this request, make these pages |
528 | * look "active". | | 528 | * look "active". |
529 | */ | | 529 | */ |
530 | if (!sync && !reclaim && | | 530 | if (!sync && !reclaim && |
531 | ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { | | 531 | ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { |
532 | origoffset = ap->a_offlo; | | 532 | origoffset = ap->a_offlo; |
533 | for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) { | | 533 | for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) { |
534 | pg = uvm_pagelookup(&vp->v_uobj, off); | | 534 | pg = uvm_pagelookup(&vp->v_uobj, off); |
535 | KASSERT(pg != NULL); | | 535 | KASSERT(pg != NULL); |
536 | while (pg->flags & PG_BUSY) { | | 536 | while (pg->flags & PG_BUSY) { |
537 | pg->flags |= PG_WANTED; | | 537 | pg->flags |= PG_WANTED; |
538 | UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, | | 538 | UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, |
539 | "lfsput2", 0); | | 539 | "lfsput2", 0); |
540 | mutex_enter(vp->v_interlock); | | 540 | mutex_enter(vp->v_interlock); |
541 | } | | 541 | } |
542 | mutex_enter(&uvm_pageqlock); | | 542 | mutex_enter(&uvm_pageqlock); |
543 | uvm_pageactivate(pg); | | 543 | uvm_pageactivate(pg); |
544 | mutex_exit(&uvm_pageqlock); | | 544 | mutex_exit(&uvm_pageqlock); |
545 | } | | 545 | } |
546 | ap->a_offlo = blkeof; | | 546 | ap->a_offlo = blkeof; |
547 | if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { | | 547 | if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { |
548 | mutex_exit(vp->v_interlock); | | 548 | mutex_exit(vp->v_interlock); |
549 | return 0; | | 549 | return 0; |
550 | } | | 550 | } |
551 | } | | 551 | } |
552 | | | 552 | |
553 | /* | | 553 | /* |
554 | * Extend page range to start and end at block boundaries. | | 554 | * Extend page range to start and end at block boundaries. |
555 | * (For the purposes of VOP_PUTPAGES, fragments don't exist.) | | 555 | * (For the purposes of VOP_PUTPAGES, fragments don't exist.) |
556 | */ | | 556 | */ |
557 | origoffset = ap->a_offlo; | | 557 | origoffset = ap->a_offlo; |
558 | origendoffset = ap->a_offhi; | | 558 | origendoffset = ap->a_offhi; |
559 | startoffset = origoffset & ~(lfs_sb_getbmask(fs)); | | 559 | startoffset = origoffset & ~(lfs_sb_getbmask(fs)); |
560 | max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs)) | | 560 | max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs)) |
561 | << lfs_sb_getbshift(fs); | | 561 | << lfs_sb_getbshift(fs); |
562 | | | 562 | |
563 | if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { | | 563 | if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { |
564 | endoffset = max_endoffset; | | 564 | endoffset = max_endoffset; |
565 | origendoffset = endoffset; | | 565 | origendoffset = endoffset; |
566 | } else { | | 566 | } else { |
567 | origendoffset = round_page(ap->a_offhi); | | 567 | origendoffset = round_page(ap->a_offhi); |
568 | endoffset = round_page(lfs_blkroundup(fs, origendoffset)); | | 568 | endoffset = round_page(lfs_blkroundup(fs, origendoffset)); |
569 | } | | 569 | } |
570 | | | 570 | |
571 | KASSERT(startoffset > 0 || endoffset >= startoffset); | | 571 | KASSERT(startoffset > 0 || endoffset >= startoffset); |
572 | if (startoffset == endoffset) { | | 572 | if (startoffset == endoffset) { |
573 | /* Nothing to do, why were we called? */ | | 573 | /* Nothing to do, why were we called? */ |
574 | mutex_exit(vp->v_interlock); | | 574 | mutex_exit(vp->v_interlock); |
575 | DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" | | 575 | DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" |
576 | PRId64 "\n", startoffset)); | | 576 | PRId64 "\n", startoffset)); |
577 | return 0; | | 577 | return 0; |
578 | } | | 578 | } |
579 | | | 579 | |
580 | ap->a_offlo = startoffset; | | 580 | ap->a_offlo = startoffset; |
581 | ap->a_offhi = endoffset; | | 581 | ap->a_offhi = endoffset; |
582 | | | 582 | |
583 | /* | | 583 | /* |
584 | * If not cleaning, just send the pages through genfs_putpages | | 584 | * If not cleaning, just send the pages through genfs_putpages |
585 | * to be returned to the pool. | | 585 | * to be returned to the pool. |
586 | */ | | 586 | */ |
587 | if (!(ap->a_flags & PGO_CLEANIT)) { | | 587 | if (!(ap->a_flags & PGO_CLEANIT)) { |
588 | DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n", | | 588 | DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n", |
589 | vp, (int)ip->i_number, ap->a_flags)); | | 589 | vp, (int)ip->i_number, ap->a_flags)); |
590 | int r = genfs_putpages(v); | | 590 | int r = genfs_putpages(v); |
591 | KASSERT(!mutex_owned(vp->v_interlock)); | | 591 | KASSERT(!mutex_owned(vp->v_interlock)); |
592 | return r; | | 592 | return r; |
593 | } | | 593 | } |
594 | | | 594 | |
595 | if (trans_mp /* && (ap->a_flags & PGO_CLEANIT) != 0 */) { | | 595 | if (trans_mp /* && (ap->a_flags & PGO_CLEANIT) != 0 */) { |
596 | if (pagedaemon) { | | 596 | if (pagedaemon) { |
597 | /* Pagedaemon must not sleep here. */ | | 597 | /* Pagedaemon must not sleep here. */ |
598 | trans_mp = vp->v_mount; | | 598 | trans_mp = vp->v_mount; |
599 | error = fstrans_start_nowait(trans_mp); | | 599 | error = fstrans_start_nowait(trans_mp); |
600 | if (error) { | | 600 | if (error) { |
601 | mutex_exit(vp->v_interlock); | | 601 | mutex_exit(vp->v_interlock); |
602 | return error; | | 602 | return error; |
603 | } | | 603 | } |
604 | } else { | | 604 | } else { |
605 | /* | | 605 | /* |
606 | * Cannot use vdeadcheck() here as this operation | | 606 | * Cannot use vdeadcheck() here as this operation |
607 | * usually gets used from VOP_RECLAIM(). Test for | | 607 | * usually gets used from VOP_RECLAIM(). Test for |
608 | * change of v_mount instead and retry on change. | | 608 | * change of v_mount instead and retry on change. |
609 | */ | | 609 | */ |
610 | mutex_exit(vp->v_interlock); | | 610 | mutex_exit(vp->v_interlock); |
611 | trans_mp = vp->v_mount; | | 611 | trans_mp = vp->v_mount; |
612 | fstrans_start(trans_mp); | | 612 | fstrans_start(trans_mp); |
613 | if (vp->v_mount != trans_mp) { | | 613 | if (vp->v_mount != trans_mp) { |
614 | fstrans_done(trans_mp); | | 614 | fstrans_done(trans_mp); |
615 | trans_mp = NULL; | | 615 | trans_mp = NULL; |
616 | } | | 616 | } |
617 | } | | 617 | } |
618 | mutex_enter(vp->v_interlock); | | 618 | mutex_enter(vp->v_interlock); |
619 | goto retry; | | 619 | goto retry; |
620 | } | | 620 | } |
621 | | | 621 | |
622 | /* Set PGO_BUSYFAIL to avoid deadlocks */ | | 622 | /* Set PGO_BUSYFAIL to avoid deadlocks */ |
623 | ap->a_flags |= PGO_BUSYFAIL; | | 623 | ap->a_flags |= PGO_BUSYFAIL; |
624 | | | 624 | |
625 | /* | | 625 | /* |
626 | * Likewise, if we are asked to clean but the pages are not | | 626 | * Likewise, if we are asked to clean but the pages are not |
627 | * dirty, we can just free them using genfs_putpages. | | 627 | * dirty, we can just free them using genfs_putpages. |
628 | */ | | 628 | */ |
629 | #ifdef DEBUG | | 629 | #ifdef DEBUG |
630 | debug_n_dirtyclean = 0; | | 630 | debug_n_dirtyclean = 0; |
631 | #endif | | 631 | #endif |
632 | do { | | 632 | do { |
633 | int r; | | 633 | int r; |
634 | KASSERT(mutex_owned(vp->v_interlock)); | | 634 | KASSERT(mutex_owned(vp->v_interlock)); |
635 | | | 635 | |
636 | /* Count the number of dirty pages */ | | 636 | /* Count the number of dirty pages */ |
637 | r = check_dirty(fs, vp, startoffset, endoffset, blkeof, | | 637 | r = check_dirty(fs, vp, startoffset, endoffset, blkeof, |
638 | ap->a_flags, 1, NULL); | | 638 | ap->a_flags, 1, NULL); |
639 | if (r < 0) { | | 639 | if (r < 0) { |
640 | /* Pages are busy with another process */ | | 640 | /* Pages are busy with another process */ |
641 | mutex_exit(vp->v_interlock); | | 641 | mutex_exit(vp->v_interlock); |
642 | error = EDEADLK; | | 642 | error = EDEADLK; |
643 | goto out; | | 643 | goto out; |
644 | } | | 644 | } |
645 | if (r > 0) /* Some pages are dirty */ | | 645 | if (r > 0) /* Some pages are dirty */ |
646 | break; | | 646 | break; |
647 | | | 647 | |
648 | /* | | 648 | /* |
649 | * Sometimes pages are dirtied between the time that | | 649 | * Sometimes pages are dirtied between the time that |
650 | * we check and the time we try to clean them. | | 650 | * we check and the time we try to clean them. |
651 | * Instruct lfs_gop_write to return EDEADLK in this case | | 651 | * Instruct lfs_gop_write to return EDEADLK in this case |
652 | * so we can write them properly. | | 652 | * so we can write them properly. |
653 | */ | | 653 | */ |
654 | ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE; | | 654 | ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE; |
655 | r = genfs_do_putpages(vp, startoffset, endoffset, | | 655 | r = genfs_do_putpages(vp, startoffset, endoffset, |
656 | ap->a_flags & ~PGO_SYNCIO, &busypg); | | 656 | ap->a_flags & ~PGO_SYNCIO, &busypg); |
657 | ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE; | | 657 | ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE; |
658 | if (r != EDEADLK) { | | 658 | if (r != EDEADLK) { |
659 | KASSERT(!mutex_owned(vp->v_interlock)); | | 659 | KASSERT(!mutex_owned(vp->v_interlock)); |
660 | error = r; | | 660 | error = r; |
661 | goto out; | | 661 | goto out; |
662 | } | | 662 | } |
663 | | | 663 | |
664 | /* One of the pages was busy. Start over. */ | | 664 | /* One of the pages was busy. Start over. */ |
665 | mutex_enter(vp->v_interlock); | | 665 | mutex_enter(vp->v_interlock); |
666 | wait_for_page(vp, busypg, "dirtyclean"); | | 666 | wait_for_page(vp, busypg, "dirtyclean"); |
667 | #ifdef DEBUG | | 667 | #ifdef DEBUG |
668 | ++debug_n_dirtyclean; | | 668 | ++debug_n_dirtyclean; |
669 | #endif | | 669 | #endif |
670 | } while(1); | | 670 | } while(1); |
671 | | | 671 | |
672 | #ifdef DEBUG | | 672 | #ifdef DEBUG |
673 | if (debug_n_dirtyclean > TOOMANY) | | 673 | if (debug_n_dirtyclean > TOOMANY) |
674 | DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n", | | 674 | DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n", |
675 | debug_n_dirtyclean)); | | 675 | debug_n_dirtyclean)); |
676 | #endif | | 676 | #endif |
677 | | | 677 | |
678 | /* | | 678 | /* |
679 | * Dirty and asked to clean. | | 679 | * Dirty and asked to clean. |
680 | * | | 680 | * |
681 | * Pagedaemon can't actually write LFS pages; wake up | | 681 | * Pagedaemon can't actually write LFS pages; wake up |
682 | * the writer to take care of that. The writer will | | 682 | * the writer to take care of that. The writer will |
683 | * notice the pager inode queue and act on that. | | 683 | * notice the pager inode queue and act on that. |
684 | * | | 684 | * |
685 | * XXX We must drop the vp->interlock before taking the lfs_lock or we | | 685 | * XXX We must drop the vp->interlock before taking the lfs_lock or we |
686 | * get a nasty deadlock with lfs_flush_pchain(). | | 686 | * get a nasty deadlock with lfs_flush_pchain(). |
687 | */ | | 687 | */ |
688 | if (pagedaemon) { | | 688 | if (pagedaemon) { |
689 | mutex_exit(vp->v_interlock); | | 689 | mutex_exit(vp->v_interlock); |
690 | mutex_enter(&lfs_lock); | | 690 | mutex_enter(&lfs_lock); |
691 | if (!(ip->i_state & IN_PAGING)) { | | 691 | if (!(ip->i_state & IN_PAGING)) { |
692 | ip->i_state |= IN_PAGING; | | 692 | ip->i_state |= IN_PAGING; |
693 | TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); | | 693 | TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); |
694 | } | | 694 | } |
695 | cv_broadcast(&lfs_writerd_cv); | | 695 | cv_broadcast(&lfs_writerd_cv); |
696 | mutex_exit(&lfs_lock); | | 696 | mutex_exit(&lfs_lock); |
697 | preempt(); | | 697 | preempt(); |
698 | KASSERT(!mutex_owned(vp->v_interlock)); | | 698 | KASSERT(!mutex_owned(vp->v_interlock)); |
699 | error = EWOULDBLOCK; | | 699 | error = EWOULDBLOCK; |
700 | goto out; | | 700 | goto out; |
701 | } | | 701 | } |
702 | | | 702 | |
703 | /* | | 703 | /* |
704 | * If this is a file created in a recent dirop, we can't flush its | | 704 | * If this is a file created in a recent dirop, we can't flush its |
705 | * inode until the dirop is complete. Drain dirops, then flush the | | 705 | * inode until the dirop is complete. Drain dirops, then flush the |
706 | * filesystem (taking care of any other pending dirops while we're | | 706 | * filesystem (taking care of any other pending dirops while we're |
707 | * at it). | | 707 | * at it). |
708 | */ | | 708 | */ |
709 | if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && | | 709 | if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && |
710 | (vp->v_uflag & VU_DIROP)) { | | 710 | (vp->v_uflag & VU_DIROP)) { |
711 | DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); | | 711 | DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); |
712 | | | 712 | |
713 | lfs_writer_enter(fs, "ppdirop"); | | 713 | lfs_writer_enter(fs, "ppdirop"); |
714 | | | 714 | |
715 | /* Note if we hold the vnode locked */ | | 715 | /* Note if we hold the vnode locked */ |
716 | if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) | | 716 | if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) |
717 | { | | 717 | { |
718 | DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n")); | | 718 | DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n")); |
719 | } else { | | 719 | } else { |
720 | DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n")); | | 720 | DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n")); |
721 | } | | 721 | } |
722 | mutex_exit(vp->v_interlock); | | 722 | mutex_exit(vp->v_interlock); |
723 | | | 723 | |
724 | mutex_enter(&lfs_lock); | | 724 | mutex_enter(&lfs_lock); |
725 | lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); | | 725 | lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); |
726 | mutex_exit(&lfs_lock); | | 726 | mutex_exit(&lfs_lock); |
727 | | | 727 | |
728 | mutex_enter(vp->v_interlock); | | 728 | mutex_enter(vp->v_interlock); |
729 | lfs_writer_leave(fs); | | 729 | lfs_writer_leave(fs); |
730 | | | 730 | |
731 | /* The flush will have cleaned out this vnode as well, | | 731 | /* |
732 | no need to do more to it. */ | | 732 | * The flush will have cleaned out this vnode as well, |
| | | 733 | * no need to do more to it. |
| | | 734 | * XXX then why are we falling through and continuing? |
| | | 735 | */ |
733 | } | | 736 | } |
734 | | | 737 | |
735 | /* | | 738 | /* |
736 | * This is it. We are going to write some pages. From here on | | 739 | * This is it. We are going to write some pages. From here on |
737 | * down it's all just mechanics. | | 740 | * down it's all just mechanics. |
738 | * | | 741 | * |
739 | * Don't let genfs_putpages wait; lfs_segunlock will wait for us. | | 742 | * Don't let genfs_putpages wait; lfs_segunlock will wait for us. |
740 | */ | | 743 | */ |
741 | ap->a_flags &= ~PGO_SYNCIO; | | 744 | ap->a_flags &= ~PGO_SYNCIO; |
742 | | | 745 | |
743 | /* | | 746 | /* |
744 | * If we've already got the seglock, flush the node and return. | | 747 | * If we've already got the seglock, flush the node and return. |
745 | * The FIP has already been set up for us by lfs_writefile, | | 748 | * The FIP has already been set up for us by lfs_writefile, |
746 | * and FIP cleanup and lfs_updatemeta will also be done there, | | 749 | * and FIP cleanup and lfs_updatemeta will also be done there, |
747 | * unless genfs_putpages returns EDEADLK; then we must flush | | 750 | * unless genfs_putpages returns EDEADLK; then we must flush |
748 | * what we have, and correct FIP and segment header accounting. | | 751 | * what we have, and correct FIP and segment header accounting. |
749 | */ | | 752 | */ |
750 | get_seglock: | | 753 | get_seglock: |
751 | /* | | 754 | /* |
752 | * If we are not called with the segment locked, lock it. | | 755 | * If we are not called with the segment locked, lock it. |
753 | * Account for a new FIP in the segment header, and set sp->vp. | | 756 | * Account for a new FIP in the segment header, and set sp->vp. |
754 | * (This should duplicate the setup at the top of lfs_writefile().) | | 757 | * (This should duplicate the setup at the top of lfs_writefile().) |
755 | */ | | 758 | */ |
756 | seglocked = (ap->a_flags & PGO_LOCKED) != 0; | | 759 | seglocked = (ap->a_flags & PGO_LOCKED) != 0; |
757 | if (!seglocked) { | | 760 | if (!seglocked) { |
758 | mutex_exit(vp->v_interlock); | | 761 | mutex_exit(vp->v_interlock); |
759 | error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); | | 762 | error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); |
760 | if (error != 0) { | | 763 | if (error != 0) { |
761 | KASSERT(!mutex_owned(vp->v_interlock)); | | 764 | KASSERT(!mutex_owned(vp->v_interlock)); |
762 | goto out; | | 765 | goto out; |
763 | } | | 766 | } |
764 | mutex_enter(vp->v_interlock); | | 767 | mutex_enter(vp->v_interlock); |
765 | lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); | | 768 | lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); |
766 | } | | 769 | } |
767 | sp = fs->lfs_sp; | | 770 | sp = fs->lfs_sp; |
768 | KASSERT(sp->vp == NULL); | | 771 | KASSERT(sp->vp == NULL); |
769 | sp->vp = vp; | | 772 | sp->vp = vp; |
770 | | | 773 | |
771 | /* Note segments written by reclaim; only for debugging */ | | 774 | /* Note segments written by reclaim; only for debugging */ |
772 | if (vdead_check(vp, VDEAD_NOWAIT) != 0) { | | 775 | if (vdead_check(vp, VDEAD_NOWAIT) != 0) { |
773 | sp->seg_flags |= SEGM_RECLAIM; | | 776 | sp->seg_flags |= SEGM_RECLAIM; |
774 | fs->lfs_reclino = ip->i_number; | | 777 | fs->lfs_reclino = ip->i_number; |
775 | } | | 778 | } |
776 | | | 779 | |
777 | /* | | 780 | /* |
778 | * Ensure that the partial segment is marked SS_DIROP if this | | 781 | * Ensure that the partial segment is marked SS_DIROP if this |
779 | * vnode is a DIROP. | | 782 | * vnode is a DIROP. |
780 | */ | | 783 | */ |
781 | if (!seglocked && vp->v_uflag & VU_DIROP) { | | 784 | if (!seglocked && vp->v_uflag & VU_DIROP) { |
782 | SEGSUM *ssp = sp->segsum; | | 785 | SEGSUM *ssp = sp->segsum; |
783 | | | 786 | |
784 | lfs_ss_setflags(fs, ssp, | | 787 | lfs_ss_setflags(fs, ssp, |
785 | lfs_ss_getflags(fs, ssp) | (SS_DIROP|SS_CONT)); | | 788 | lfs_ss_getflags(fs, ssp) | (SS_DIROP|SS_CONT)); |
786 | } | | 789 | } |
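The seglocked flag set just above drives a simple bracket: if lfs_putpages() was not entered with the segment lock already held, it must take the lock and register the per-file FINFO itself, and it is then also responsible for the matching release near the bottom of the function. A minimal userland sketch of that shape, assuming nothing beyond pthreads (flush_file() and the plain mutex standing in for the counted lfs_seglock are invented for illustration):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the per-filesystem segment lock. */
    static pthread_mutex_t seglock = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Flush one file's pages.  If the caller already holds the segment
     * lock (and has done the per-file setup), skip both the setup and
     * the matching teardown; otherwise do both here.
     */
    static void
    flush_file(bool caller_holds_seglock)
    {
            bool seglocked = caller_holds_seglock;

            if (!seglocked) {
                    pthread_mutex_lock(&seglock);            /* cf. lfs_seglock() */
                    printf("set up per-file bookkeeping\n"); /* cf. lfs_acquire_finfo() */
            }

            printf("gather and write pages\n");

            if (!seglocked) {
                    printf("tear down per-file bookkeeping\n"); /* cf. lfs_release_finfo() */
                    pthread_mutex_unlock(&seglock);          /* cf. lfs_segunlock() */
            }
    }

    int
    main(void)
    {
            flush_file(false);      /* standalone call: lock and unlock locally */

            pthread_mutex_lock(&seglock);
            flush_file(true);       /* called with the lock held, as from lfs_writefile() */
            pthread_mutex_unlock(&seglock);
            return 0;
    }

The point of remembering seglocked is purely symmetry: only the path that did the setup performs the teardown.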
787 | | | 790 | |
788 | /* | | 791 | /* |
789 | * Loop over genfs_putpages until all pages are gathered. | | 792 | * Loop over genfs_putpages until all pages are gathered. |
790 | * genfs_putpages() drops the interlock, so reacquire it if necessary. | | 793 | * genfs_putpages() drops the interlock, so reacquire it if necessary. |
791 | * Whenever we lose the interlock we have to rerun check_dirty, as | | 794 | * Whenever we lose the interlock we have to rerun check_dirty, as |
792 | * well, since more pages might have been dirtied in our absence. | | 795 | * well, since more pages might have been dirtied in our absence. |
793 | */ | | 796 | */ |
794 | #ifdef DEBUG | | 797 | #ifdef DEBUG |
795 | debug_n_again = 0; | | 798 | debug_n_again = 0; |
796 | #endif | | 799 | #endif |
797 | do { | | 800 | do { |
798 | busypg = NULL; | | 801 | busypg = NULL; |
799 | KASSERT(mutex_owned(vp->v_interlock)); | | 802 | KASSERT(mutex_owned(vp->v_interlock)); |
800 | if (check_dirty(fs, vp, startoffset, endoffset, blkeof, | | 803 | if (check_dirty(fs, vp, startoffset, endoffset, blkeof, |
801 | ap->a_flags, 0, &busypg) < 0) { | | 804 | ap->a_flags, 0, &busypg) < 0) { |
802 | mutex_exit(vp->v_interlock); | | 805 | mutex_exit(vp->v_interlock); |
803 | /* XXX why? --ks */ | | 806 | /* XXX why? --ks */ |
804 | mutex_enter(vp->v_interlock); | | 807 | mutex_enter(vp->v_interlock); |
805 | write_and_wait(fs, vp, busypg, seglocked, NULL); | | 808 | write_and_wait(fs, vp, busypg, seglocked, NULL); |
806 | if (!seglocked) { | | 809 | if (!seglocked) { |
807 | mutex_exit(vp->v_interlock); | | 810 | mutex_exit(vp->v_interlock); |
808 | lfs_release_finfo(fs); | | 811 | lfs_release_finfo(fs); |
809 | lfs_segunlock(fs); | | 812 | lfs_segunlock(fs); |
810 | mutex_enter(vp->v_interlock); | | 813 | mutex_enter(vp->v_interlock); |
811 | } | | 814 | } |
812 | sp->vp = NULL; | | 815 | sp->vp = NULL; |
813 | goto get_seglock; | | 816 | goto get_seglock; |
814 | } | | 817 | } |
815 | | | 818 | |
816 | busypg = NULL; | | 819 | busypg = NULL; |
817 | KASSERT(!mutex_owned(&uvm_pageqlock)); | | 820 | KASSERT(!mutex_owned(&uvm_pageqlock)); |
818 | oreclaim = (ap->a_flags & PGO_RECLAIM); | | 821 | oreclaim = (ap->a_flags & PGO_RECLAIM); |
819 | ap->a_flags &= ~PGO_RECLAIM; | | 822 | ap->a_flags &= ~PGO_RECLAIM; |
820 | error = genfs_do_putpages(vp, startoffset, endoffset, | | 823 | error = genfs_do_putpages(vp, startoffset, endoffset, |
821 | ap->a_flags, &busypg); | | 824 | ap->a_flags, &busypg); |
822 | ap->a_flags |= oreclaim; | | 825 | ap->a_flags |= oreclaim; |
823 | | | 826 | |
824 | if (error == EDEADLK || error == EAGAIN) { | | 827 | if (error == EDEADLK || error == EAGAIN) { |
825 | DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" | | 828 | DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" |
826 | " %d ino %d off %jx (seg %d)\n", error, | | 829 | " %d ino %d off %jx (seg %d)\n", error, |
827 | ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), | | 830 | ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), |
828 | lfs_dtosn(fs, lfs_sb_getoffset(fs)))); | | 831 | lfs_dtosn(fs, lfs_sb_getoffset(fs)))); |
829 | | | 832 | |
830 | if (oreclaim) { | | 833 | if (oreclaim) { |
831 | mutex_enter(vp->v_interlock); | | 834 | mutex_enter(vp->v_interlock); |
832 | write_and_wait(fs, vp, busypg, seglocked, "again"); | | 835 | write_and_wait(fs, vp, busypg, seglocked, "again"); |
833 | mutex_exit(vp->v_interlock); | | 836 | mutex_exit(vp->v_interlock); |
834 | } else { | | 837 | } else { |
835 | if ((sp->seg_flags & SEGM_SINGLE) && | | 838 | if ((sp->seg_flags & SEGM_SINGLE) && |
836 | lfs_sb_getcurseg(fs) != fs->lfs_startseg) | | 839 | lfs_sb_getcurseg(fs) != fs->lfs_startseg) |
837 | donewriting = 1; | | 840 | donewriting = 1; |
838 | } | | 841 | } |
839 | } else if (error) { | | 842 | } else if (error) { |
840 | DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" | | 843 | DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" |
841 | " %d ino %d off %jx (seg %d)\n", error, | | 844 | " %d ino %d off %jx (seg %d)\n", error, |
842 | (int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), | | 845 | (int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), |
843 | lfs_dtosn(fs, lfs_sb_getoffset(fs)))); | | 846 | lfs_dtosn(fs, lfs_sb_getoffset(fs)))); |
844 | } | | 847 | } |
845 | /* genfs_do_putpages loses the interlock */ | | 848 | /* genfs_do_putpages loses the interlock */ |
846 | #ifdef DEBUG | | 849 | #ifdef DEBUG |
847 | ++debug_n_again; | | 850 | ++debug_n_again; |
848 | #endif | | 851 | #endif |
849 | if (oreclaim && error == EAGAIN) { | | 852 | if (oreclaim && error == EAGAIN) { |
850 | DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n", | | 853 | DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n", |
851 | vp, (int)ip->i_number, vp->v_iflag, ap->a_flags)); | | 854 | vp, (int)ip->i_number, vp->v_iflag, ap->a_flags)); |
852 | mutex_enter(vp->v_interlock); | | 855 | mutex_enter(vp->v_interlock); |
853 | } | | 856 | } |
854 | if (error == EDEADLK) | | 857 | if (error == EDEADLK) |
855 | mutex_enter(vp->v_interlock); | | 858 | mutex_enter(vp->v_interlock); |
856 | } while (error == EDEADLK || (oreclaim && error == EAGAIN)); | | 859 | } while (error == EDEADLK || (oreclaim && error == EAGAIN)); |
857 | #ifdef DEBUG | | 860 | #ifdef DEBUG |
858 | if (debug_n_again > TOOMANY) | | 861 | if (debug_n_again > TOOMANY) |
859 | DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again)); | | 862 | DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again)); |
860 | #endif | | 863 | #endif |
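The do/while above keeps re-running check_dirty() and genfs_do_putpages() because both can give up part way: check_dirty() backs off when it meets a busy page, and genfs_do_putpages() returns EDEADLK (or EAGAIN during reclaim) when it would have to sleep on a page LFS itself is holding. Stripped of the locking, the control flow is just a retry loop; a small self-contained sketch, with fake_check_dirty() and fake_putpages() as invented stand-ins, not LFS interfaces:

    #include <errno.h>
    #include <stdio.h>

    /* Invented stand-ins: a busy page on pass 1, a would-be deadlock on pass 2. */
    static int fake_check_dirty(int pass) { return (pass == 1) ? -1 : 0; }
    static int fake_putpages(int pass)    { return (pass == 2) ? EDEADLK : 0; }

    /*
     * Keep retrying until the flush neither trips over a busy page
     * nor reports that it would have deadlocked.
     */
    static int
    flush_with_retry(void)
    {
            int error, pass = 0;

            do {
                    pass++;
                    if (fake_check_dirty(pass) < 0) {
                            /*
                             * A page was busy; the real code writes what it
                             * has gathered so far and starts over.  Here we
                             * just force another full pass.
                             */
                            error = EDEADLK;
                            continue;
                    }
                    error = fake_putpages(pass);
            } while (error == EDEADLK);

            printf("flushed after %d pass(es), error=%d\n", pass, error);
            return error;
    }

    int
    main(void)
    {
            return flush_with_retry();
    }

In the kernel version the important extra step is that every retry re-acquires v_interlock and rescans, since more pages may have been dirtied while the locks were dropped.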
861 | | | 864 | |
862 | KASSERT(sp != NULL && sp->vp == vp); | | 865 | KASSERT(sp != NULL && sp->vp == vp); |
863 | if (!seglocked && !donewriting) { | | 866 | if (!seglocked && !donewriting) { |
864 | sp->vp = NULL; | | 867 | sp->vp = NULL; |
865 | | | 868 | |
866 | /* Write indirect blocks as well */ | | 869 | /* Write indirect blocks as well */ |
867 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir); | | 870 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir); |
868 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir); | | 871 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir); |
869 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir); | | 872 | lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir); |
870 | | | 873 | |
871 | KASSERT(sp->vp == NULL); | | 874 | KASSERT(sp->vp == NULL); |
872 | sp->vp = vp; | | 875 | sp->vp = vp; |
873 | } | | 876 | } |
874 | | | 877 | |
875 | /* | | 878 | /* |
876 | * Blocks are now gathered into a segment waiting to be written. | | 879 | * Blocks are now gathered into a segment waiting to be written. |
877 | * All that's left to do is update the metadata and write them. | | 880 | * All that's left to do is update the metadata and write them.
878 | */ | | 881 | */ |
879 | lfs_updatemeta(sp); | | 882 | lfs_updatemeta(sp); |
880 | KASSERT(sp->vp == vp); | | 883 | KASSERT(sp->vp == vp); |
881 | sp->vp = NULL; | | 884 | sp->vp = NULL; |
882 | | | 885 | |
883 | /* | | 886 | /* |
884 | * If we were called from lfs_writefile, we don't need to clean up | | 887 | * If we were called from lfs_writefile, we don't need to clean up |
885 | * the FIP or unlock the segment lock. We're done. | | 888 | * the FIP or unlock the segment lock. We're done. |
886 | */ | | 889 | */ |
887 | if (seglocked) { | | 890 | if (seglocked) { |
888 | KASSERT(!mutex_owned(vp->v_interlock)); | | 891 | KASSERT(!mutex_owned(vp->v_interlock)); |
889 | goto out; | | 892 | goto out; |
890 | } | | 893 | } |
891 | | | 894 | |
892 | /* Clean up FIP and send it to disk. */ | | 895 | /* Clean up FIP and send it to disk. */ |
893 | lfs_release_finfo(fs); | | 896 | lfs_release_finfo(fs); |
894 | lfs_writeseg(fs, fs->lfs_sp); | | 897 | lfs_writeseg(fs, fs->lfs_sp); |
895 | | | 898 | |
896 | /* | | 899 | /* |
897 | * Remove ourselves from the paging queue if we wrote all our pages. | | 900 | * Remove ourselves from the paging queue if we wrote all our pages.
898 | */ | | 901 | */ |
899 | if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { | | 902 | if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { |
900 | mutex_enter(&lfs_lock); | | 903 | mutex_enter(&lfs_lock); |
901 | if (ip->i_state & IN_PAGING) { | | 904 | if (ip->i_state & IN_PAGING) { |
902 | ip->i_state &= ~IN_PAGING; | | 905 | ip->i_state &= ~IN_PAGING; |
903 | TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); | | 906 | TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); |
904 | } | | 907 | } |
905 | mutex_exit(&lfs_lock); | | 908 | mutex_exit(&lfs_lock); |
906 | } | | 909 | } |
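The block above is the usual flag-guarded queue membership idiom: IN_PAGING records whether the inode sits on lfs_pchainhd, so the TAILQ_REMOVE happens exactly once even if the flush runs again. A tiny userland sketch of the same idea (struct pnode, paging_done() and this flag value are made up; only the <sys/queue.h> macros are real):

    #include <sys/queue.h>
    #include <stdio.h>

    #define IN_PAGING 0x01          /* invented flag: "on the paging queue" */

    struct pnode {
            int state;
            TAILQ_ENTRY(pnode) chain;
    };

    TAILQ_HEAD(pchain, pnode);

    /*
     * Remove a node from the queue only if the flag says it is on it,
     * and clear the flag so a second call is harmless.
     */
    static void
    paging_done(struct pchain *q, struct pnode *p)
    {
            if (p->state & IN_PAGING) {
                    p->state &= ~IN_PAGING;
                    TAILQ_REMOVE(q, p, chain);
            }
    }

    int
    main(void)
    {
            struct pchain q = TAILQ_HEAD_INITIALIZER(q);
            struct pnode a = { .state = IN_PAGING };

            TAILQ_INSERT_TAIL(&q, &a, chain);
            paging_done(&q, &a);
            paging_done(&q, &a);    /* second call is a no-op */
            printf("queue empty: %s\n", TAILQ_EMPTY(&q) ? "yes" : "no");
            return 0;
    }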
907 | | | 910 | |
908 | /* | | 911 | /* |
909 | * XXX - with the malloc/copy writeseg, the pages are freed by now | | 912 | * XXX - with the malloc/copy writeseg, the pages are freed by now |
910 | * even if we don't wait (e.g. if we hold a nested lock). This | | 913 | * even if we don't wait (e.g. if we hold a nested lock). This |
911 | * will not be true if we stop using malloc/copy. | | 914 | * will not be true if we stop using malloc/copy. |
912 | */ | | 915 | */ |
913 | KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); | | 916 | KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); |
914 | lfs_segunlock(fs); | | 917 | lfs_segunlock(fs); |
915 | | | 918 | |
916 | /* | | 919 | /* |
917 | * Wait for v_numoutput to drop to zero. The seglock should | | 920 | * Wait for v_numoutput to drop to zero. The seglock should |
918 | * take care of this, but there is a slight possibility that | | 921 | * take care of this, but there is a slight possibility that |
919 | * aiodoned might not have got around to our buffers yet. | | 922 | * aiodoned might not have got around to our buffers yet. |
920 | */ | | 923 | */ |
921 | if (sync) { | | 924 | if (sync) { |
922 | mutex_enter(vp->v_interlock); | | 925 | mutex_enter(vp->v_interlock); |
923 | while (vp->v_numoutput > 0) { | | 926 | while (vp->v_numoutput > 0) { |
924 | DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" | | 927 | DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" |
925 | " num %d\n", ip->i_number, vp->v_numoutput)); | | 928 | " num %d\n", ip->i_number, vp->v_numoutput)); |
926 | cv_wait(&vp->v_cv, vp->v_interlock); | | 929 | cv_wait(&vp->v_cv, vp->v_interlock); |
927 | } | | 930 | } |
928 | mutex_exit(vp->v_interlock); | | 931 | mutex_exit(vp->v_interlock); |
929 | } | | 932 | } |
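The sync case above is the standard condition-variable wait: re-check v_numoutput under the same interlock that protects it and sleep on v_cv until the I/O completion path has driven it to zero. A rough userland analog using pthreads (interlock, outputcv, numoutput and fake_iodone() are invented names, not kernel interfaces):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t interlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  outputcv  = PTHREAD_COND_INITIALIZER;
    static int numoutput = 3;       /* pretend three buffers are in flight */

    /* Completion side: retire one buffer at a time and wake the waiter. */
    static void *
    fake_iodone(void *arg)
    {
            for (;;) {
                    pthread_mutex_lock(&interlock);
                    if (numoutput == 0) {
                            pthread_mutex_unlock(&interlock);
                            break;
                    }
                    numoutput--;
                    pthread_cond_broadcast(&outputcv);
                    pthread_mutex_unlock(&interlock);
            }
            return NULL;
    }

    int
    main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, fake_iodone, NULL);

            /* Waiter: sleep on the condvar until the count drains to zero. */
            pthread_mutex_lock(&interlock);
            while (numoutput > 0) {
                    printf("sleeping, %d buffers still outstanding\n", numoutput);
                    pthread_cond_wait(&outputcv, &interlock);
            }
            pthread_mutex_unlock(&interlock);

            pthread_join(t, NULL);
            printf("all output complete\n");
            return 0;
    }

As in the kernel code, the loop (not the wakeup) is what guarantees correctness: the condition is always re-tested with the mutex held before proceeding.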
930 | | | 933 | |
931 | out:; | | 934 | out:; |
932 | if (trans_mp) | | 935 | if (trans_mp) |
933 | fstrans_done(trans_mp); | | 936 | fstrans_done(trans_mp); |
934 | KASSERT(!mutex_owned(vp->v_interlock)); | | 937 | KASSERT(!mutex_owned(vp->v_interlock)); |
935 | return error; | | 938 | return error; |
936 | } | | 939 | } |
937 | | | 940 | |