| @@ -1,704 +1,702 @@ | | | @@ -1,704 +1,702 @@ |
1 | /* $NetBSD: ufs_readwrite.c,v 1.118 2015/03/31 11:43:05 riastradh Exp $ */ | | 1 | /* $NetBSD: ufs_readwrite.c,v 1.119 2015/04/12 22:41:28 riastradh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1993 | | 4 | * Copyright (c) 1993 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
15 | * 3. Neither the name of the University nor the names of its contributors | | 15 | * 3. Neither the name of the University nor the names of its contributors |
16 | * may be used to endorse or promote products derived from this software | | 16 | * may be used to endorse or promote products derived from this software |
17 | * without specific prior written permission. | | 17 | * without specific prior written permission. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | * SUCH DAMAGE. | | 29 | * SUCH DAMAGE. |
30 | * | | 30 | * |
31 | * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 | | 31 | * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 |
32 | */ | | 32 | */ |
33 | | | 33 | |
34 | #include <sys/cdefs.h> | | 34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.118 2015/03/31 11:43:05 riastradh Exp $"); | | 35 | __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.119 2015/04/12 22:41:28 riastradh Exp $"); |
36 | | | 36 | |
/*
 * This file is compiled twice: once for FFS and once, with
 * LFS_READWRITE defined, for LFS.  The macros below map the generic
 * names used in the code onto the names of the file system being built.
 */
#if defined(LFS_READWRITE)

/* File system structure and the inode member that points at it. */
#define	FS			struct lfs
#define	I_FS			i_lfs

/* Entry point names, plus their printable forms. */
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	BUFRD			lfs_bufrd
#define	BUFWR			lfs_bufwr

/* Superblock field names. */
#define	fs_bsize		lfs_bsize
#define	fs_bmask		lfs_bmask

/* The WAPBL hooks expand to nothing in the LFS build. */
#define	UFS_WAPBL_BEGIN(mp)	0
#define	UFS_WAPBL_END(mp)	do { } while (0)
#define	UFS_WAPBL_UPDATE(vp, access, modify, flags)	do { } while (0)

/* Block arithmetic helpers. */
#define	ufs_blkoff		lfs_blkoff
#define	ufs_blksize		lfs_blksize
#define	ufs_lblkno		lfs_lblkno
#define	ufs_lblktosize		lfs_lblktosize
#define	ufs_blkroundup		lfs_blkroundup

#else /* !LFS_READWRITE */

/* File system structure and the inode member that points at it. */
#define	FS			struct fs
#define	I_FS			i_fs

/* Entry point names, plus their printable forms. */
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#define	BUFRD			ffs_bufrd
#define	BUFWR			ffs_bufwr

/* Block arithmetic helpers. */
#define	ufs_blkoff		ffs_blkoff
#define	ufs_blksize		ffs_blksize
#define	ufs_lblkno		ffs_lblkno
#define	ufs_lblktosize		ffs_lblktosize
#define	ufs_blkroundup		ffs_blkroundup

#endif /* LFS_READWRITE */
71 | | | 71 | |
72 | static int ufs_post_read_update(struct vnode *, int, int); | | 72 | static int ufs_post_read_update(struct vnode *, int, int); |
73 | static int ufs_post_write_update(struct vnode *, struct uio *, int, | | 73 | static int ufs_post_write_update(struct vnode *, struct uio *, int, |
74 | kauth_cred_t, off_t, int, int, int); | | 74 | kauth_cred_t, off_t, int, int, int); |
75 | | | 75 | |
76 | /* | | 76 | /* |
77 | * Vnode op for reading. | | 77 | * Vnode op for reading. |
78 | */ | | 78 | */ |
79 | /* ARGSUSED */ | | 79 | /* ARGSUSED */ |
80 | int | | 80 | int |
81 | READ(void *v) | | 81 | READ(void *v) |
82 | { | | 82 | { |
83 | struct vop_read_args /* { | | 83 | struct vop_read_args /* { |
84 | struct vnode *a_vp; | | 84 | struct vnode *a_vp; |
85 | struct uio *a_uio; | | 85 | struct uio *a_uio; |
86 | int a_ioflag; | | 86 | int a_ioflag; |
87 | kauth_cred_t a_cred; | | 87 | kauth_cred_t a_cred; |
88 | } */ *ap = v; | | 88 | } */ *ap = v; |
89 | struct vnode *vp; | | 89 | struct vnode *vp; |
90 | struct inode *ip; | | 90 | struct inode *ip; |
91 | struct uio *uio; | | 91 | struct uio *uio; |
92 | struct ufsmount *ump; | | 92 | struct ufsmount *ump; |
93 | vsize_t bytelen; | | 93 | vsize_t bytelen; |
94 | int error, ioflag, advice; | | 94 | int error, ioflag, advice; |
95 | | | 95 | |
96 | vp = ap->a_vp; | | 96 | vp = ap->a_vp; |
97 | ip = VTOI(vp); | | 97 | ip = VTOI(vp); |
98 | ump = ip->i_ump; | | 98 | ump = ip->i_ump; |
99 | uio = ap->a_uio; | | 99 | uio = ap->a_uio; |
100 | ioflag = ap->a_ioflag; | | 100 | ioflag = ap->a_ioflag; |
101 | error = 0; | | 101 | error = 0; |
102 | | | 102 | |
103 | KASSERT(uio->uio_rw == UIO_READ); | | 103 | KASSERT(uio->uio_rw == UIO_READ); |
104 | KASSERT(vp->v_type == VREG || vp->v_type == VDIR); | | 104 | KASSERT(vp->v_type == VREG || vp->v_type == VDIR); |
105 | | | 105 | |
106 | /* XXX Eliminate me by refusing directory reads from userland. */ | | 106 | /* XXX Eliminate me by refusing directory reads from userland. */ |
107 | if (vp->v_type == VDIR) | | 107 | if (vp->v_type == VDIR) |
108 | return BUFRD(vp, uio, ioflag, ap->a_cred); | | 108 | return BUFRD(vp, uio, ioflag, ap->a_cred); |
109 | #ifdef LFS_READWRITE | | 109 | #ifdef LFS_READWRITE |
110 | /* XXX Eliminate me by using ufs_bufio in lfs. */ | | 110 | /* XXX Eliminate me by using ufs_bufio in lfs. */ |
111 | if (vp->v_type == VREG && ip->i_number == LFS_IFILE_INUM) | | 111 | if (vp->v_type == VREG && ip->i_number == LFS_IFILE_INUM) |
112 | return BUFRD(vp, uio, ioflag, ap->a_cred); | | 112 | return BUFRD(vp, uio, ioflag, ap->a_cred); |
113 | #endif | | 113 | #endif |
114 | if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize) | | 114 | if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize) |
115 | return (EFBIG); | | 115 | return (EFBIG); |
116 | if (uio->uio_resid == 0) | | 116 | if (uio->uio_resid == 0) |
117 | return (0); | | 117 | return (0); |
118 | | | 118 | |
119 | #ifndef LFS_READWRITE | | 119 | #ifndef LFS_READWRITE |
120 | if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT) | | 120 | if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT) |
121 | return ffs_snapshot_read(vp, uio, ioflag); | | 121 | return ffs_snapshot_read(vp, uio, ioflag); |
122 | #endif /* !LFS_READWRITE */ | | 122 | #endif /* !LFS_READWRITE */ |
123 | | | 123 | |
124 | fstrans_start(vp->v_mount, FSTRANS_SHARED); | | 124 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
125 | | | 125 | |
126 | if (uio->uio_offset >= ip->i_size) | | 126 | if (uio->uio_offset >= ip->i_size) |
127 | goto out; | | 127 | goto out; |
128 | | | 128 | |
129 | KASSERT(vp->v_type == VREG); | | 129 | KASSERT(vp->v_type == VREG); |
130 | advice = IO_ADV_DECODE(ap->a_ioflag); | | 130 | advice = IO_ADV_DECODE(ap->a_ioflag); |
131 | while (uio->uio_resid > 0) { | | 131 | while (uio->uio_resid > 0) { |
132 | if (ioflag & IO_DIRECT) { | | 132 | if (ioflag & IO_DIRECT) { |
133 | genfs_directio(vp, uio, ioflag); | | 133 | genfs_directio(vp, uio, ioflag); |
134 | } | | 134 | } |
135 | bytelen = MIN(ip->i_size - uio->uio_offset, uio->uio_resid); | | 135 | bytelen = MIN(ip->i_size - uio->uio_offset, uio->uio_resid); |
136 | if (bytelen == 0) | | 136 | if (bytelen == 0) |
137 | break; | | 137 | break; |
138 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice, | | 138 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice, |
139 | UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp)); | | 139 | UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp)); |
140 | if (error) | | 140 | if (error) |
141 | break; | | 141 | break; |
142 | } | | 142 | } |
143 | | | 143 | |
144 | out: | | 144 | out: |
145 | error = ufs_post_read_update(vp, ap->a_ioflag, error); | | 145 | error = ufs_post_read_update(vp, ap->a_ioflag, error); |
146 | fstrans_done(vp->v_mount); | | 146 | fstrans_done(vp->v_mount); |
147 | return (error); | | 147 | return (error); |
148 | } | | 148 | } |
149 | | | 149 | |
150 | /* | | 150 | /* |
151 | * UFS op for reading via the buffer cache | | 151 | * UFS op for reading via the buffer cache |
152 | */ | | 152 | */ |
153 | int | | 153 | int |
154 | BUFRD(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) | | 154 | BUFRD(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) |
155 | { | | 155 | { |
156 | struct inode *ip; | | 156 | struct inode *ip; |
157 | struct ufsmount *ump; | | 157 | struct ufsmount *ump; |
158 | FS *fs; | | 158 | FS *fs; |
159 | struct buf *bp; | | 159 | struct buf *bp; |
160 | daddr_t lbn, nextlbn; | | 160 | daddr_t lbn, nextlbn; |
161 | off_t bytesinfile; | | 161 | off_t bytesinfile; |
162 | long size, xfersize, blkoffset; | | 162 | long size, xfersize, blkoffset; |
163 | int error; | | 163 | int error; |
164 | | | 164 | |
165 | KASSERT(VOP_ISLOCKED(vp)); | | 165 | KASSERT(VOP_ISLOCKED(vp)); |
166 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); | | 166 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); |
167 | KASSERT(uio->uio_rw == UIO_READ); | | 167 | KASSERT(uio->uio_rw == UIO_READ); |
168 | | | 168 | |
169 | ip = VTOI(vp); | | 169 | ip = VTOI(vp); |
170 | ump = ip->i_ump; | | 170 | ump = ip->i_ump; |
171 | fs = ip->I_FS; | | 171 | fs = ip->I_FS; |
172 | error = 0; | | 172 | error = 0; |
173 | | | 173 | |
174 | KASSERT(vp->v_type != VLNK || ip->i_size >= ump->um_maxsymlinklen); | | 174 | KASSERT(vp->v_type != VLNK || ip->i_size >= ump->um_maxsymlinklen); |
175 | KASSERT(vp->v_type != VLNK || ump->um_maxsymlinklen != 0 || | | 175 | KASSERT(vp->v_type != VLNK || ump->um_maxsymlinklen != 0 || |
176 | DIP(ip, blocks) == 0); | | 176 | DIP(ip, blocks) == 0); |
177 | | | 177 | |
178 | if (uio->uio_offset > ump->um_maxfilesize) | | 178 | if (uio->uio_offset > ump->um_maxfilesize) |
179 | return EFBIG; | | 179 | return EFBIG; |
180 | if (uio->uio_resid == 0) | | 180 | if (uio->uio_resid == 0) |
181 | return 0; | | 181 | return 0; |
182 | | | 182 | |
183 | #ifndef LFS_READWRITE | | 183 | #ifndef LFS_READWRITE |
184 | KASSERT(!ISSET(ip->i_flags, (SF_SNAPSHOT | SF_SNAPINVAL))); | | 184 | KASSERT(!ISSET(ip->i_flags, (SF_SNAPSHOT | SF_SNAPINVAL))); |
185 | #endif | | 185 | #endif |
186 | | | 186 | |
187 | fstrans_start(vp->v_mount, FSTRANS_SHARED); | | 187 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
188 | | | 188 | |
189 | if (uio->uio_offset >= ip->i_size) | | 189 | if (uio->uio_offset >= ip->i_size) |
190 | goto out; | | 190 | goto out; |
191 | | | 191 | |
192 | for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { | | 192 | for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { |
193 | bytesinfile = ip->i_size - uio->uio_offset; | | 193 | bytesinfile = ip->i_size - uio->uio_offset; |
194 | if (bytesinfile <= 0) | | 194 | if (bytesinfile <= 0) |
195 | break; | | 195 | break; |
196 | lbn = ufs_lblkno(fs, uio->uio_offset); | | 196 | lbn = ufs_lblkno(fs, uio->uio_offset); |
197 | nextlbn = lbn + 1; | | 197 | nextlbn = lbn + 1; |
198 | size = ufs_blksize(fs, ip, lbn); | | 198 | size = ufs_blksize(fs, ip, lbn); |
199 | blkoffset = ufs_blkoff(fs, uio->uio_offset); | | 199 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
200 | xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid), | | 200 | xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid), |
201 | bytesinfile); | | 201 | bytesinfile); |
202 | | | 202 | |
203 | if (ufs_lblktosize(fs, nextlbn) >= ip->i_size) | | 203 | if (ufs_lblktosize(fs, nextlbn) >= ip->i_size) |
204 | error = bread(vp, lbn, size, 0, &bp); | | 204 | error = bread(vp, lbn, size, 0, &bp); |
205 | else { | | 205 | else { |
206 | int nextsize = ufs_blksize(fs, ip, nextlbn); | | 206 | int nextsize = ufs_blksize(fs, ip, nextlbn); |
207 | error = breadn(vp, lbn, | | 207 | error = breadn(vp, lbn, |
208 | size, &nextlbn, &nextsize, 1, 0, &bp); | | 208 | size, &nextlbn, &nextsize, 1, 0, &bp); |
209 | } | | 209 | } |
210 | if (error) | | 210 | if (error) |
211 | break; | | 211 | break; |
212 | | | 212 | |
213 | /* | | 213 | /* |
214 | * We should only get non-zero b_resid when an I/O error | | 214 | * We should only get non-zero b_resid when an I/O error |
215 | * has occurred, which should cause us to break above. | | 215 | * has occurred, which should cause us to break above. |
216 | * However, if the short read did not cause an error, | | 216 | * However, if the short read did not cause an error, |
217 | * then we want to ensure that we do not uiomove bad | | 217 | * then we want to ensure that we do not uiomove bad |
218 | * or uninitialized data. | | 218 | * or uninitialized data. |
219 | */ | | 219 | */ |
220 | size -= bp->b_resid; | | 220 | size -= bp->b_resid; |
221 | if (size < xfersize) { | | 221 | if (size < xfersize) { |
222 | if (size == 0) | | 222 | if (size == 0) |
223 | break; | | 223 | break; |
224 | xfersize = size; | | 224 | xfersize = size; |
225 | } | | 225 | } |
226 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); | | 226 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); |
227 | if (error) | | 227 | if (error) |
228 | break; | | 228 | break; |
229 | brelse(bp, 0); | | 229 | brelse(bp, 0); |
230 | } | | 230 | } |
231 | if (bp != NULL) | | 231 | if (bp != NULL) |
232 | brelse(bp, 0); | | 232 | brelse(bp, 0); |
233 | | | 233 | |
234 | out: | | 234 | out: |
235 | error = ufs_post_read_update(vp, ioflag, error); | | 235 | error = ufs_post_read_update(vp, ioflag, error); |
236 | fstrans_done(vp->v_mount); | | 236 | fstrans_done(vp->v_mount); |
237 | return (error); | | 237 | return (error); |
238 | } | | 238 | } |
239 | | | 239 | |
240 | static int | | 240 | static int |
241 | ufs_post_read_update(struct vnode *vp, int ioflag, int oerror) | | 241 | ufs_post_read_update(struct vnode *vp, int ioflag, int oerror) |
242 | { | | 242 | { |
243 | struct inode *ip = VTOI(vp); | | 243 | struct inode *ip = VTOI(vp); |
244 | int error = oerror; | | 244 | int error = oerror; |
245 | | | 245 | |
246 | if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { | | 246 | if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { |
247 | ip->i_flag |= IN_ACCESS; | | 247 | ip->i_flag |= IN_ACCESS; |
248 | if ((ioflag & IO_SYNC) == IO_SYNC) { | | 248 | if ((ioflag & IO_SYNC) == IO_SYNC) { |
249 | error = UFS_WAPBL_BEGIN(vp->v_mount); | | 249 | error = UFS_WAPBL_BEGIN(vp->v_mount); |
250 | if (error) | | 250 | if (error) |
251 | goto out; | | 251 | goto out; |
252 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); | | 252 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); |
253 | UFS_WAPBL_END(vp->v_mount); | | 253 | UFS_WAPBL_END(vp->v_mount); |
254 | } | | 254 | } |
255 | } | | 255 | } |
256 | | | 256 | |
257 | out: | | 257 | out: |
258 | /* Read error overrides any inode update error. */ | | 258 | /* Read error overrides any inode update error. */ |
259 | if (oerror) | | 259 | if (oerror) |
260 | error = oerror; | | 260 | error = oerror; |
261 | return error; | | 261 | return error; |
262 | } | | 262 | } |
263 | | | 263 | |
264 | /* | | 264 | /* |
265 | * Vnode op for writing. | | 265 | * Vnode op for writing. |
266 | */ | | 266 | */ |
267 | int | | 267 | int |
268 | WRITE(void *v) | | 268 | WRITE(void *v) |
269 | { | | 269 | { |
270 | struct vop_write_args /* { | | 270 | struct vop_write_args /* { |
271 | struct vnode *a_vp; | | 271 | struct vnode *a_vp; |
272 | struct uio *a_uio; | | 272 | struct uio *a_uio; |
273 | int a_ioflag; | | 273 | int a_ioflag; |
274 | kauth_cred_t a_cred; | | 274 | kauth_cred_t a_cred; |
275 | } */ *ap = v; | | 275 | } */ *ap = v; |
276 | struct vnode *vp; | | 276 | struct vnode *vp; |
277 | struct uio *uio; | | 277 | struct uio *uio; |
278 | struct inode *ip; | | 278 | struct inode *ip; |
279 | FS *fs; | | 279 | FS *fs; |
280 | kauth_cred_t cred; | | 280 | kauth_cred_t cred; |
281 | off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize; | | 281 | off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize; |
282 | int blkoffset, error, flags, ioflag, resid; | | 282 | int blkoffset, error, flags, ioflag, resid; |
283 | int aflag; | | 283 | int aflag; |
284 | int extended=0; | | 284 | int extended=0; |
285 | vsize_t bytelen; | | 285 | vsize_t bytelen; |
286 | bool async; | | 286 | bool async; |
287 | struct ufsmount *ump; | | 287 | struct ufsmount *ump; |
288 | | | 288 | |
289 | cred = ap->a_cred; | | 289 | cred = ap->a_cred; |
290 | ioflag = ap->a_ioflag; | | 290 | ioflag = ap->a_ioflag; |
291 | uio = ap->a_uio; | | 291 | uio = ap->a_uio; |
292 | vp = ap->a_vp; | | 292 | vp = ap->a_vp; |
293 | ip = VTOI(vp); | | 293 | ip = VTOI(vp); |
294 | ump = ip->i_ump; | | 294 | ump = ip->i_ump; |
295 | | | 295 | |
296 | KASSERT(vp->v_size == ip->i_size); | | 296 | KASSERT(vp->v_size == ip->i_size); |
297 | KASSERT(uio->uio_rw == UIO_WRITE); | | 297 | KASSERT(uio->uio_rw == UIO_WRITE); |
298 | KASSERT(vp->v_type == VREG); | | 298 | KASSERT(vp->v_type == VREG); |
299 | KASSERT(!ISSET(ioflag, IO_JOURNALLOCKED)); | | 299 | KASSERT(!ISSET(ioflag, IO_JOURNALLOCKED)); |
300 | UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); | | 300 | UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); |
301 | | | 301 | |
302 | if (ioflag & IO_APPEND) | | 302 | if (ioflag & IO_APPEND) |
303 | uio->uio_offset = ip->i_size; | | 303 | uio->uio_offset = ip->i_size; |
304 | if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) | | 304 | if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) |
305 | return (EPERM); | | 305 | return (EPERM); |
306 | | | 306 | |
307 | fs = ip->I_FS; | | 307 | fs = ip->I_FS; |
308 | if (uio->uio_offset < 0 || | | 308 | if (uio->uio_offset < 0 || |
309 | (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize) | | 309 | (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize) |
310 | return (EFBIG); | | 310 | return (EFBIG); |
311 | #ifdef LFS_READWRITE | | 311 | #ifdef LFS_READWRITE |
312 | /* Disallow writes to the Ifile, even if noschg flag is removed */ | | 312 | /* Disallow writes to the Ifile, even if noschg flag is removed */ |
313 | /* XXX can this go away when the Ifile is no longer in the namespace? */ | | 313 | /* XXX can this go away when the Ifile is no longer in the namespace? */ |
314 | if (vp == fs->lfs_ivnode) | | 314 | if (vp == fs->lfs_ivnode) |
315 | return (EPERM); | | 315 | return (EPERM); |
316 | #endif | | 316 | #endif |
317 | if (uio->uio_resid == 0) | | 317 | if (uio->uio_resid == 0) |
318 | return (0); | | 318 | return (0); |
319 | | | 319 | |
320 | fstrans_start(vp->v_mount, FSTRANS_SHARED); | | 320 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
321 | | | 321 | |
322 | flags = ioflag & IO_SYNC ? B_SYNC : 0; | | 322 | flags = ioflag & IO_SYNC ? B_SYNC : 0; |
323 | async = vp->v_mount->mnt_flag & MNT_ASYNC; | | 323 | async = vp->v_mount->mnt_flag & MNT_ASYNC; |
324 | origoff = uio->uio_offset; | | 324 | origoff = uio->uio_offset; |
325 | resid = uio->uio_resid; | | 325 | resid = uio->uio_resid; |
326 | osize = ip->i_size; | | 326 | osize = ip->i_size; |
327 | error = 0; | | 327 | error = 0; |
328 | | | 328 | |
329 | KASSERT(vp->v_type == VREG); | | 329 | KASSERT(vp->v_type == VREG); |
330 | | | 330 | |
331 | /* | | 331 | /* |
332 | * XXX The entire write operation must occur in a single WAPBL | | 332 | * XXX The entire write operation must occur in a single WAPBL |
333 | * transaction because it may allocate disk blocks, if | | 333 | * transaction because it may allocate disk blocks, if |
334 | * appending or filling holes, which is allowed to happen only | | 334 | * appending or filling holes, which is allowed to happen only |
335 | * if the write fully succeeds. | | 335 | * if the write fully succeeds. |
336 | * | | 336 | * |
337 | * If ubc_uiomove fails in the middle with EFAULT, we can clean | | 337 | * If ubc_uiomove fails in the middle with EFAULT, we can clean |
338 | * up at the end with UFS_TRUNCATE. But if the power fails in | | 338 | * up at the end with UFS_TRUNCATE. But if the power fails in |
339 | * the middle, there would be nobody to deallocate the blocks, | | 339 | * the middle, there would be nobody to deallocate the blocks, |
340 | * without an fsck to globally analyze the file system. | | 340 | * without an fsck to globally analyze the file system. |
341 | * | | 341 | * |
342 | * If the increasingly inaccurately named WAPBL were augmented | | 342 | * If the increasingly inaccurately named WAPBL were augmented |
343 | * with rollback records for block allocations, then we could | | 343 | * with rollback records for block allocations, then we could |
344 | * split this into multiple transactions and commit the | | 344 | * split this into multiple transactions and commit the |
345 | * allocations in the last one. | | 345 | * allocations in the last one. |
346 | * | | 346 | * |
347 | * But WAPBL doesn't have that notion now, so we'll have to | | 347 | * But WAPBL doesn't have that notion now, so we'll have to |
348 | * live with gigantic transactions and WAPBL tentacles in | | 348 | * live with gigantic transactions and WAPBL tentacles in |
349 | * genfs_getpages/putpages to cope with the possibility that | | 349 | * genfs_getpages/putpages to cope with the possibility that |
350 | * the transaction may or may not be locked on entry to the | | 350 | * the transaction may or may not be locked on entry to the |
351 | * page cache. | | 351 | * page cache. |
352 | * | | 352 | * |
353 | * And even if we added that notion to WAPBL, it wouldn't help | | 353 | * And even if we added that notion to WAPBL, it wouldn't help |
354 | * us get rid of the tentacles in genfs_getpages/putpages | | 354 | * us get rid of the tentacles in genfs_getpages/putpages |
355 | * because we'd have to interoperate with old implementations | | 355 | * because we'd have to interoperate with old implementations |
356 | * that assume they can replay the log without fsck. | | 356 | * that assume they can replay the log without fsck. |
357 | */ | | 357 | */ |
358 | error = UFS_WAPBL_BEGIN(vp->v_mount); | | 358 | error = UFS_WAPBL_BEGIN(vp->v_mount); |
359 | if (error) { | | 359 | if (error) { |
360 | fstrans_done(vp->v_mount); | | 360 | fstrans_done(vp->v_mount); |
361 | return error; | | 361 | return error; |
362 | } | | 362 | } |
363 | | | 363 | |
364 | #ifdef LFS_READWRITE | | 364 | #ifdef LFS_READWRITE |
365 | async = true; | | 365 | async = true; |
366 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); | | 366 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); |
367 | lfs_check(vp, LFS_UNUSED_LBN, 0); | | 367 | lfs_check(vp, LFS_UNUSED_LBN, 0); |
368 | #endif /* !LFS_READWRITE */ | | 368 | #endif /* !LFS_READWRITE */ |
369 | | | 369 | |
370 | preallocoff = round_page(ufs_blkroundup(fs, MAX(osize, uio->uio_offset))); | | 370 | preallocoff = round_page(ufs_blkroundup(fs, MAX(osize, uio->uio_offset))); |
371 | aflag = ioflag & IO_SYNC ? B_SYNC : 0; | | 371 | aflag = ioflag & IO_SYNC ? B_SYNC : 0; |
372 | nsize = MAX(osize, uio->uio_offset + uio->uio_resid); | | 372 | nsize = MAX(osize, uio->uio_offset + uio->uio_resid); |
373 | endallocoff = nsize - ufs_blkoff(fs, nsize); | | 373 | endallocoff = nsize - ufs_blkoff(fs, nsize); |
374 | | | 374 | |
375 | /* | | 375 | /* |
376 | * if we're increasing the file size, deal with expanding | | 376 | * if we're increasing the file size, deal with expanding |
377 | * the fragment if there is one. | | 377 | * the fragment if there is one. |
378 | */ | | 378 | */ |
379 | | | 379 | |
380 | if (nsize > osize && ufs_lblkno(fs, osize) < UFS_NDADDR && | | 380 | if (nsize > osize && ufs_lblkno(fs, osize) < UFS_NDADDR && |
381 | ufs_lblkno(fs, osize) != ufs_lblkno(fs, nsize) && | | 381 | ufs_lblkno(fs, osize) != ufs_lblkno(fs, nsize) && |
382 | ufs_blkroundup(fs, osize) != osize) { | | 382 | ufs_blkroundup(fs, osize) != osize) { |
383 | off_t eob; | | 383 | off_t eob; |
384 | | | 384 | |
385 | eob = ufs_blkroundup(fs, osize); | | 385 | eob = ufs_blkroundup(fs, osize); |
386 | uvm_vnp_setwritesize(vp, eob); | | 386 | uvm_vnp_setwritesize(vp, eob); |
387 | error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag); | | 387 | error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag); |
388 | if (error) | | 388 | if (error) |
389 | goto out; | | 389 | goto out; |
390 | if (flags & B_SYNC) { | | 390 | if (flags & B_SYNC) { |
391 | mutex_enter(vp->v_interlock); | | 391 | mutex_enter(vp->v_interlock); |
392 | VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), | | 392 | VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), |
393 | round_page(eob), | | 393 | round_page(eob), |
394 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); | | 394 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); |
395 | } | | 395 | } |
396 | } | | 396 | } |
397 | | | 397 | |
398 | while (uio->uio_resid > 0) { | | 398 | while (uio->uio_resid > 0) { |
399 | int ubc_flags = UBC_WRITE; | | 399 | int ubc_flags = UBC_WRITE; |
400 | bool overwrite; /* if we're overwrite a whole block */ | | 400 | bool overwrite; /* if we're overwrite a whole block */ |
401 | off_t newoff; | | 401 | off_t newoff; |
402 | | | 402 | |
403 | if (ioflag & IO_DIRECT) { | | 403 | if (ioflag & IO_DIRECT) { |
404 | genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED); | | 404 | genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED); |
405 | } | | 405 | } |
406 | | | 406 | |
407 | oldoff = uio->uio_offset; | | 407 | oldoff = uio->uio_offset; |
408 | blkoffset = ufs_blkoff(fs, uio->uio_offset); | | 408 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
409 | bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); | | 409 | bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); |
410 | if (bytelen == 0) { | | 410 | if (bytelen == 0) { |
411 | break; | | 411 | break; |
412 | } | | 412 | } |
413 | | | 413 | |
414 | /* | | 414 | /* |
415 | * if we're filling in a hole, allocate the blocks now and | | 415 | * if we're filling in a hole, allocate the blocks now and |
416 | * initialize the pages first. if we're extending the file, | | 416 | * initialize the pages first. if we're extending the file, |
417 | * we can safely allocate blocks without initializing pages | | 417 | * we can safely allocate blocks without initializing pages |
418 | * since the new blocks will be inaccessible until the write | | 418 | * since the new blocks will be inaccessible until the write |
419 | * is complete. | | 419 | * is complete. |
420 | */ | | 420 | */ |
421 | overwrite = uio->uio_offset >= preallocoff && | | 421 | overwrite = uio->uio_offset >= preallocoff && |
422 | uio->uio_offset < endallocoff; | | 422 | uio->uio_offset < endallocoff; |
423 | if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 && | | 423 | if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 && |
424 | ufs_blkoff(fs, uio->uio_offset) == 0 && | | 424 | ufs_blkoff(fs, uio->uio_offset) == 0 && |
425 | (uio->uio_offset & PAGE_MASK) == 0) { | | 425 | (uio->uio_offset & PAGE_MASK) == 0) { |
426 | vsize_t len; | | 426 | vsize_t len; |
427 | | | 427 | |
428 | len = trunc_page(bytelen); | | 428 | len = trunc_page(bytelen); |
429 | len -= ufs_blkoff(fs, len); | | 429 | len -= ufs_blkoff(fs, len); |
430 | if (len > 0) { | | 430 | if (len > 0) { |
431 | overwrite = true; | | 431 | overwrite = true; |
432 | bytelen = len; | | 432 | bytelen = len; |
433 | } | | 433 | } |
434 | } | | 434 | } |
435 | | | 435 | |
436 | newoff = oldoff + bytelen; | | 436 | newoff = oldoff + bytelen; |
437 | if (vp->v_size < newoff) { | | 437 | if (vp->v_size < newoff) { |
438 | uvm_vnp_setwritesize(vp, newoff); | | 438 | uvm_vnp_setwritesize(vp, newoff); |
439 | } | | 439 | } |
440 | | | 440 | |
441 | if (!overwrite) { | | 441 | if (!overwrite) { |
442 | error = ufs_balloc_range(vp, uio->uio_offset, bytelen, | | 442 | error = ufs_balloc_range(vp, uio->uio_offset, bytelen, |
443 | cred, aflag); | | 443 | cred, aflag); |
444 | if (error) | | 444 | if (error) |
445 | break; | | 445 | break; |
446 | } else { | | 446 | } else { |
447 | genfs_node_wrlock(vp); | | 447 | genfs_node_wrlock(vp); |
448 | error = GOP_ALLOC(vp, uio->uio_offset, bytelen, | | 448 | error = GOP_ALLOC(vp, uio->uio_offset, bytelen, |
449 | aflag, cred); | | 449 | aflag, cred); |
450 | genfs_node_unlock(vp); | | 450 | genfs_node_unlock(vp); |
451 | if (error) | | 451 | if (error) |
452 | break; | | 452 | break; |
453 | ubc_flags |= UBC_FAULTBUSY; | | 453 | ubc_flags |= UBC_FAULTBUSY; |
454 | } | | 454 | } |
455 | | | 455 | |
456 | /* | | 456 | /* |
457 | * copy the data. | | 457 | * copy the data. |
458 | */ | | 458 | */ |
459 | | | 459 | |
460 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, | | 460 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, |
461 | IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp)); | | 461 | IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp)); |
462 | | | 462 | |
463 | /* | | 463 | /* |
464 | * update UVM's notion of the size now that we've | | 464 | * update UVM's notion of the size now that we've |
465 | * copied the data into the vnode's pages. | | 465 | * copied the data into the vnode's pages. |
466 | * | | 466 | * |
467 | * we should update the size even when uiomove failed. | | 467 | * we should update the size even when uiomove failed. |
468 | */ | | 468 | */ |
469 | | | 469 | |
470 | if (vp->v_size < newoff) { | | 470 | if (vp->v_size < newoff) { |
471 | uvm_vnp_setsize(vp, newoff); | | 471 | uvm_vnp_setsize(vp, newoff); |
472 | extended = 1; | | 472 | extended = 1; |
473 | } | | 473 | } |
474 | | | 474 | |
475 | if (error) | | 475 | if (error) |
476 | break; | | 476 | break; |
477 | | | 477 | |
478 | /* | | 478 | /* |
479 | * flush what we just wrote if necessary. | | 479 | * flush what we just wrote if necessary. |
480 | * XXXUBC simplistic async flushing. | | 480 | * XXXUBC simplistic async flushing. |
481 | */ | | 481 | */ |
482 | | | 482 | |
483 | #ifndef LFS_READWRITE | | 483 | #ifndef LFS_READWRITE |
484 | if (!async && oldoff >> 16 != uio->uio_offset >> 16) { | | 484 | if (!async && oldoff >> 16 != uio->uio_offset >> 16) { |
485 | mutex_enter(vp->v_interlock); | | 485 | mutex_enter(vp->v_interlock); |
486 | error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, | | 486 | error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, |
487 | (uio->uio_offset >> 16) << 16, | | 487 | (uio->uio_offset >> 16) << 16, |
488 | PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY); | | 488 | PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY); |
489 | if (error) | | 489 | if (error) |
490 | break; | | 490 | break; |
491 | } | | 491 | } |
492 | #endif | | 492 | #endif |
493 | } | | 493 | } |
494 | if (error == 0 && ioflag & IO_SYNC) { | | 494 | if (error == 0 && ioflag & IO_SYNC) { |
495 | mutex_enter(vp->v_interlock); | | 495 | mutex_enter(vp->v_interlock); |
496 | error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), | | 496 | error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), |
497 | round_page(ufs_blkroundup(fs, uio->uio_offset)), | | 497 | round_page(ufs_blkroundup(fs, uio->uio_offset)), |
498 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); | | 498 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); |
499 | } | | 499 | } |
500 | | | 500 | |
501 | out: | | 501 | out: |
502 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, | | 502 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, |
503 | extended, error); | | 503 | extended, error); |
504 | UFS_WAPBL_END(vp->v_mount); | | 504 | UFS_WAPBL_END(vp->v_mount); |
505 | fstrans_done(vp->v_mount); | | 505 | fstrans_done(vp->v_mount); |
506 | | | 506 | |
507 | return (error); | | 507 | return (error); |
508 | } | | 508 | } |
509 | | | 509 | |
510 | /* | | 510 | /* |
511 | * UFS op for writing via the buffer cache | | 511 | * UFS op for writing via the buffer cache |
512 | */ | | 512 | */ |
513 | int | | 513 | int |
514 | BUFWR(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) | | 514 | BUFWR(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) |
515 | { | | 515 | { |
516 | struct inode *ip; | | 516 | struct inode *ip; |
517 | struct ufsmount *ump; | | 517 | struct ufsmount *ump; |
518 | FS *fs; | | 518 | FS *fs; |
519 | int flags; | | 519 | int flags; |
520 | struct buf *bp; | | 520 | struct buf *bp; |
521 | off_t osize, origoff; | | 521 | off_t osize, origoff; |
522 | int resid, xfersize, size, blkoffset; | | 522 | int resid, xfersize, size, blkoffset; |
523 | daddr_t lbn; | | 523 | daddr_t lbn; |
524 | int extended=0; | | 524 | int extended=0; |
525 | int error; | | 525 | int error; |
526 | #ifdef LFS_READWRITE | | 526 | #ifdef LFS_READWRITE |
527 | bool need_unreserve = false; | | 527 | bool need_unreserve = false; |
528 | #endif | | 528 | #endif |
529 | | | 529 | |
530 | KASSERT(ISSET(ioflag, IO_NODELOCKED)); | | 530 | KASSERT(ISSET(ioflag, IO_NODELOCKED)); |
531 | KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); | | 531 | KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); |
532 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); | | 532 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); |
533 | KASSERT(vp->v_type != VDIR || ISSET(ioflag, IO_SYNC)); | | 533 | KASSERT(vp->v_type != VDIR || ISSET(ioflag, IO_SYNC)); |
534 | KASSERT(uio->uio_rw == UIO_WRITE); | | 534 | KASSERT(uio->uio_rw == UIO_WRITE); |
535 | KASSERT(ISSET(ioflag, IO_JOURNALLOCKED)); | | 535 | KASSERT(ISSET(ioflag, IO_JOURNALLOCKED)); |
536 | UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); | | 536 | UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); |
537 | | | 537 | |
538 | ip = VTOI(vp); | | 538 | ip = VTOI(vp); |
539 | ump = ip->i_ump; | | 539 | ump = ip->i_ump; |
540 | fs = ip->I_FS; | | 540 | fs = ip->I_FS; |
541 | | | 541 | |
542 | KASSERT(vp->v_size == ip->i_size); | | 542 | KASSERT(vp->v_size == ip->i_size); |
543 | | | 543 | |
544 | if (uio->uio_offset < 0 || | | 544 | if (uio->uio_offset < 0 || |
545 | uio->uio_resid > ump->um_maxfilesize || | | 545 | uio->uio_resid > ump->um_maxfilesize || |
546 | uio->uio_offset > (ump->um_maxfilesize - uio->uio_resid)) | | 546 | uio->uio_offset > (ump->um_maxfilesize - uio->uio_resid)) |
547 | return EFBIG; | | 547 | return EFBIG; |
548 | #ifdef LFS_READWRITE | | 548 | #ifdef LFS_READWRITE |
549 | KASSERT(vp != fs->lfs_ivnode); | | 549 | KASSERT(vp != fs->lfs_ivnode); |
550 | #endif | | 550 | #endif |
551 | if (uio->uio_resid == 0) | | 551 | if (uio->uio_resid == 0) |
552 | return 0; | | 552 | return 0; |
553 | | | 553 | |
554 | fstrans_start(vp->v_mount, FSTRANS_SHARED); | | 554 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
555 | | | 555 | |
556 | flags = ioflag & IO_SYNC ? B_SYNC : 0; | | 556 | flags = ioflag & IO_SYNC ? B_SYNC : 0; |
557 | origoff = uio->uio_offset; | | 557 | origoff = uio->uio_offset; |
558 | resid = uio->uio_resid; | | 558 | resid = uio->uio_resid; |
559 | osize = ip->i_size; | | 559 | osize = ip->i_size; |
560 | error = 0; | | 560 | error = 0; |
561 | | | 561 | |
562 | KASSERT(vp->v_type != VREG); | | 562 | KASSERT(vp->v_type != VREG); |
563 | | | 563 | |
564 | #ifdef LFS_READWRITE | | 564 | #ifdef LFS_READWRITE |
565 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); | | 565 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); |
566 | lfs_check(vp, LFS_UNUSED_LBN, 0); | | 566 | lfs_check(vp, LFS_UNUSED_LBN, 0); |
567 | #endif /* !LFS_READWRITE */ | | 567 | #endif /* !LFS_READWRITE */ |
568 | | | 568 | |
569 | /* XXX Should never have pages cached here. */ | | 569 | /* XXX Should never have pages cached here. */ |
570 | mutex_enter(vp->v_interlock); | | 570 | KASSERT(vp->v_uobj.uo_npages == 0); |
571 | VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid), | | | |
572 | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO | PGO_JOURNALLOCKED); | | | |
573 | while (uio->uio_resid > 0) { | | 571 | while (uio->uio_resid > 0) { |
574 | lbn = ufs_lblkno(fs, uio->uio_offset); | | 572 | lbn = ufs_lblkno(fs, uio->uio_offset); |
575 | blkoffset = ufs_blkoff(fs, uio->uio_offset); | | 573 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
576 | xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); | | 574 | xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); |
577 | if (fs->fs_bsize > xfersize) | | 575 | if (fs->fs_bsize > xfersize) |
578 | flags |= B_CLRBUF; | | 576 | flags |= B_CLRBUF; |
579 | else | | 577 | else |
580 | flags &= ~B_CLRBUF; | | 578 | flags &= ~B_CLRBUF; |
581 | | | 579 | |
582 | #ifdef LFS_READWRITE | | 580 | #ifdef LFS_READWRITE |
583 | error = lfs_reserve(fs, vp, NULL, | | 581 | error = lfs_reserve(fs, vp, NULL, |
584 | btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); | | 582 | btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
585 | if (error) | | 583 | if (error) |
586 | break; | | 584 | break; |
587 | need_unreserve = true; | | 585 | need_unreserve = true; |
588 | #endif | | 586 | #endif |
589 | error = UFS_BALLOC(vp, uio->uio_offset, xfersize, cred, flags, | | 587 | error = UFS_BALLOC(vp, uio->uio_offset, xfersize, cred, flags, |
590 | &bp); | | 588 | &bp); |
591 | | | 589 | |
592 | if (error) | | 590 | if (error) |
593 | break; | | 591 | break; |
594 | if (uio->uio_offset + xfersize > ip->i_size) { | | 592 | if (uio->uio_offset + xfersize > ip->i_size) { |
595 | ip->i_size = uio->uio_offset + xfersize; | | 593 | ip->i_size = uio->uio_offset + xfersize; |
596 | DIP_ASSIGN(ip, size, ip->i_size); | | 594 | DIP_ASSIGN(ip, size, ip->i_size); |
597 | uvm_vnp_setsize(vp, ip->i_size); | | 595 | uvm_vnp_setsize(vp, ip->i_size); |
598 | extended = 1; | | 596 | extended = 1; |
599 | } | | 597 | } |
600 | size = ufs_blksize(fs, ip, lbn) - bp->b_resid; | | 598 | size = ufs_blksize(fs, ip, lbn) - bp->b_resid; |
601 | if (xfersize > size) | | 599 | if (xfersize > size) |
602 | xfersize = size; | | 600 | xfersize = size; |
603 | | | 601 | |
604 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); | | 602 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); |
605 | | | 603 | |
606 | /* | | 604 | /* |
607 | * if we didn't clear the block and the uiomove failed, | | 605 | * if we didn't clear the block and the uiomove failed, |
608 | * the buf will now contain part of some other file, | | 606 | * the buf will now contain part of some other file, |
609 | * so we need to invalidate it. | | 607 | * so we need to invalidate it. |
610 | */ | | 608 | */ |
611 | if (error && (flags & B_CLRBUF) == 0) { | | 609 | if (error && (flags & B_CLRBUF) == 0) { |
612 | brelse(bp, BC_INVAL); | | 610 | brelse(bp, BC_INVAL); |
613 | break; | | 611 | break; |
614 | } | | 612 | } |
615 | #ifdef LFS_READWRITE | | 613 | #ifdef LFS_READWRITE |
616 | (void)VOP_BWRITE(bp->b_vp, bp); | | 614 | (void)VOP_BWRITE(bp->b_vp, bp); |
617 | lfs_reserve(fs, vp, NULL, | | 615 | lfs_reserve(fs, vp, NULL, |
618 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); | | 616 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
619 | need_unreserve = false; | | 617 | need_unreserve = false; |
620 | #else | | 618 | #else |
621 | if (ioflag & IO_SYNC) | | 619 | if (ioflag & IO_SYNC) |
622 | (void)bwrite(bp); | | 620 | (void)bwrite(bp); |
623 | else if (xfersize + blkoffset == fs->fs_bsize) | | 621 | else if (xfersize + blkoffset == fs->fs_bsize) |
624 | bawrite(bp); | | 622 | bawrite(bp); |
625 | else | | 623 | else |
626 | bdwrite(bp); | | 624 | bdwrite(bp); |
627 | #endif | | 625 | #endif |
628 | if (error || xfersize == 0) | | 626 | if (error || xfersize == 0) |
629 | break; | | 627 | break; |
630 | } | | 628 | } |
631 | #ifdef LFS_READWRITE | | 629 | #ifdef LFS_READWRITE |
632 | if (need_unreserve) { | | 630 | if (need_unreserve) { |
633 | lfs_reserve(fs, vp, NULL, | | 631 | lfs_reserve(fs, vp, NULL, |
634 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); | | 632 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
635 | } | | 633 | } |
636 | #endif | | 634 | #endif |
637 | | | 635 | |
638 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, | | 636 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, |
639 | extended, error); | | 637 | extended, error); |
640 | fstrans_done(vp->v_mount); | | 638 | fstrans_done(vp->v_mount); |
641 | | | 639 | |
642 | return (error); | | 640 | return (error); |
643 | } | | 641 | } |
644 | | | 642 | |
645 | static int | | 643 | static int |
646 | ufs_post_write_update(struct vnode *vp, struct uio *uio, int ioflag, | | 644 | ufs_post_write_update(struct vnode *vp, struct uio *uio, int ioflag, |
647 | kauth_cred_t cred, off_t osize, int resid, int extended, int oerror) | | 645 | kauth_cred_t cred, off_t osize, int resid, int extended, int oerror) |
648 | { | | 646 | { |
649 | struct inode *ip = VTOI(vp); | | 647 | struct inode *ip = VTOI(vp); |
650 | int error = oerror; | | 648 | int error = oerror; |
651 | | | 649 | |
652 | /* Trigger ctime and mtime updates, and atime if MNT_RELATIME. */ | | 650 | /* Trigger ctime and mtime updates, and atime if MNT_RELATIME. */ |
653 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | | 651 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
654 | if (vp->v_mount->mnt_flag & MNT_RELATIME) | | 652 | if (vp->v_mount->mnt_flag & MNT_RELATIME) |
655 | ip->i_flag |= IN_ACCESS; | | 653 | ip->i_flag |= IN_ACCESS; |
656 | | | 654 | |
657 | /* | | 655 | /* |
658 | * If we successfully wrote any data and we are not the superuser, | | 656 | * If we successfully wrote any data and we are not the superuser, |
659 | * we clear the setuid and setgid bits as a precaution against | | 657 | * we clear the setuid and setgid bits as a precaution against |
660 | * tampering. | | 658 | * tampering. |
661 | */ | | 659 | */ |
662 | if (resid > uio->uio_resid && cred) { | | 660 | if (resid > uio->uio_resid && cred) { |
663 | if (ip->i_mode & ISUID) { | | 661 | if (ip->i_mode & ISUID) { |
664 | if (kauth_authorize_vnode(cred, | | 662 | if (kauth_authorize_vnode(cred, |
665 | KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) { | | 663 | KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) { |
666 | ip->i_mode &= ~ISUID; | | 664 | ip->i_mode &= ~ISUID; |
667 | DIP_ASSIGN(ip, mode, ip->i_mode); | | 665 | DIP_ASSIGN(ip, mode, ip->i_mode); |
668 | } | | 666 | } |
669 | } | | 667 | } |
670 | | | 668 | |
671 | if (ip->i_mode & ISGID) { | | 669 | if (ip->i_mode & ISGID) { |
672 | if (kauth_authorize_vnode(cred, | | 670 | if (kauth_authorize_vnode(cred, |
673 | KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) { | | 671 | KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) { |
674 | ip->i_mode &= ~ISGID; | | 672 | ip->i_mode &= ~ISGID; |
675 | DIP_ASSIGN(ip, mode, ip->i_mode); | | 673 | DIP_ASSIGN(ip, mode, ip->i_mode); |
676 | } | | 674 | } |
677 | } | | 675 | } |
678 | } | | 676 | } |
679 | | | 677 | |
680 | /* If we successfully wrote anything, notify kevent listeners. */ | | 678 | /* If we successfully wrote anything, notify kevent listeners. */ |
681 | if (resid > uio->uio_resid) | | 679 | if (resid > uio->uio_resid) |
682 | VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); | | 680 | VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); |
683 | | | 681 | |
684 | /* | | 682 | /* |
685 | * Update the size on disk: truncate back to original size on | | 683 | * Update the size on disk: truncate back to original size on |
686 | * error, or reflect the new size on success. | | 684 | * error, or reflect the new size on success. |
687 | */ | | 685 | */ |
688 | if (error) { | | 686 | if (error) { |
689 | (void) UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, cred); | | 687 | (void) UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, cred); |
690 | uio->uio_offset -= resid - uio->uio_resid; | | 688 | uio->uio_offset -= resid - uio->uio_resid; |
691 | uio->uio_resid = resid; | | 689 | uio->uio_resid = resid; |
692 | } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) | | 690 | } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) |
693 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); | | 691 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); |
694 | else | | 692 | else |
695 | UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); | | 693 | UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); |
696 | | | 694 | |
697 | /* Make sure the vnode uvm size matches the inode file size. */ | | 695 | /* Make sure the vnode uvm size matches the inode file size. */ |
698 | KASSERT(vp->v_size == ip->i_size); | | 696 | KASSERT(vp->v_size == ip->i_size); |
699 | | | 697 | |
700 | /* Write error overrides any inode update error. */ | | 698 | /* Write error overrides any inode update error. */ |
701 | if (oerror) | | 699 | if (oerror) |
702 | error = oerror; | | 700 | error = oerror; |
703 | return error; | | 701 | return error; |
704 | } | | 702 | } |