Mon Mar 29 02:13:38 2021 UTC
Don't use legacy VM types.


(simonb)
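The change swaps the legacy FreeBSD-derived VM type vm_offset_t for NetBSD's native UVM type vaddr_t where the code holds a kernel virtual address. A minimal sketch of the pattern, assuming a NetBSD kernel source file; example_bufkva is a hypothetical helper written only to illustrate the type change, not code from this commit:

	#include <sys/types.h>	/* vaddr_t, NetBSD's virtual-address type */
	#include <sys/buf.h>	/* struct buf */

	static vaddr_t
	example_bufkva(struct buf *bp)
	{

		/* was: return (vm_offset_t)bp->b_data; */
		return (vaddr_t)bp->b_data;
	}

Both spellings name an unsigned integer wide enough to hold a virtual address, so the substitution is mechanical; vaddr_t is simply the type the rest of src/sys uses with UVM.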
diff -r1.6 -r1.7 src/sys/fs/nfs/client/nfs_clbio.c
diff -r1.3 -r1.4 src/sys/fs/nfs/server/nfs_nfsdport.c

cvs diff -r1.6 -r1.7 src/sys/fs/nfs/client/nfs_clbio.c

--- src/sys/fs/nfs/client/nfs_clbio.c 2020/09/29 03:02:19 1.6
+++ src/sys/fs/nfs/client/nfs_clbio.c 2021/03/29 02:13:37 1.7
@@ -1,1285 +1,1285 @@
-/*	$NetBSD: nfs_clbio.c,v 1.6 2020/09/29 03:02:19 msaitoh Exp $	*/
+/*	$NetBSD: nfs_clbio.c,v 1.7 2021/03/29 02:13:37 simonb Exp $	*/
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
  */

 #include <sys/cdefs.h>
 /* __FBSDID("FreeBSD: head/sys/fs/nfsclient/nfs_clbio.c 304026 2016-08-12 22:44:59Z rmacklem "); */
-__RCSID("$NetBSD: nfs_clbio.c,v 1.6 2020/09/29 03:02:19 msaitoh Exp $");
+__RCSID("$NetBSD: nfs_clbio.c,v 1.7 2021/03/29 02:13:37 simonb Exp $");

 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/rwlock.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>

 #include <fs/nfs/common/nfsport.h>
 #include <fs/nfs/client/nfsmount.h>
 #include <fs/nfs/client/nfs.h>
 #include <fs/nfs/client/nfsnode.h>
 #include <fs/nfs/client/nfs_kdtrace.h>

 extern int newnfs_directio_allow_mmap;
 extern struct nfsstatsv1 nfsstatsv1;
 extern struct mtx ncl_iod_mutex;
 extern int ncl_numasync;
 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
 extern int newnfs_directio_enable;
 extern int nfs_keep_dirty_on_error;

 int ncl_pbuf_freecnt = -1;	/* start out unlimited */

 static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size,
     struct thread *td);
 static int nfs_directio_write(struct vnode *vp, struct uio *uiop,
     struct ucred *cred, int ioflag);

 /*
  * Vnode op for VM getpages.
  */
 int
 ncl_getpages(struct vop_getpages_args *ap)
 {
 	int i, error, nextoff, size, toff, count, npages;
 	struct uio uio;
 	struct iovec iov;
-	vm_offset_t kva;
+	vaddr_t kva;
 	struct buf *bp;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	vm_object_t object;
 	vm_page_t *pages;
 	struct nfsnode *np;

 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	td = curthread;				/* XXX */
 	cred = curthread->td_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	npages = ap->a_count;

 	if ((object = vp->v_object) == NULL) {
 		printf("ncl_getpages: called with non-merged cache vnode\n");
 		return (VM_PAGER_ERROR);
 	}

 	if (newnfs_directio_enable && !newnfs_directio_allow_mmap) {
 		mtx_lock(&np->n_mtx);
 		if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
 			mtx_unlock(&np->n_mtx);
 			printf("ncl_getpages: called on non-cacheable vnode\n");
 			return (VM_PAGER_ERROR);
 		} else
 			mtx_unlock(&np->n_mtx);
 	}

 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		/* We'll never get here for v4, because we always have fsinfo */
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 	} else
 		mtx_unlock(&nmp->nm_mtx);

 	/*
 	 * If the requested page is partially valid, just return it and
 	 * allow the pager to zero-out the blanks.  Partially valid pages
 	 * can only occur at the file EOF.
 	 *
 	 * XXXGL: is that true for NFS, where short read can occur???
 	 */
 	VM_OBJECT_WLOCK(object);
 	if (pages[npages - 1]->valid != 0 && --npages == 0)
 		goto out;
 	VM_OBJECT_WUNLOCK(object);

 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&ncl_pbuf_freecnt);

-	kva = (vm_offset_t) bp->b_data;
+	kva = (vaddr_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	PCPU_INC(cnt.v_vnodein);
 	PCPU_ADD(cnt.v_vnodepgsin, npages);

 	count = npages << PAGE_SHIFT;
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = td;

 	error = ncl_readrpc(vp, &uio, cred);
 	pmap_qremove(kva, npages);

 	relpbuf(bp, &ncl_pbuf_freecnt);

 	if (error && (uio.uio_resid == count)) {
 		printf("ncl_getpages: error %d\n", error);
 		return (VM_PAGER_ERROR);
 	}

 	/*
 	 * Calculate the number of bytes read and validate only that number
 	 * of bytes.  Note that due to pending writes, size may be 0.  This
 	 * does not mean that the remaining data is invalid!
 	 */

 	size = count - uio.uio_resid;
 	VM_OBJECT_WLOCK(object);
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 		nextoff = toff + PAGE_SIZE;
 		m = pages[i];

 		if (nextoff <= size) {
 			/*
 			 * Read operation filled an entire page
 			 */
 			m->valid = VM_PAGE_BITS_ALL;
 			KASSERT(m->dirty == 0,
 			    ("nfs_getpages: page %p is dirty", m));
 		} else if (size > toff) {
 			/*
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
 			vm_page_set_valid_range(m, 0, size - toff);
 			KASSERT(m->dirty == 0,
 			    ("nfs_getpages: page %p is dirty", m));
 		} else {
 			/*
 			 * Read operation was short.  If no error
 			 * occurred we may have hit a zero-fill
 			 * section.  We leave valid set to 0, and page
 			 * is freed by vm_page_readahead_finish() if
 			 * its index is not equal to requested, or
 			 * page is zeroed and set valid by
 			 * vm_pager_get_pages() for requested page.
 			 */
 			;
 		}
 	}
 out:
 	VM_OBJECT_WUNLOCK(object);
 	if (ap->a_rbehind)
 		*ap->a_rbehind = 0;
 	if (ap->a_rahead)
 		*ap->a_rahead = 0;
 	return (VM_PAGER_OK);
 }

 /*
  * Vnode op for VM putpages.
  */
 int
 ncl_putpages(struct vop_putpages_args *ap)
 {
 	struct uio uio;
 	struct iovec iov;
-	vm_offset_t kva;
+	vaddr_t kva;
 	struct buf *bp;
 	int iomode, must_commit, i, error, npages, count;
 	off_t offset;
 	int *rtvals;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	vm_page_t *pages;

 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	td = curthread;				/* XXX */
 	/* Set the cred to n_writecred for the write rpcs. */
 	if (np->n_writecred != NULL)
 		cred = crhold(np->n_writecred);
 	else
 		cred = crhold(curthread->td_ucred);	/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
 	rtvals = ap->a_rtvals;
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);

 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 	} else
 		mtx_unlock(&nmp->nm_mtx);

 	mtx_lock(&np->n_mtx);
 	if (newnfs_directio_enable && !newnfs_directio_allow_mmap &&
 	    (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
 		mtx_unlock(&np->n_mtx);
 		printf("ncl_putpages: called on noncache-able vnode\n");
 		mtx_lock(&np->n_mtx);
 	}

 	for (i = 0; i < npages; i++)
 		rtvals[i] = VM_PAGER_ERROR;

 	/*
 	 * When putting pages, do not extend file past EOF.
 	 */
 	if (offset + count > np->n_size) {
 		count = np->n_size - offset;
 		if (count < 0)
 			count = 0;
 	}
 	mtx_unlock(&np->n_mtx);

 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&ncl_pbuf_freecnt);

-	kva = (vm_offset_t) bp->b_data;
+	kva = (vaddr_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	PCPU_INC(cnt.v_vnodeout);
 	PCPU_ADD(cnt.v_vnodepgsout, count);

 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = offset;
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_td = td;

 	if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
 		iomode = NFSWRITE_UNSTABLE;
 	else
 		iomode = NFSWRITE_FILESYNC;

 	error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
 	crfree(cred);

 	pmap_qremove(kva, npages);
 	relpbuf(bp, &ncl_pbuf_freecnt);

 	if (error == 0 || !nfs_keep_dirty_on_error) {
 		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid);
 		if (must_commit)
 			ncl_clearcommit(vp->v_mount);
 	}
 	return rtvals[0];
 }

 /*
  * For nfs, cache consistency can only be maintained approximately.
  * Although RFC1094 does not specify the criteria, the following is
  * believed to be compatible with the reference port.
  * For nfs:
  * If the file's modify time on the server has changed since the
  * last read rpc or you have written to the file,
  * you may have lost data cache consistency with the
  * server, so flush all of the file's data out of the cache.
  * Then force a getattr rpc to ensure that you have up to date
  * attributes.
  * NB: This implies that cache data can be read when up to
  * NFS_ATTRTIMEO seconds out of date. If you find that you need current
  * attributes this could be forced by setting n_attrstamp to 0 before
  * the VOP_GETATTR() call.
  */
 static inline int
 nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	int error = 0;
 	struct vattr vattr;
 	struct nfsnode *np = VTONFS(vp);
 	int old_lock;

 	/*
 	 * Grab the exclusive lock before checking whether the cache is
 	 * consistent.
 	 * XXX - We can make this cheaper later (by acquiring cheaper locks).
 	 * But for now, this suffices.
 	 */
 	old_lock = ncl_upgrade_vnlock(vp);
 	if (vp->v_iflag & VI_DOOMED) {
 		ncl_downgrade_vnlock(vp, old_lock);
 		return (EBADF);
 	}

 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & NMODIFIED) {
 		mtx_unlock(&np->n_mtx);
 		if (vp->v_type != VREG) {
 			if (vp->v_type != VDIR)
 				panic("nfs: bioread, not dir");
 			ncl_invaldir(vp);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 			if (error)
 				goto out;
 		}
 		np->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 		error = VOP_GETATTR(vp, &vattr, cred);
 		if (error)
 			goto out;
 		mtx_lock(&np->n_mtx);
 		np->n_mtime = vattr.va_mtime;
 		mtx_unlock(&np->n_mtx);
 	} else {
 		mtx_unlock(&np->n_mtx);
 		error = VOP_GETATTR(vp, &vattr, cred);
 		if (error)
 			return (error);
 		mtx_lock(&np->n_mtx);
 		if ((np->n_flag & NSIZECHANGED)
 		    || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
 			mtx_unlock(&np->n_mtx);
 			if (vp->v_type == VDIR)
 				ncl_invaldir(vp);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 			if (error)
 				goto out;
 			mtx_lock(&np->n_mtx);
 			np->n_mtime = vattr.va_mtime;
 			np->n_flag &= ~NSIZECHANGED;
 		}
 		mtx_unlock(&np->n_mtx);
 	}
 out:
 	ncl_downgrade_vnlock(vp, old_lock);
 	return error;
 }

 /*
  * Vnode op for read using bio
  */
 int
 ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
 {
 	struct nfsnode *np = VTONFS(vp);
 	int biosize, i;
 	struct buf *bp, *rabp;
 	struct thread *td;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn, rabn;
 	int bcount;
 	int seqcount;
 	int nra, error = 0, n = 0, on = 0;
 	off_t tmp_off;

 	KASSERT(uio->uio_rw == UIO_READ, ("ncl_read mode"));
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0)	/* XXX VDIR cookies can be negative */
 		return (EINVAL);
 	td = uio->uio_td;

 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 		mtx_lock(&nmp->nm_mtx);
 	}
 	if (nmp->nm_rsize == 0 || nmp->nm_readdirsize == 0)
 		(void) newnfs_iosize(nmp);

 	tmp_off = uio->uio_offset + uio->uio_resid;
 	if (vp->v_type != VDIR &&
 	    (tmp_off > nmp->nm_maxfilesize || tmp_off < uio->uio_offset)) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (EFBIG);
 	}
 	mtx_unlock(&nmp->nm_mtx);

 	if (newnfs_directio_enable && (ioflag & IO_DIRECT) && (vp->v_type == VREG))
 		/* No caching/ no readaheads. Just read data into the user buffer */
 		return ncl_readrpc(vp, uio, cred);

 	biosize = vp->v_bufobj.bo_bsize;
 	seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE);

 	error = nfs_bioread_check_cons(vp, td, cred);
 	if (error)
 		return error;

 	do {
 	    u_quad_t nsize;

 	    mtx_lock(&np->n_mtx);
 	    nsize = np->n_size;
 	    mtx_unlock(&np->n_mtx);

 	    switch (vp->v_type) {
 	    case VREG:
 		NFSINCRGLOBAL(nfsstatsv1.biocache_reads);
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset - (lbn * biosize);

 		/*
 		 * Start the read ahead(s), as required.
 		 */
 		if (nmp->nm_readahead > 0) {
 		    for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
 			(off_t)(lbn + 1 + nra) * biosize < nsize; nra++) {
 			rabn = lbn + 1 + nra;
 			if (incore(&vp->v_bufobj, rabn) == NULL) {
 			    rabp = nfs_getcacheblk(vp, rabn, biosize, td);
 			    if (!rabp) {
 				error = newnfs_sigintr(nmp, td);
 				return (error ? error : EINTR);
 			    }
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				if (ncl_asyncio(nmp, rabp, cred, td)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				    break;
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		    }
 		}

 		/* Note that bcount is *not* DEV_BSIZE aligned. */
 		bcount = biosize;
 		if ((off_t)lbn * biosize >= nsize) {
 			bcount = 0;
 		} else if ((off_t)(lbn + 1) * biosize > nsize) {
 			bcount = nsize - (off_t)lbn * biosize;
 		}
 		bp = nfs_getcacheblk(vp, lbn, bcount, td);

 		if (!bp) {
 			error = newnfs_sigintr(nmp, td);
 			return (error ? error : EINTR);
 		}

 		/*
 		 * If B_CACHE is not set, we must issue the read.  If this
 		 * fails, we return an error.
 		 */

 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = ncl_doio(vp, bp, cred, td, 0);
 		    if (error) {
 			brelse(bp);
 			return (error);
 		    }
 		}

 		/*
 		 * on is the offset into the current bp.  Figure out how many
 		 * bytes we can copy out of the bp.  Note that bcount is
 		 * NOT DEV_BSIZE aligned.
 		 *
 		 * Then figure out how many bytes we can copy into the uio.
 		 */

 		n = 0;
 		if (on < bcount)
 			n = MIN((unsigned)(bcount - on), uio->uio_resid);
 		break;
 	    case VLNK:
 		NFSINCRGLOBAL(nfsstatsv1.biocache_readlinks);
 		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td);
 		if (!bp) {
 			error = newnfs_sigintr(nmp, td);
 			return (error ? error : EINTR);
 		}
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = ncl_doio(vp, bp, cred, td, 0);
 		    if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			brelse(bp);
 			return (error);
 		    }
 		}
 		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
 		on = 0;
 		break;
 	    case VDIR:
 		NFSINCRGLOBAL(nfsstatsv1.biocache_readdirs);
 		if (np->n_direofoffset
 		    && uio->uio_offset >= np->n_direofoffset) {
 			return (0);
 		}
 		lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
 		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
 		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, td);
 		if (!bp) {
 			error = newnfs_sigintr(nmp, td);
 			return (error ? error : EINTR);
 		}
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = ncl_doio(vp, bp, cred, td, 0);
 		    if (error) {
 			    brelse(bp);
 		    }
 		    while (error == NFSERR_BAD_COOKIE) {
 			ncl_invaldir(vp);
 			error = ncl_vinvalbuf(vp, 0, td, 1);
 			/*
 			 * Yuck! The directory has been modified on the
 			 * server. The only way to get the block is by
 			 * reading from the beginning to get all the
 			 * offset cookies.
 			 *
 			 * Leave the last bp intact unless there is an error.
 			 * Loop back up to the while if the error is another
 			 * NFSERR_BAD_COOKIE (double yuch!).
 			 */
 			for (i = 0; i <= lbn && !error; i++) {
 			    if (np->n_direofoffset
 				&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
 				    return (0);
 			    bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, td);
 			    if (!bp) {
 				error = newnfs_sigintr(nmp, td);
 				return (error ? error : EINTR);
 			    }
 			    if ((bp->b_flags & B_CACHE) == 0) {
 				    bp->b_iocmd = BIO_READ;
 				    vfs_busy_pages(bp, 0);
 				    error = ncl_doio(vp, bp, cred, td, 0);
 				    /*
 				     * no error + B_INVAL == directory EOF,
 				     * use the block.
 				     */
 				    if (error == 0 && (bp->b_flags & B_INVAL))
 					    break;
 			    }
 			    /*
 			     * An error will throw away the block and the
 			     * for loop will break out.  If no error and this
 			     * is not the block we want, we throw away the
 			     * block and go for the next one via the for loop.
 			     */
 			    if (error || i < lbn)
 				    brelse(bp);
 			}
 		    }
 		    /*
 		     * The above while is repeated if we hit another cookie
 		     * error.  If we hit an error and it wasn't a cookie error,
 		     * we give up.
 		     */
 		    if (error)
 			    return (error);
 		}

 		/*
 		 * If not eof and read aheads are enabled, start one.
 		 * (You need the current block first, so that you have the
 		 *  directory offset cookie of the next block.)
 		 */
 		if (nmp->nm_readahead > 0 &&
 		    (bp->b_flags & B_INVAL) == 0 &&
 		    (np->n_direofoffset == 0 ||
 		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
 		    incore(&vp->v_bufobj, lbn + 1) == NULL) {
 			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, td);
 			if (rabp) {
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				if (ncl_asyncio(nmp, rabp, cred, td)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		}
 		/*
 		 * Unlike VREG files, whos buffer size ( bp->b_bcount ) is
 		 * chopped for the EOF condition, we cannot tell how large
 		 * NFS directories are going to be until we hit EOF.  So
 		 * an NFS directory buffer is *not* chopped to its EOF.  Now,
 		 * it just so happens that b_resid will effectively chop it
 		 * to EOF.  *BUT* this information is lost if the buffer goes
 		 * away and is reconstituted into a B_CACHE state ( due to
 		 * being VMIO ) later.  So we keep track of the directory eof
 		 * in np->n_direofoffset and chop it off as an extra step
 		 * right here.
 		 */
 		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
 		if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
 			n = np->n_direofoffset - uio->uio_offset;
 		break;
 	    default:
 		printf(" ncl_bioread: type %x unexpected\n", vp->v_type);
 		bp = NULL;
 		break;
 	    }

 	    if (n > 0) {
 		    error = vn_io_fault_uiomove(bp->b_data + on, (int)n, uio);
 	    }
 	    if (vp->v_type == VLNK)
 		n = 0;
 	    if (bp != NULL)
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n > 0);
 	return (error);
 }

 /*
  * The NFS write path cannot handle iovecs with len > 1. So we need to
  * break up iovecs accordingly (restricting them to wsize).
  * For the SYNC case, we can do this with 1 copy (user buffer -> mbuf).
  * For the ASYNC case, 2 copies are needed. The first a copy from the
  * user buffer to a staging buffer and then a second copy from the staging
  * buffer to mbufs. This can be optimized by copying from the user buffer
  * directly into mbufs and passing the chain down, but that requires a
  * fair amount of re-working of the relevant codepaths (and can be done
  * later).
  */
 static int
 nfs_directio_write(vp, uiop, cred, ioflag)
 	struct vnode *vp;
 	struct uio *uiop;
 	struct ucred *cred;
 	int ioflag;
 {
 	int error;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct thread *td = uiop->uio_td;
 	int size;
 	int wsize;

 	mtx_lock(&nmp->nm_mtx);
 	wsize = nmp->nm_wsize;
 	mtx_unlock(&nmp->nm_mtx);
 	if (ioflag & IO_SYNC) {
 		int iomode, must_commit;
 		struct uio uio;
 		struct iovec iov;
 do_sync:
 		while (uiop->uio_resid > 0) {
 			size = MIN(uiop->uio_resid, wsize);
 			size = MIN(uiop->uio_iov->iov_len, size);
 			iov.iov_base = uiop->uio_iov->iov_base;
 			iov.iov_len = size;
 			uio.uio_iov = &iov;
 			uio.uio_iovcnt = 1;
 			uio.uio_offset = uiop->uio_offset;
 			uio.uio_resid = size;
 			uio.uio_segflg = UIO_USERSPACE;
 			uio.uio_rw = UIO_WRITE;
 			uio.uio_td = td;
 			iomode = NFSWRITE_FILESYNC;
 			error = ncl_writerpc(vp, &uio, cred, &iomode,
 			    &must_commit, 0);
 			KASSERT((must_commit == 0),
 			    ("ncl_directio_write: Did not commit write"));
 			if (error)
 				return (error);
 			uiop->uio_offset += size;
 			uiop->uio_resid -= size;
 			if (uiop->uio_iov->iov_len <= size) {
 				uiop->uio_iovcnt--;
 				uiop->uio_iov++;
 			} else {
 				uiop->uio_iov->iov_base =
 				    (char *)uiop->uio_iov->iov_base + size;
 				uiop->uio_iov->iov_len -= size;
 			}
 		}
 	} else {
 		struct uio *t_uio;
 		struct iovec *t_iov;
 		struct buf *bp;

 		/*
 		 * Break up the write into blocksize chunks and hand these
 		 * over to nfsiod's for write back.
 		 * Unfortunately, this incurs a copy of the data. Since
 		 * the user could modify the buffer before the write is
 		 * initiated.
 		 *
 		 * The obvious optimization here is that one of the 2 copies
 		 * in the async write path can be eliminated by copying the
 		 * data here directly into mbufs and passing the mbuf chain
 		 * down. But that will require a fair amount of re-working
 		 * of the code and can be done if there's enough interest
 		 * in NFS directio access.
 		 */
 		while (uiop->uio_resid > 0) {
 			size = MIN(uiop->uio_resid, wsize);
 			size = MIN(uiop->uio_iov->iov_len, size);
 			bp = getpbuf(&ncl_pbuf_freecnt);
 			t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
 			t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
 			t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
 			t_iov->iov_len = size;
 			t_uio->uio_iov = t_iov;
 			t_uio->uio_iovcnt = 1;
 			t_uio->uio_offset = uiop->uio_offset;
 			t_uio->uio_resid = size;
 			t_uio->uio_segflg = UIO_SYSSPACE;
 			t_uio->uio_rw = UIO_WRITE;
 			t_uio->uio_td = td;
 			KASSERT(uiop->uio_segflg == UIO_USERSPACE ||
 			    uiop->uio_segflg == UIO_SYSSPACE,
 			    ("nfs_directio_write: Bad uio_segflg"));
 			if (uiop->uio_segflg == UIO_USERSPACE) {
 				error = copyin(uiop->uio_iov->iov_base,
 				    t_iov->iov_base, size);
 				if (error != 0)
 					goto err_free;
 			} else
 				/*
 				 * UIO_SYSSPACE may never happen, but handle
 				 * it just in case it does.
 				 */
 				bcopy(uiop->uio_iov->iov_base, t_iov->iov_base,
 				    size);
 			bp->b_flags |= B_DIRECT;
 			bp->b_iocmd = BIO_WRITE;
 			if (cred != NOCRED) {
 				crhold(cred);
 				bp->b_wcred = cred;
 			} else
 				bp->b_wcred = NOCRED;
 			bp->b_caller1 = (void *)t_uio;
 			bp->b_vp = vp;
 			error = ncl_asyncio(nmp, bp, NOCRED, td);
 err_free:
 			if (error) {
 				free(t_iov->iov_base, M_NFSDIRECTIO);
 				free(t_iov, M_NFSDIRECTIO);
 				free(t_uio, M_NFSDIRECTIO);
 				bp->b_vp = NULL;
 				relpbuf(bp, &ncl_pbuf_freecnt);
 				if (error == EINTR)
 					return (error);
 				goto do_sync;
 			}
 			uiop->uio_offset += size;
 			uiop->uio_resid -= size;
 			if (uiop->uio_iov->iov_len <= size) {
 				uiop->uio_iovcnt--;
 				uiop->uio_iov++;
 			} else {
 				uiop->uio_iov->iov_base =
 				    (char *)uiop->uio_iov->iov_base + size;
 				uiop->uio_iov->iov_len -= size;
 			}
 		}
 	}
 	return (0);
 }

 /*
  * Vnode op for write using bio
  */
 int
 ncl_write(struct vop_write_args *ap)
 {
 	int biosize;
 	struct uio *uio = ap->a_uio;
 	struct thread *td = uio->uio_td;
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct ucred *cred = ap->a_cred;
 	int ioflag = ap->a_ioflag;
 	struct buf *bp;
 	struct vattr vattr;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
 	int bcount, noncontig_write, obcount;
 	int bp_cached, n, on, error = 0, error1, wouldcommit;
 	size_t orig_resid, local_resid;
 	off_t orig_size, tmp_off;

 	KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode"));
 	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
 	    ("ncl_write proc"));
 	if (vp->v_type != VREG)
 		return (EIO);
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		mtx_unlock(&np->n_mtx);
 		return (np->n_error);
 	} else
 		mtx_unlock(&np->n_mtx);
 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 		mtx_lock(&nmp->nm_mtx);
 	}
 	if (nmp->nm_wsize == 0)
 		(void) newnfs_iosize(nmp);
 	mtx_unlock(&nmp->nm_mtx);

 	/*
 	 * Synchronously flush pending buffers if we are in synchronous
 	 * mode or if we are appending.
 	 */
 	if (ioflag & (IO_APPEND | IO_SYNC)) {
 		mtx_lock(&np->n_mtx);
 		if (np->n_flag & NMODIFIED) {
 			mtx_unlock(&np->n_mtx);
 #ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
 			/*
 			 * Require non-blocking, synchronous writes to
 			 * dirty files to inform the program it needs
 			 * to fsync(2) explicitly.
 			 */
 			if (ioflag & IO_NDELAY)
 				return (EAGAIN);
 #endif
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 			if (error)
 				return (error);
 		} else
 			mtx_unlock(&np->n_mtx);
 	}

 	orig_resid = uio->uio_resid;
 	mtx_lock(&np->n_mtx);
 	orig_size = np->n_size;
 	mtx_unlock(&np->n_mtx);

 	/*
 	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
 	 * get the append lock.
 	 */
 	if (ioflag & IO_APPEND) {
 		np->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 		error = VOP_GETATTR(vp, &vattr, cred);
 		if (error)
 			return (error);
 		mtx_lock(&np->n_mtx);
 		uio->uio_offset = np->n_size;
 		mtx_unlock(&np->n_mtx);
 	}

 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	tmp_off = uio->uio_offset + uio->uio_resid;
 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uio->uio_offset)
 		return (EFBIG);
 	if (uio->uio_resid == 0)
 		return (0);

 	if (newnfs_directio_enable && (ioflag & IO_DIRECT) && vp->v_type == VREG)
 		return nfs_directio_write(vp, uio, cred, ioflag);

 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, i don't think it matters
 	 */
 	if (vn_rlimit_fsize(vp, uio, td))
 		return (EFBIG);

 	biosize = vp->v_bufobj.bo_bsize;
 	/*
 	 * Find all of this file's B_NEEDCOMMIT buffers.  If our writes
 	 * would exceed the local maximum per-file write commit size when
 	 * combined with those, we must decide whether to flush,
 	 * go synchronous, or return error.  We don't bother checking
 	 * IO_UNIT -- we just make all writes atomic anyway, as there's
 	 * no point optimizing for something that really won't ever happen.
 	 */
 	wouldcommit = 0;
 	if (!(ioflag & IO_SYNC)) {
 		int nflag;

 		mtx_lock(&np->n_mtx);
 		nflag = np->n_flag;
 		mtx_unlock(&np->n_mtx);
 		if (nflag & NMODIFIED) {
 			BO_LOCK(&vp->v_bufobj);
 			if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
 				TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
 				    b_bobufs) {
 					if (bp->b_flags & B_NEEDCOMMIT)
 						wouldcommit += bp->b_bcount;
 				}
 			}
 			BO_UNLOCK(&vp->v_bufobj);
 		}
 	}

 	do {
 		if (!(ioflag & IO_SYNC)) {
 			wouldcommit += biosize;
 			if (wouldcommit > nmp->nm_wcommitsize) {
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 				if (error)
 					return (error);
 				wouldcommit = biosize;
 			}
 		}

 		NFSINCRGLOBAL(nfsstatsv1.biocache_writes);
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset - (lbn * biosize);
 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
 again:
 		/*
 		 * Handle direct append and file extension cases, calculate
 		 * unaligned buffer size.
 		 */
 		mtx_lock(&np->n_mtx);
 		if ((np->n_flag & NHASBEENLOCKED) == 0 &&
 		    (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0)
 			noncontig_write = 1;
 		else
 			noncontig_write = 0;
 		if ((uio->uio_offset == np->n_size ||
 		    (noncontig_write != 0 &&
 		    lbn == (np->n_size / biosize) &&
 		    uio->uio_offset + n > np->n_size)) && n) {
 			mtx_unlock(&np->n_mtx);
 			/*
 			 * Get the buffer (in its pre-append state to maintain
 			 * B_CACHE if it was previously set).  Resize the
1011 * nfsnode after we have locked the buffer to prevent 1011 * nfsnode after we have locked the buffer to prevent
1012 * readers from reading garbage. 1012 * readers from reading garbage.
1013 */ 1013 */
1014 obcount = np->n_size - (lbn * biosize); 1014 obcount = np->n_size - (lbn * biosize);
1015 bp = nfs_getcacheblk(vp, lbn, obcount, td); 1015 bp = nfs_getcacheblk(vp, lbn, obcount, td);
1016 1016
1017 if (bp != NULL) { 1017 if (bp != NULL) {
1018 long save; 1018 long save;
1019 1019
1020 mtx_lock(&np->n_mtx); 1020 mtx_lock(&np->n_mtx);
1021 np->n_size = uio->uio_offset + n; 1021 np->n_size = uio->uio_offset + n;
1022 np->n_flag |= NMODIFIED; 1022 np->n_flag |= NMODIFIED;
1023 vnode_pager_setsize(vp, np->n_size); 1023 vnode_pager_setsize(vp, np->n_size);
1024 mtx_unlock(&np->n_mtx); 1024 mtx_unlock(&np->n_mtx);
1025 1025
1026 save = bp->b_flags & B_CACHE; 1026 save = bp->b_flags & B_CACHE;
1027 bcount = on + n; 1027 bcount = on + n;
1028 allocbuf(bp, bcount); 1028 allocbuf(bp, bcount);
1029 bp->b_flags |= save; 1029 bp->b_flags |= save;
1030 if (noncontig_write != 0 && on > obcount) 1030 if (noncontig_write != 0 && on > obcount)
1031 vfs_bio_bzero_buf(bp, obcount, on - 1031 vfs_bio_bzero_buf(bp, obcount, on -
1032 obcount); 1032 obcount);
1033 } 1033 }
1034 } else { 1034 } else {
1035 /* 1035 /*
1036 * Obtain the locked cache block first, and then 1036 * Obtain the locked cache block first, and then
1037 * adjust the file's size as appropriate. 1037 * adjust the file's size as appropriate.
1038 */ 1038 */
1039 bcount = on + n; 1039 bcount = on + n;
1040 if ((off_t)lbn * biosize + bcount < np->n_size) { 1040 if ((off_t)lbn * biosize + bcount < np->n_size) {
1041 if ((off_t)(lbn + 1) * biosize < np->n_size) 1041 if ((off_t)(lbn + 1) * biosize < np->n_size)
1042 bcount = biosize; 1042 bcount = biosize;
1043 else 1043 else
1044 bcount = np->n_size - (off_t)lbn * biosize; 1044 bcount = np->n_size - (off_t)lbn * biosize;
1045 } 1045 }
1046 mtx_unlock(&np->n_mtx); 1046 mtx_unlock(&np->n_mtx);
1047 bp = nfs_getcacheblk(vp, lbn, bcount, td); 1047 bp = nfs_getcacheblk(vp, lbn, bcount, td);
1048 mtx_lock(&np->n_mtx); 1048 mtx_lock(&np->n_mtx);
1049 if (uio->uio_offset + n > np->n_size) { 1049 if (uio->uio_offset + n > np->n_size) {
1050 np->n_size = uio->uio_offset + n; 1050 np->n_size = uio->uio_offset + n;
1051 np->n_flag |= NMODIFIED; 1051 np->n_flag |= NMODIFIED;
1052 vnode_pager_setsize(vp, np->n_size); 1052 vnode_pager_setsize(vp, np->n_size);
1053 } 1053 }
1054 mtx_unlock(&np->n_mtx); 1054 mtx_unlock(&np->n_mtx);
1055 } 1055 }
1056 1056
1057 if (!bp) { 1057 if (!bp) {
1058 error = newnfs_sigintr(nmp, td); 1058 error = newnfs_sigintr(nmp, td);
1059 if (!error) 1059 if (!error)
1060 error = EINTR; 1060 error = EINTR;
1061 break; 1061 break;
1062 } 1062 }
1063 1063
1064 /* 1064 /*
1065 * Issue a READ if B_CACHE is not set. In special-append 1065 * Issue a READ if B_CACHE is not set. In special-append
1066 * mode, B_CACHE is based on the buffer prior to the write 1066 * mode, B_CACHE is based on the buffer prior to the write
1067 * op and is typically set, avoiding the read. If a read 1067 * op and is typically set, avoiding the read. If a read
1068 * is required in special append mode, the server will 1068 * is required in special append mode, the server will
1069 * probably send us a short-read since we extended the file 1069 * probably send us a short-read since we extended the file
 1070 * on our end, resulting in b_resid == 0 and, thus, 1070 * on our end, resulting in b_resid == 0 and, thus,
1071 * B_CACHE getting set. 1071 * B_CACHE getting set.
1072 * 1072 *
1073 * We can also avoid issuing the read if the write covers 1073 * We can also avoid issuing the read if the write covers
1074 * the entire buffer. We have to make sure the buffer state 1074 * the entire buffer. We have to make sure the buffer state
1075 * is reasonable in this case since we will not be initiating 1075 * is reasonable in this case since we will not be initiating
1076 * I/O. See the comments in kern/vfs_bio.c's getblk() for 1076 * I/O. See the comments in kern/vfs_bio.c's getblk() for
1077 * more information. 1077 * more information.
1078 * 1078 *
1079 * B_CACHE may also be set due to the buffer being cached 1079 * B_CACHE may also be set due to the buffer being cached
1080 * normally. 1080 * normally.
1081 */ 1081 */
1082 1082
1083 bp_cached = 1; 1083 bp_cached = 1;
1084 if (on == 0 && n == bcount) { 1084 if (on == 0 && n == bcount) {
1085 if ((bp->b_flags & B_CACHE) == 0) 1085 if ((bp->b_flags & B_CACHE) == 0)
1086 bp_cached = 0; 1086 bp_cached = 0;
1087 bp->b_flags |= B_CACHE; 1087 bp->b_flags |= B_CACHE;
1088 bp->b_flags &= ~B_INVAL; 1088 bp->b_flags &= ~B_INVAL;
1089 bp->b_ioflags &= ~BIO_ERROR; 1089 bp->b_ioflags &= ~BIO_ERROR;
1090 } 1090 }
1091 1091
1092 if ((bp->b_flags & B_CACHE) == 0) { 1092 if ((bp->b_flags & B_CACHE) == 0) {
1093 bp->b_iocmd = BIO_READ; 1093 bp->b_iocmd = BIO_READ;
1094 vfs_busy_pages(bp, 0); 1094 vfs_busy_pages(bp, 0);
1095 error = ncl_doio(vp, bp, cred, td, 0); 1095 error = ncl_doio(vp, bp, cred, td, 0);
1096 if (error) { 1096 if (error) {
1097 brelse(bp); 1097 brelse(bp);
1098 break; 1098 break;
1099 } 1099 }
1100 } 1100 }
1101 if (bp->b_wcred == NOCRED) 1101 if (bp->b_wcred == NOCRED)
1102 bp->b_wcred = crhold(cred); 1102 bp->b_wcred = crhold(cred);
1103 mtx_lock(&np->n_mtx); 1103 mtx_lock(&np->n_mtx);
1104 np->n_flag |= NMODIFIED; 1104 np->n_flag |= NMODIFIED;
1105 mtx_unlock(&np->n_mtx); 1105 mtx_unlock(&np->n_mtx);
1106 1106
1107 /* 1107 /*
1108 * If dirtyend exceeds file size, chop it down. This should 1108 * If dirtyend exceeds file size, chop it down. This should
1109 * not normally occur but there is an append race where it 1109 * not normally occur but there is an append race where it
1110 * might occur XXX, so we log it. 1110 * might occur XXX, so we log it.
1111 * 1111 *
1112 * If the chopping creates a reverse-indexed or degenerate 1112 * If the chopping creates a reverse-indexed or degenerate
1113 * situation with dirtyoff/end, we 0 both of them. 1113 * situation with dirtyoff/end, we 0 both of them.
1114 */ 1114 */
1115 1115
1116 if (bp->b_dirtyend > bcount) { 1116 if (bp->b_dirtyend > bcount) {
1117 printf("NFS append race @%lx:%d\n", 1117 printf("NFS append race @%lx:%d\n",
1118 (long)bp->b_blkno * DEV_BSIZE, 1118 (long)bp->b_blkno * DEV_BSIZE,
1119 bp->b_dirtyend - bcount); 1119 bp->b_dirtyend - bcount);
1120 bp->b_dirtyend = bcount; 1120 bp->b_dirtyend = bcount;
1121 } 1121 }
1122 1122
1123 if (bp->b_dirtyoff >= bp->b_dirtyend) 1123 if (bp->b_dirtyoff >= bp->b_dirtyend)
1124 bp->b_dirtyoff = bp->b_dirtyend = 0; 1124 bp->b_dirtyoff = bp->b_dirtyend = 0;
1125 1125
1126 /* 1126 /*
1127 * If the new write will leave a contiguous dirty 1127 * If the new write will leave a contiguous dirty
1128 * area, just update the b_dirtyoff and b_dirtyend, 1128 * area, just update the b_dirtyoff and b_dirtyend,
1129 * otherwise force a write rpc of the old dirty area. 1129 * otherwise force a write rpc of the old dirty area.
1130 * 1130 *
1131 * If there has been a file lock applied to this file 1131 * If there has been a file lock applied to this file
1132 * or vfs.nfs.old_noncontig_writing is set, do the following: 1132 * or vfs.nfs.old_noncontig_writing is set, do the following:
1133 * While it is possible to merge discontiguous writes due to 1133 * While it is possible to merge discontiguous writes due to
 1134 * our having a B_CACHE buffer (and thus valid read data 1134 * our having a B_CACHE buffer (and thus valid read data
1135 * for the hole), we don't because it could lead to 1135 * for the hole), we don't because it could lead to
1136 * significant cache coherency problems with multiple clients, 1136 * significant cache coherency problems with multiple clients,
1137 * especially if locking is implemented later on. 1137 * especially if locking is implemented later on.
1138 * 1138 *
1139 * If vfs.nfs.old_noncontig_writing is not set and there has 1139 * If vfs.nfs.old_noncontig_writing is not set and there has
1140 * not been file locking done on this file: 1140 * not been file locking done on this file:
1141 * Relax coherency a bit for the sake of performance and 1141 * Relax coherency a bit for the sake of performance and
1142 * expand the current dirty region to contain the new 1142 * expand the current dirty region to contain the new
1143 * write even if it means we mark some non-dirty data as 1143 * write even if it means we mark some non-dirty data as
1144 * dirty. 1144 * dirty.
1145 */ 1145 */
1146 1146
1147 if (noncontig_write == 0 && bp->b_dirtyend > 0 && 1147 if (noncontig_write == 0 && bp->b_dirtyend > 0 &&
1148 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 1148 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
1149 if (bwrite(bp) == EINTR) { 1149 if (bwrite(bp) == EINTR) {
1150 error = EINTR; 1150 error = EINTR;
1151 break; 1151 break;
1152 } 1152 }
1153 goto again; 1153 goto again;
1154 } 1154 }
1155 1155
1156 local_resid = uio->uio_resid; 1156 local_resid = uio->uio_resid;
1157 error = vn_io_fault_uiomove((char *)bp->b_data + on, n, uio); 1157 error = vn_io_fault_uiomove((char *)bp->b_data + on, n, uio);
1158 1158
1159 if (error != 0 && !bp_cached) { 1159 if (error != 0 && !bp_cached) {
1160 /* 1160 /*
1161 * This block has no other content than what 1161 * This block has no other content than what
1162 * possibly was written by the faulty uiomove. 1162 * possibly was written by the faulty uiomove.
1163 * Release it, forgetting the data pages, to 1163 * Release it, forgetting the data pages, to
1164 * prevent the leak of uninitialized data to 1164 * prevent the leak of uninitialized data to
1165 * usermode. 1165 * usermode.
1166 */ 1166 */
1167 bp->b_ioflags |= BIO_ERROR; 1167 bp->b_ioflags |= BIO_ERROR;
1168 brelse(bp); 1168 brelse(bp);
1169 uio->uio_offset -= local_resid - uio->uio_resid; 1169 uio->uio_offset -= local_resid - uio->uio_resid;
1170 uio->uio_resid = local_resid; 1170 uio->uio_resid = local_resid;
1171 break; 1171 break;
1172 } 1172 }
1173 1173
1174 /* 1174 /*
1175 * Since this block is being modified, it must be written 1175 * Since this block is being modified, it must be written
1176 * again and not just committed. Since write clustering does 1176 * again and not just committed. Since write clustering does
1177 * not work for the stage 1 data write, only the stage 2 1177 * not work for the stage 1 data write, only the stage 2
1178 * commit rpc, we have to clear B_CLUSTEROK as well. 1178 * commit rpc, we have to clear B_CLUSTEROK as well.
1179 */ 1179 */
1180 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 1180 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
1181 1181
1182 /* 1182 /*
1183 * Get the partial update on the progress made from 1183 * Get the partial update on the progress made from
1184 * uiomove, if an error occurred. 1184 * uiomove, if an error occurred.
1185 */ 1185 */
1186 if (error != 0) 1186 if (error != 0)
1187 n = local_resid - uio->uio_resid; 1187 n = local_resid - uio->uio_resid;
1188 1188
1189 /* 1189 /*
1190 * Only update dirtyoff/dirtyend if not a degenerate 1190 * Only update dirtyoff/dirtyend if not a degenerate
1191 * condition. 1191 * condition.
1192 */ 1192 */
1193 if (n > 0) { 1193 if (n > 0) {
1194 if (bp->b_dirtyend > 0) { 1194 if (bp->b_dirtyend > 0) {
1195 bp->b_dirtyoff = uimin(on, bp->b_dirtyoff); 1195 bp->b_dirtyoff = uimin(on, bp->b_dirtyoff);
1196 bp->b_dirtyend = uimax((on + n), bp->b_dirtyend); 1196 bp->b_dirtyend = uimax((on + n), bp->b_dirtyend);
1197 } else { 1197 } else {
1198 bp->b_dirtyoff = on; 1198 bp->b_dirtyoff = on;
1199 bp->b_dirtyend = on + n; 1199 bp->b_dirtyend = on + n;
1200 } 1200 }
1201 vfs_bio_set_valid(bp, on, n); 1201 vfs_bio_set_valid(bp, on, n);
1202 } 1202 }
1203 1203
1204 /* 1204 /*
1205 * If IO_SYNC do bwrite(). 1205 * If IO_SYNC do bwrite().
1206 * 1206 *
1207 * IO_INVAL appears to be unused. The idea appears to be 1207 * IO_INVAL appears to be unused. The idea appears to be
1208 * to turn off caching in this case. Very odd. XXX 1208 * to turn off caching in this case. Very odd. XXX
1209 */ 1209 */
1210 if ((ioflag & IO_SYNC)) { 1210 if ((ioflag & IO_SYNC)) {
1211 if (ioflag & IO_INVAL) 1211 if (ioflag & IO_INVAL)
1212 bp->b_flags |= B_NOCACHE; 1212 bp->b_flags |= B_NOCACHE;
1213 error1 = bwrite(bp); 1213 error1 = bwrite(bp);
1214 if (error1 != 0) { 1214 if (error1 != 0) {
1215 if (error == 0) 1215 if (error == 0)
1216 error = error1; 1216 error = error1;
1217 break; 1217 break;
1218 } 1218 }
1219 } else if ((n + on) == biosize) { 1219 } else if ((n + on) == biosize) {
1220 bp->b_flags |= B_ASYNC; 1220 bp->b_flags |= B_ASYNC;
1221 (void) ncl_writebp(bp, 0, NULL); 1221 (void) ncl_writebp(bp, 0, NULL);
1222 } else { 1222 } else {
1223 bdwrite(bp); 1223 bdwrite(bp);
1224 } 1224 }
1225 1225
1226 if (error != 0) 1226 if (error != 0)
1227 break; 1227 break;
1228 } while (uio->uio_resid > 0 && n > 0); 1228 } while (uio->uio_resid > 0 && n > 0);
1229 1229
1230 if (error != 0) { 1230 if (error != 0) {
1231 if (ioflag & IO_UNIT) { 1231 if (ioflag & IO_UNIT) {
1232 VATTR_NULL(&vattr); 1232 VATTR_NULL(&vattr);
1233 vattr.va_size = orig_size; 1233 vattr.va_size = orig_size;
1234 /* IO_SYNC is handled implicitly */ 1234 /* IO_SYNC is handled implicitly */
1235 (void)VOP_SETATTR(vp, &vattr, cred); 1235 (void)VOP_SETATTR(vp, &vattr, cred);
1236 uio->uio_offset -= orig_resid - uio->uio_resid; 1236 uio->uio_offset -= orig_resid - uio->uio_resid;
1237 uio->uio_resid = orig_resid; 1237 uio->uio_resid = orig_resid;
1238 } 1238 }
1239 } 1239 }
1240 1240
1241 return (error); 1241 return (error);
1242} 1242}
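
The loop above splits each write into biosize-sized pieces: lbn is the logical block, on the offset within it, and n the amount that still fits. A minimal userland sketch of that arithmetic, with invented offset/resid/biosize values standing in for uio->uio_offset, uio->uio_resid and bo_bsize:

#include <stdio.h>

int
main(void)
{
    long long offset = 100000;       /* stands in for uio->uio_offset */
    long long resid = 70000;         /* stands in for uio->uio_resid */
    const long long biosize = 32768; /* stands in for bo_bsize */

    while (resid > 0) {
        long long lbn = offset / biosize;      /* logical block */
        long long on = offset - lbn * biosize; /* offset in block */
        long long n = biosize - on;            /* room left in block */

        if (n > resid)
            n = resid;
        printf("lbn=%lld on=%lld n=%lld\n", lbn, on, n);
        offset += n;
        resid -= n;
    }
    return 0;
}

For these values the first piece is the 31072-byte tail of block 3; every later piece starts block-aligned, which is what lets the (n + on) == biosize case above push full blocks asynchronously.
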
1243 1243
1244/* 1244/*
1245 * Get an nfs cache block. 1245 * Get an nfs cache block.
1246 * 1246 *
1247 * Allocate a new one if the block isn't currently in the cache 1247 * Allocate a new one if the block isn't currently in the cache
1248 * and return the block marked busy. If the calling process is 1248 * and return the block marked busy. If the calling process is
1249 * interrupted by a signal for an interruptible mount point, return 1249 * interrupted by a signal for an interruptible mount point, return
1250 * NULL. 1250 * NULL.
1251 * 1251 *
1252 * The caller must carefully deal with the possible B_INVAL state of 1252 * The caller must carefully deal with the possible B_INVAL state of
1253 * the buffer. ncl_doio() clears B_INVAL (and ncl_asyncio() clears it 1253 * the buffer. ncl_doio() clears B_INVAL (and ncl_asyncio() clears it
1254 * indirectly), so synchronous reads can be issued without worrying about 1254 * indirectly), so synchronous reads can be issued without worrying about
1255 * the B_INVAL state. We have to be a little more careful when dealing 1255 * the B_INVAL state. We have to be a little more careful when dealing
1256 * with writes (see comments in nfs_write()) when extending a file past 1256 * with writes (see comments in nfs_write()) when extending a file past
1257 * its EOF. 1257 * its EOF.
1258 */ 1258 */
1259static struct buf * 1259static struct buf *
1260nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td) 1260nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td)
1261{ 1261{
1262 struct buf *bp; 1262 struct buf *bp;
1263 struct mount *mp; 1263 struct mount *mp;
1264 struct nfsmount *nmp; 1264 struct nfsmount *nmp;
1265 1265
1266 mp = vp->v_mount; 1266 mp = vp->v_mount;
1267 nmp = VFSTONFS(mp); 1267 nmp = VFSTONFS(mp);
1268 1268
1269 if (nmp->nm_flag & NFSMNT_INT) { 1269 if (nmp->nm_flag & NFSMNT_INT) {
1270 sigset_t oldset; 1270 sigset_t oldset;
1271 1271
1272 newnfs_set_sigmask(td, &oldset); 1272 newnfs_set_sigmask(td, &oldset);
1273 bp = getblk(vp, bn, size, PCATCH, 0, 0); 1273 bp = getblk(vp, bn, size, PCATCH, 0, 0);
1274 newnfs_restore_sigmask(td, &oldset); 1274 newnfs_restore_sigmask(td, &oldset);
1275 while (bp == NULL) { 1275 while (bp == NULL) {
1276 if (newnfs_sigintr(nmp, td)) 1276 if (newnfs_sigintr(nmp, td))
1277 return (NULL); 1277 return (NULL);
1278 bp = getblk(vp, bn, size, 0, 2 * hz, 0); 1278 bp = getblk(vp, bn, size, 0, 2 * hz, 0);
1279 } 1279 }
1280 } else { 1280 } else {
1281 bp = getblk(vp, bn, size, 0, 0, 0); 1281 bp = getblk(vp, bn, size, 0, 0, 0);
1282 } 1282 }
1283 1283
1284 if (vp->v_type == VREG) 1284 if (vp->v_type == VREG)
1285 bp->b_blkno = bn * (vp->v_bufobj.bo_bsize / DEV_BSIZE); 1285 bp->b_blkno = bn * (vp->v_bufobj.bo_bsize / DEV_BSIZE);

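The hunk ends just after nfs_getcacheblk() translates the logical block number into b_blkno, which is kept in DEV_BSIZE (512-byte) units. A small model of that conversion; the bo_bsize value below is invented:

#include <stdio.h>

#define DEV_BSIZE 512   /* same value as <sys/param.h> */

int
main(void)
{
    long bn = 7;            /* logical block number */
    long bo_bsize = 32768;  /* invented stand-in for vp->v_bufobj.bo_bsize */
    long b_blkno = bn * (bo_bsize / DEV_BSIZE);

    printf("bn %ld -> b_blkno %ld (in %d-byte units)\n",
        bn, b_blkno, DEV_BSIZE);
    return 0;
}
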
cvs diff -r1.3 -r1.4 src/sys/fs/nfs/server/nfs_nfsdport.c

--- src/sys/fs/nfs/server/nfs_nfsdport.c 2018/09/03 16:29:34 1.3
+++ src/sys/fs/nfs/server/nfs_nfsdport.c 2021/03/29 02:13:38 1.4
@@ -1,1138 +1,1138 @@ @@ -1,1138 +1,1138 @@
1/* $NetBSD: nfs_nfsdport.c,v 1.3 2018/09/03 16:29:34 riastradh Exp $ */ 1/* $NetBSD: nfs_nfsdport.c,v 1.4 2021/03/29 02:13:38 simonb Exp $ */
2/*- 2/*-
3 * Copyright (c) 1989, 1993 3 * Copyright (c) 1989, 1993
4 * The Regents of the University of California. All rights reserved. 4 * The Regents of the University of California. All rights reserved.
5 * 5 *
6 * This code is derived from software contributed to Berkeley by 6 * This code is derived from software contributed to Berkeley by
7 * Rick Macklem at The University of Guelph. 7 * Rick Macklem at The University of Guelph.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions 10 * modification, are permitted provided that the following conditions
11 * are met: 11 * are met:
12 * 1. Redistributions of source code must retain the above copyright 12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer. 13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the 15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution. 16 * documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors 17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software 18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission. 19 * without specific prior written permission.
20 * 20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE. 31 * SUCH DAMAGE.
32 * 32 *
33 */ 33 */
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 308212 2016-11-02 12:43:15Z kib "); */ 36/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 308212 2016-11-02 12:43:15Z kib "); */
37__RCSID("$NetBSD: nfs_nfsdport.c,v 1.3 2018/09/03 16:29:34 riastradh Exp $"); 37__RCSID("$NetBSD: nfs_nfsdport.c,v 1.4 2021/03/29 02:13:38 simonb Exp $");
38 38
39#if 0 39#if 0
40#include <sys/capsicum.h> 40#include <sys/capsicum.h>
41#endif 41#endif
42 42
43/* 43/*
44 * Functions that perform the vfs operations required by the routines in 44 * Functions that perform the vfs operations required by the routines in
45 * nfsd_serv.c. It is hoped that this change will make the server more 45 * nfsd_serv.c. It is hoped that this change will make the server more
46 * portable. 46 * portable.
47 */ 47 */
48 48
49#include <fs/nfs/common/nfsport.h> 49#include <fs/nfs/common/nfsport.h>
50#include <sys/hash.h> 50#include <sys/hash.h>
51#include <sys/sysctl.h> 51#include <sys/sysctl.h>
52 52
53#if 0 53#if 0
54#include <nlm/nlm_prot.h> 54#include <nlm/nlm_prot.h>
55#include <nlm/nlm.h> 55#include <nlm/nlm.h>
56#endif 56#endif
57 57
58FEATURE(nfsd, "NFSv4 server"); 58FEATURE(nfsd, "NFSv4 server");
59 59
60extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 60extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
61extern int nfsrv_useacl; 61extern int nfsrv_useacl;
62extern int newnfs_numnfsd; 62extern int newnfs_numnfsd;
63extern struct mount nfsv4root_mnt; 63extern struct mount nfsv4root_mnt;
64extern struct nfsrv_stablefirst nfsrv_stablefirst; 64extern struct nfsrv_stablefirst nfsrv_stablefirst;
65extern void (*nfsd_call_servertimer)(void); 65extern void (*nfsd_call_servertimer)(void);
66extern SVCPOOL *nfsrvd_pool; 66extern SVCPOOL *nfsrvd_pool;
67extern struct nfsv4lock nfsd_suspend_lock; 67extern struct nfsv4lock nfsd_suspend_lock;
68extern struct nfsclienthashhead *nfsclienthash; 68extern struct nfsclienthashhead *nfsclienthash;
69extern struct nfslockhashhead *nfslockhash; 69extern struct nfslockhashhead *nfslockhash;
70extern struct nfssessionhash *nfssessionhash; 70extern struct nfssessionhash *nfssessionhash;
71extern int nfsrv_sessionhashsize; 71extern int nfsrv_sessionhashsize;
72extern struct nfsstatsv1 nfsstatsv1; 72extern struct nfsstatsv1 nfsstatsv1;
73struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 73struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
74NFSDLOCKMUTEX; 74NFSDLOCKMUTEX;
75struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 75struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
76struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 76struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
77struct mtx nfsrc_udpmtx; 77struct mtx nfsrc_udpmtx;
78struct mtx nfs_v4root_mutex; 78struct mtx nfs_v4root_mutex;
79struct nfsrvfh nfs_rootfh, nfs_pubfh; 79struct nfsrvfh nfs_rootfh, nfs_pubfh;
80int nfs_pubfhset = 0, nfs_rootfhset = 0; 80int nfs_pubfhset = 0, nfs_rootfhset = 0;
81struct proc *nfsd_master_proc = NULL; 81struct proc *nfsd_master_proc = NULL;
82int nfsd_debuglevel = 0; 82int nfsd_debuglevel = 0;
83static pid_t nfsd_master_pid = (pid_t)-1; 83static pid_t nfsd_master_pid = (pid_t)-1;
84static char nfsd_master_comm[MAXCOMLEN + 1]; 84static char nfsd_master_comm[MAXCOMLEN + 1];
85static struct timeval nfsd_master_start; 85static struct timeval nfsd_master_start;
86static uint32_t nfsv4_sysid = 0; 86static uint32_t nfsv4_sysid = 0;
87 87
88static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 88static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
89 struct ucred *); 89 struct ucred *);
90 90
91int nfsrv_enable_crossmntpt = 1; 91int nfsrv_enable_crossmntpt = 1;
92static int nfs_commit_blks; 92static int nfs_commit_blks;
93static int nfs_commit_miss; 93static int nfs_commit_miss;
94extern int nfsrv_issuedelegs; 94extern int nfsrv_issuedelegs;
95extern int nfsrv_dolocallocks; 95extern int nfsrv_dolocallocks;
96extern int nfsd_enable_stringtouid; 96extern int nfsd_enable_stringtouid;
97 97
98SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); 98SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server");
99SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 99SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
100 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 100 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
101SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 101SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
102 0, ""); 102 0, "");
103SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 103SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
104 0, ""); 104 0, "");
105SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 105SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
106 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 106 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
107SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 107SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
108 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 108 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
109SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 109SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
110 0, "Debug level for NFS server"); 110 0, "Debug level for NFS server");
111SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 111SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
112 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); 112 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
113 113
114#define MAX_REORDERED_RPC 16 114#define MAX_REORDERED_RPC 16
115#define NUM_HEURISTIC 1031 115#define NUM_HEURISTIC 1031
116#define NHUSE_INIT 64 116#define NHUSE_INIT 64
117#define NHUSE_INC 16 117#define NHUSE_INC 16
118#define NHUSE_MAX 2048 118#define NHUSE_MAX 2048
119 119
120static struct nfsheur { 120static struct nfsheur {
121 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 121 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
122 off_t nh_nextoff; /* next offset for sequential detection */ 122 off_t nh_nextoff; /* next offset for sequential detection */
123 int nh_use; /* use count for selection */ 123 int nh_use; /* use count for selection */
124 int nh_seqcount; /* heuristic */ 124 int nh_seqcount; /* heuristic */
125} nfsheur[NUM_HEURISTIC]; 125} nfsheur[NUM_HEURISTIC];
126 126
127 127
128/* 128/*
129 * Heuristic to detect sequential operation. 129 * Heuristic to detect sequential operation.
130 */ 130 */
131static struct nfsheur * 131static struct nfsheur *
132nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 132nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
133{ 133{
134 struct nfsheur *nh; 134 struct nfsheur *nh;
135 int hi, try; 135 int hi, try;
136 136
137 /* Locate best candidate. */ 137 /* Locate best candidate. */
138 try = 32; 138 try = 32;
139 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 139 hi = ((int)(vaddr_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
140 nh = &nfsheur[hi]; 140 nh = &nfsheur[hi];
141 while (try--) { 141 while (try--) {
142 if (nfsheur[hi].nh_vp == vp) { 142 if (nfsheur[hi].nh_vp == vp) {
143 nh = &nfsheur[hi]; 143 nh = &nfsheur[hi];
144 break; 144 break;
145 } 145 }
146 if (nfsheur[hi].nh_use > 0) 146 if (nfsheur[hi].nh_use > 0)
147 --nfsheur[hi].nh_use; 147 --nfsheur[hi].nh_use;
148 hi = (hi + 1) % NUM_HEURISTIC; 148 hi = (hi + 1) % NUM_HEURISTIC;
149 if (nfsheur[hi].nh_use < nh->nh_use) 149 if (nfsheur[hi].nh_use < nh->nh_use)
150 nh = &nfsheur[hi]; 150 nh = &nfsheur[hi];
151 } 151 }
152 152
153 /* Initialize hint if this is a new file. */ 153 /* Initialize hint if this is a new file. */
154 if (nh->nh_vp != vp) { 154 if (nh->nh_vp != vp) {
155 nh->nh_vp = vp; 155 nh->nh_vp = vp;
156 nh->nh_nextoff = uio->uio_offset; 156 nh->nh_nextoff = uio->uio_offset;
157 nh->nh_use = NHUSE_INIT; 157 nh->nh_use = NHUSE_INIT;
158 if (uio->uio_offset == 0) 158 if (uio->uio_offset == 0)
159 nh->nh_seqcount = 4; 159 nh->nh_seqcount = 4;
160 else 160 else
161 nh->nh_seqcount = 1; 161 nh->nh_seqcount = 1;
162 } 162 }
163 163
164 /* Calculate heuristic. */ 164 /* Calculate heuristic. */
165 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 165 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
166 uio->uio_offset == nh->nh_nextoff) { 166 uio->uio_offset == nh->nh_nextoff) {
167 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 167 /* See comments in vfs_vnops.c:sequential_heuristic(). */
168 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 168 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
169 if (nh->nh_seqcount > IO_SEQMAX) 169 if (nh->nh_seqcount > IO_SEQMAX)
170 nh->nh_seqcount = IO_SEQMAX; 170 nh->nh_seqcount = IO_SEQMAX;
171 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 171 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
172 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 172 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
173 /* Probably a reordered RPC, leave seqcount alone. */ 173 /* Probably a reordered RPC, leave seqcount alone. */
174 } else if (nh->nh_seqcount > 1) { 174 } else if (nh->nh_seqcount > 1) {
175 nh->nh_seqcount /= 2; 175 nh->nh_seqcount /= 2;
176 } else { 176 } else {
177 nh->nh_seqcount = 0; 177 nh->nh_seqcount = 0;
178 } 178 }
179 nh->nh_use += NHUSE_INC; 179 nh->nh_use += NHUSE_INC;
180 if (nh->nh_use > NHUSE_MAX) 180 if (nh->nh_use > NHUSE_MAX)
181 nh->nh_use = NHUSE_MAX; 181 nh->nh_use = NHUSE_MAX;
182 return (nh); 182 return (nh);
183} 183}
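
The only functional change in this file is at line 139: the vnode pointer is now cast through vaddr_t rather than the legacy vm_offset_t before being hashed into nfsheur[]. Both are unsigned integer types wide enough for a pointer, so this is type hygiene, not a behavior change. A userland model of the lookup, with vaddr_t typedef'd locally and a generic entry type in place of struct vnode (the kernel divides by sizeof(struct vnode); sizeof(struct heur) below only keeps the shape of the expression):

#include <stdio.h>

typedef unsigned long vaddr_t;  /* local stand-in; the kernel gets this
                                   from the machine-dependent headers */

#define NUM_HEURISTIC 1031

struct heur {
    const void *h_key;  /* stands in for nh_vp */
    int h_use;          /* stands in for nh_use */
};

static struct heur table[NUM_HEURISTIC];

static struct heur *
heur_lookup(const void *key)
{
    struct heur *h;
    int hi, try = 32;

    /* Unsigned division and modulo keep the index nonnegative. */
    hi = (int)(((vaddr_t)key / sizeof(struct heur)) % NUM_HEURISTIC);
    h = &table[hi];
    while (try--) {
        if (table[hi].h_key == key)
            return &table[hi];      /* exact hit */
        if (table[hi].h_use > 0)
            --table[hi].h_use;      /* age slots we pass over */
        hi = (hi + 1) % NUM_HEURISTIC;
        if (table[hi].h_use < h->h_use)
            h = &table[hi];         /* remember the coldest slot */
    }
    h->h_key = key;                 /* recycle the coldest slot */
    h->h_use = 64;                  /* NHUSE_INIT-style reset */
    return h;
}

int
main(void)
{
    int a, b;

    printf("&a -> slot %td\n", heur_lookup(&a) - table);
    printf("&b -> slot %td\n", heur_lookup(&b) - table);
    return 0;
}
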
184 184
185/* 185/*
186 * Get attributes into nfsvattr structure. 186 * Get attributes into nfsvattr structure.
187 */ 187 */
188int 188int
189nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 189nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
190 struct thread *p, int vpislocked) 190 struct thread *p, int vpislocked)
191{ 191{
192 int error, lockedit = 0; 192 int error, lockedit = 0;
193 193
194 if (vpislocked == 0) { 194 if (vpislocked == 0) {
195 /* 195 /*
196 * When vpislocked == 0, the vnode is either exclusively 196 * When vpislocked == 0, the vnode is either exclusively
197 * locked by this thread or not locked by this thread. 197 * locked by this thread or not locked by this thread.
 198 * As such, shared-lock it if not exclusively locked. 198 * As such, shared-lock it if not exclusively locked.
199 */ 199 */
200 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 200 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
201 lockedit = 1; 201 lockedit = 1;
202 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 202 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
203 } 203 }
204 } 204 }
205 error = VOP_GETATTR(vp, &nvap->na_vattr, cred); 205 error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
206 if (lockedit != 0) 206 if (lockedit != 0)
207 NFSVOPUNLOCK(vp, 0); 207 NFSVOPUNLOCK(vp, 0);
208 208
209 NFSEXITCODE(error); 209 NFSEXITCODE(error);
210 return (error); 210 return (error);
211} 211}
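
nfsvno_getattr() only takes the shared lock itself when the caller didn't lock the vnode and this thread doesn't already hold it exclusively, remembering that in lockedit so it can unlock on the way out. A control-flow-only sketch of the decision (no real lock is involved):

#include <stdio.h>

enum lkstate { LK_UNLOCKED, LK_SHARED, LK_EXCLUSIVE };

/* Models the "lockedit" bookkeeping only. */
static int
must_lock(int vpislocked, enum lkstate held)
{
    if (vpislocked)
        return 0;   /* caller already holds the vnode locked */
    if (held == LK_EXCLUSIVE)
        return 0;   /* already ours exclusively, don't re-lock */
    return 1;       /* take LK_SHARED now, unlock afterwards */
}

int
main(void)
{
    printf("unlocked caller:  lockedit=%d\n", must_lock(0, LK_UNLOCKED));
    printf("exclusive holder: lockedit=%d\n", must_lock(0, LK_EXCLUSIVE));
    printf("caller locked it: lockedit=%d\n", must_lock(1, LK_SHARED));
    return 0;
}
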
212 212
213/* 213/*
214 * Get a file handle for a vnode. 214 * Get a file handle for a vnode.
215 */ 215 */
216int 216int
217nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 217nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
218{ 218{
219 int error; 219 int error;
220 220
221 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 221 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
222 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 222 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
223 error = VOP_VPTOFH(vp, &fhp->fh_fid); 223 error = VOP_VPTOFH(vp, &fhp->fh_fid);
224 224
225 NFSEXITCODE(error); 225 NFSEXITCODE(error);
226 return (error); 226 return (error);
227} 227}
228 228
229/* 229/*
230 * Perform access checking for vnodes obtained from file handles that would 230 * Perform access checking for vnodes obtained from file handles that would
231 * refer to files already opened by a Unix client. You cannot just use 231 * refer to files already opened by a Unix client. You cannot just use
232 * vn_writechk() and VOP_ACCESSX() for two reasons. 232 * vn_writechk() and VOP_ACCESSX() for two reasons.
233 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 233 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
234 * case. 234 * case.
235 * 2 - The owner is to be given access irrespective of mode bits for some 235 * 2 - The owner is to be given access irrespective of mode bits for some
236 * operations, so that processes that chmod after opening a file don't 236 * operations, so that processes that chmod after opening a file don't
237 * break. 237 * break.
238 */ 238 */
239int 239int
240nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 240nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
241 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 241 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
242 u_int32_t *supportedtypep) 242 u_int32_t *supportedtypep)
243{ 243{
244 struct vattr vattr; 244 struct vattr vattr;
245 int error = 0, getret = 0; 245 int error = 0, getret = 0;
246 246
247 if (vpislocked == 0) { 247 if (vpislocked == 0) {
248 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 248 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
249 error = EPERM; 249 error = EPERM;
250 goto out; 250 goto out;
251 } 251 }
252 } 252 }
253 if (accmode & VWRITE) { 253 if (accmode & VWRITE) {
254 /* Just vn_writechk() changed to check rdonly */ 254 /* Just vn_writechk() changed to check rdonly */
255 /* 255 /*
256 * Disallow write attempts on read-only file systems; 256 * Disallow write attempts on read-only file systems;
257 * unless the file is a socket or a block or character 257 * unless the file is a socket or a block or character
258 * device resident on the file system. 258 * device resident on the file system.
259 */ 259 */
260 if (NFSVNO_EXRDONLY(exp) || 260 if (NFSVNO_EXRDONLY(exp) ||
261 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 261 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
262 switch (vp->v_type) { 262 switch (vp->v_type) {
263 case VREG: 263 case VREG:
264 case VDIR: 264 case VDIR:
265 case VLNK: 265 case VLNK:
266 error = EROFS; 266 error = EROFS;
267 default: 267 default:
268 break; 268 break;
269 } 269 }
270 } 270 }
271 /* 271 /*
272 * If there's shared text associated with 272 * If there's shared text associated with
273 * the inode, try to free it up once. If 273 * the inode, try to free it up once. If
274 * we fail, we can't allow writing. 274 * we fail, we can't allow writing.
275 */ 275 */
276 if (VOP_IS_TEXT(vp) && error == 0) 276 if (VOP_IS_TEXT(vp) && error == 0)
277 error = ETXTBSY; 277 error = ETXTBSY;
278 } 278 }
279 if (error != 0) { 279 if (error != 0) {
280 if (vpislocked == 0) 280 if (vpislocked == 0)
281 NFSVOPUNLOCK(vp, 0); 281 NFSVOPUNLOCK(vp, 0);
282 goto out; 282 goto out;
283 } 283 }
284 284
285 /* 285 /*
286 * Should the override still be applied when ACLs are enabled? 286 * Should the override still be applied when ACLs are enabled?
287 */ 287 */
288 error = VOP_ACCESSX(vp, accmode, cred, p); 288 error = VOP_ACCESSX(vp, accmode, cred, p);
289 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 289 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
290 /* 290 /*
291 * Try again with VEXPLICIT_DENY, to see if the test for 291 * Try again with VEXPLICIT_DENY, to see if the test for
292 * deletion is supported. 292 * deletion is supported.
293 */ 293 */
294 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 294 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
295 if (error == 0) { 295 if (error == 0) {
296 if (vp->v_type == VDIR) { 296 if (vp->v_type == VDIR) {
297 accmode &= ~(VDELETE | VDELETE_CHILD); 297 accmode &= ~(VDELETE | VDELETE_CHILD);
298 accmode |= VWRITE; 298 accmode |= VWRITE;
299 error = VOP_ACCESSX(vp, accmode, cred, p); 299 error = VOP_ACCESSX(vp, accmode, cred, p);
300 } else if (supportedtypep != NULL) { 300 } else if (supportedtypep != NULL) {
301 *supportedtypep &= ~NFSACCESS_DELETE; 301 *supportedtypep &= ~NFSACCESS_DELETE;
302 } 302 }
303 } 303 }
304 } 304 }
305 305
306 /* 306 /*
307 * Allow certain operations for the owner (reads and writes 307 * Allow certain operations for the owner (reads and writes
308 * on files that are already open). 308 * on files that are already open).
309 */ 309 */
310 if (override != NFSACCCHK_NOOVERRIDE && 310 if (override != NFSACCCHK_NOOVERRIDE &&
311 (error == EPERM || error == EACCES)) { 311 (error == EPERM || error == EACCES)) {
312 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 312 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
313 error = 0; 313 error = 0;
314 else if (override & NFSACCCHK_ALLOWOWNER) { 314 else if (override & NFSACCCHK_ALLOWOWNER) {
315 getret = VOP_GETATTR(vp, &vattr, cred); 315 getret = VOP_GETATTR(vp, &vattr, cred);
316 if (getret == 0 && cred->cr_uid == vattr.va_uid) 316 if (getret == 0 && cred->cr_uid == vattr.va_uid)
317 error = 0; 317 error = 0;
318 } 318 }
319 } 319 }
320 if (vpislocked == 0) 320 if (vpislocked == 0)
321 NFSVOPUNLOCK(vp, 0); 321 NFSVOPUNLOCK(vp, 0);
322 322
323out: 323out:
324 NFSEXITCODE(error); 324 NFSEXITCODE(error);
325 return (error); 325 return (error);
326} 326}
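
A sketch of the override logic at the tail of nfsvno_accchk(). The NFSACCCHK_* values below are invented for the model, and va_uid is passed in directly where the real function fetches it with VOP_GETATTR; only the decision structure mirrors the original:

#include <errno.h>
#include <stdio.h>
#include <sys/types.h>

/* Invented flag values; only their roles matter here. */
#define NFSACCCHK_NOOVERRIDE 0
#define NFSACCCHK_ALLOWROOT  0x1
#define NFSACCCHK_ALLOWOWNER 0x2

static int
apply_override(int error, int override, uid_t cr_uid, uid_t va_uid)
{
    if (override != NFSACCCHK_NOOVERRIDE &&
        (error == EPERM || error == EACCES)) {
        if (cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
            error = 0;              /* root override */
        else if ((override & NFSACCCHK_ALLOWOWNER) &&
            cr_uid == va_uid)
            error = 0;              /* owner of an already-open file */
    }
    return error;
}

int
main(void)
{
    /* the owner keeps access after a chmod... */
    printf("%d\n", apply_override(EACCES, NFSACCCHK_ALLOWOWNER, 100, 100));
    /* ...but only when an override is requested */
    printf("%d\n", apply_override(EACCES, NFSACCCHK_NOOVERRIDE, 100, 100));
    return 0;
}

This prints 0 for the owner case and EACCES (13 on most systems) when no override is requested.
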
327 327
328/* 328/*
329 * Set attribute(s) vnop. 329 * Set attribute(s) vnop.
330 */ 330 */
331int 331int
332nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 332nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
333 struct thread *p, struct nfsexstuff *exp) 333 struct thread *p, struct nfsexstuff *exp)
334{ 334{
335 int error; 335 int error;
336 336
337 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 337 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
338 NFSEXITCODE(error); 338 NFSEXITCODE(error);
339 return (error); 339 return (error);
340} 340}
341 341
342/* 342/*
343 * Set up nameidata for a lookup() call and do it. 343 * Set up nameidata for a lookup() call and do it.
344 */ 344 */
345int 345int
346nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 346nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
347 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, 347 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
348 struct vnode **retdirp) 348 struct vnode **retdirp)
349{ 349{
350 struct componentname *cnp = &ndp->ni_cnd; 350 struct componentname *cnp = &ndp->ni_cnd;
351 int i; 351 int i;
352 struct iovec aiov; 352 struct iovec aiov;
353 struct uio auio; 353 struct uio auio;
354 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 354 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
355 int error = 0, crossmnt; 355 int error = 0, crossmnt;
356 char *cp; 356 char *cp;
357 357
358 *retdirp = NULL; 358 *retdirp = NULL;
359 cnp->cn_nameptr = cnp->cn_pnbuf; 359 cnp->cn_nameptr = cnp->cn_pnbuf;
360 ndp->ni_lcf = 0; 360 ndp->ni_lcf = 0;
361 /* 361 /*
362 * Extract and set starting directory. 362 * Extract and set starting directory.
363 */ 363 */
364 if (dp->v_type != VDIR) { 364 if (dp->v_type != VDIR) {
365 if (islocked) 365 if (islocked)
366 vput(dp); 366 vput(dp);
367 else 367 else
368 vrele(dp); 368 vrele(dp);
369 nfsvno_relpathbuf(ndp); 369 nfsvno_relpathbuf(ndp);
370 error = ENOTDIR; 370 error = ENOTDIR;
371 goto out1; 371 goto out1;
372 } 372 }
373 if (islocked) 373 if (islocked)
374 NFSVOPUNLOCK(dp, 0); 374 NFSVOPUNLOCK(dp, 0);
375 VREF(dp); 375 VREF(dp);
376 *retdirp = dp; 376 *retdirp = dp;
377 if (NFSVNO_EXRDONLY(exp)) 377 if (NFSVNO_EXRDONLY(exp))
378 cnp->cn_flags |= RDONLY; 378 cnp->cn_flags |= RDONLY;
379 ndp->ni_segflg = UIO_SYSSPACE; 379 ndp->ni_segflg = UIO_SYSSPACE;
380 crossmnt = 1; 380 crossmnt = 1;
381 381
382 if (nd->nd_flag & ND_PUBLOOKUP) { 382 if (nd->nd_flag & ND_PUBLOOKUP) {
383 ndp->ni_loopcnt = 0; 383 ndp->ni_loopcnt = 0;
384 if (cnp->cn_pnbuf[0] == '/') { 384 if (cnp->cn_pnbuf[0] == '/') {
385 vrele(dp); 385 vrele(dp);
386 /* 386 /*
387 * Check for degenerate pathnames here, since lookup() 387 * Check for degenerate pathnames here, since lookup()
388 * panics on them. 388 * panics on them.
389 */ 389 */
390 for (i = 1; i < ndp->ni_pathlen; i++) 390 for (i = 1; i < ndp->ni_pathlen; i++)
391 if (cnp->cn_pnbuf[i] != '/') 391 if (cnp->cn_pnbuf[i] != '/')
392 break; 392 break;
393 if (i == ndp->ni_pathlen) { 393 if (i == ndp->ni_pathlen) {
394 error = NFSERR_ACCES; 394 error = NFSERR_ACCES;
395 goto out; 395 goto out;
396 } 396 }
397 dp = rootvnode; 397 dp = rootvnode;
398 VREF(dp); 398 VREF(dp);
399 } 399 }
400 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 400 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
401 (nd->nd_flag & ND_NFSV4) == 0) { 401 (nd->nd_flag & ND_NFSV4) == 0) {
402 /* 402 /*
403 * Only cross mount points for NFSv4 when doing a 403 * Only cross mount points for NFSv4 when doing a
404 * mount while traversing the file system above 404 * mount while traversing the file system above
405 * the mount point, unless nfsrv_enable_crossmntpt is set. 405 * the mount point, unless nfsrv_enable_crossmntpt is set.
406 */ 406 */
407 cnp->cn_flags |= NOCROSSMOUNT; 407 cnp->cn_flags |= NOCROSSMOUNT;
408 crossmnt = 0; 408 crossmnt = 0;
409 } 409 }
410 410
411 /* 411 /*
412 * Initialize for scan, set ni_startdir and bump ref on dp again 412 * Initialize for scan, set ni_startdir and bump ref on dp again
413 * because lookup() will dereference ni_startdir. 413 * because lookup() will dereference ni_startdir.
414 */ 414 */
415 415
416 cnp->cn_thread = p; 416 cnp->cn_thread = p;
417 ndp->ni_startdir = dp; 417 ndp->ni_startdir = dp;
418 ndp->ni_rootdir = rootvnode; 418 ndp->ni_rootdir = rootvnode;
419 ndp->ni_topdir = NULL; 419 ndp->ni_topdir = NULL;
420 420
421 if (!lockleaf) 421 if (!lockleaf)
422 cnp->cn_flags |= LOCKLEAF; 422 cnp->cn_flags |= LOCKLEAF;
423 for (;;) { 423 for (;;) {
424 cnp->cn_nameptr = cnp->cn_pnbuf; 424 cnp->cn_nameptr = cnp->cn_pnbuf;
425 /* 425 /*
426 * Call lookup() to do the real work. If an error occurs, 426 * Call lookup() to do the real work. If an error occurs,
427 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 427 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
428 * we do not have to dereference anything before returning. 428 * we do not have to dereference anything before returning.
429 * In either case ni_startdir will be dereferenced and NULLed 429 * In either case ni_startdir will be dereferenced and NULLed
430 * out. 430 * out.
431 */ 431 */
432 error = lookup(ndp); 432 error = lookup(ndp);
433 if (error) 433 if (error)
434 break; 434 break;
435 435
436 /* 436 /*
437 * Check for encountering a symbolic link. Trivial 437 * Check for encountering a symbolic link. Trivial
438 * termination occurs if no symlink encountered. 438 * termination occurs if no symlink encountered.
439 */ 439 */
440 if ((cnp->cn_flags & ISSYMLINK) == 0) { 440 if ((cnp->cn_flags & ISSYMLINK) == 0) {
441 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) 441 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
442 nfsvno_relpathbuf(ndp); 442 nfsvno_relpathbuf(ndp);
443 if (ndp->ni_vp && !lockleaf) 443 if (ndp->ni_vp && !lockleaf)
444 NFSVOPUNLOCK(ndp->ni_vp, 0); 444 NFSVOPUNLOCK(ndp->ni_vp, 0);
445 break; 445 break;
446 } 446 }
447 447
448 /* 448 /*
449 * Validate symlink 449 * Validate symlink
450 */ 450 */
451 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 451 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
452 NFSVOPUNLOCK(ndp->ni_dvp, 0); 452 NFSVOPUNLOCK(ndp->ni_dvp, 0);
453 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 453 if (!(nd->nd_flag & ND_PUBLOOKUP)) {
454 error = EINVAL; 454 error = EINVAL;
455 goto badlink2; 455 goto badlink2;
456 } 456 }
457 457
458 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 458 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
459 error = ELOOP; 459 error = ELOOP;
460 goto badlink2; 460 goto badlink2;
461 } 461 }
462 if (ndp->ni_pathlen > 1) 462 if (ndp->ni_pathlen > 1)
463 cp = uma_zalloc(namei_zone, M_WAITOK); 463 cp = uma_zalloc(namei_zone, M_WAITOK);
464 else 464 else
465 cp = cnp->cn_pnbuf; 465 cp = cnp->cn_pnbuf;
466 aiov.iov_base = cp; 466 aiov.iov_base = cp;
467 aiov.iov_len = MAXPATHLEN; 467 aiov.iov_len = MAXPATHLEN;
468 auio.uio_iov = &aiov; 468 auio.uio_iov = &aiov;
469 auio.uio_iovcnt = 1; 469 auio.uio_iovcnt = 1;
470 auio.uio_offset = 0; 470 auio.uio_offset = 0;
471 auio.uio_rw = UIO_READ; 471 auio.uio_rw = UIO_READ;
472 auio.uio_segflg = UIO_SYSSPACE; 472 auio.uio_segflg = UIO_SYSSPACE;
473 auio.uio_td = NULL; 473 auio.uio_td = NULL;
474 auio.uio_resid = MAXPATHLEN; 474 auio.uio_resid = MAXPATHLEN;
475 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 475 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
476 if (error) { 476 if (error) {
477 badlink1: 477 badlink1:
478 if (ndp->ni_pathlen > 1) 478 if (ndp->ni_pathlen > 1)
479 uma_zfree(namei_zone, cp); 479 uma_zfree(namei_zone, cp);
480 badlink2: 480 badlink2:
481 vrele(ndp->ni_dvp); 481 vrele(ndp->ni_dvp);
482 vput(ndp->ni_vp); 482 vput(ndp->ni_vp);
483 break; 483 break;
484 } 484 }
485 linklen = MAXPATHLEN - auio.uio_resid; 485 linklen = MAXPATHLEN - auio.uio_resid;
486 if (linklen == 0) { 486 if (linklen == 0) {
487 error = ENOENT; 487 error = ENOENT;
488 goto badlink1; 488 goto badlink1;
489 } 489 }
490 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 490 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
491 error = ENAMETOOLONG; 491 error = ENAMETOOLONG;
492 goto badlink1; 492 goto badlink1;
493 } 493 }
494 494
495 /* 495 /*
496 * Adjust or replace path 496 * Adjust or replace path
497 */ 497 */
498 if (ndp->ni_pathlen > 1) { 498 if (ndp->ni_pathlen > 1) {
499 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 499 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
500 uma_zfree(namei_zone, cnp->cn_pnbuf); 500 uma_zfree(namei_zone, cnp->cn_pnbuf);
501 cnp->cn_pnbuf = cp; 501 cnp->cn_pnbuf = cp;
502 } else 502 } else
503 cnp->cn_pnbuf[linklen] = '\0'; 503 cnp->cn_pnbuf[linklen] = '\0';
504 ndp->ni_pathlen += linklen; 504 ndp->ni_pathlen += linklen;
505 505
506 /* 506 /*
 507 * Clean up refs for next loop and check if root directory 507 * Clean up refs for next loop and check if root directory
508 * should replace current directory. Normally ni_dvp 508 * should replace current directory. Normally ni_dvp
509 * becomes the new base directory and is cleaned up when 509 * becomes the new base directory and is cleaned up when
510 * we loop. Explicitly null pointers after invalidation 510 * we loop. Explicitly null pointers after invalidation
511 * to clarify operation. 511 * to clarify operation.
512 */ 512 */
513 vput(ndp->ni_vp); 513 vput(ndp->ni_vp);
514 ndp->ni_vp = NULL; 514 ndp->ni_vp = NULL;
515 515
516 if (cnp->cn_pnbuf[0] == '/') { 516 if (cnp->cn_pnbuf[0] == '/') {
517 vrele(ndp->ni_dvp); 517 vrele(ndp->ni_dvp);
518 ndp->ni_dvp = ndp->ni_rootdir; 518 ndp->ni_dvp = ndp->ni_rootdir;
519 VREF(ndp->ni_dvp); 519 VREF(ndp->ni_dvp);
520 } 520 }
521 ndp->ni_startdir = ndp->ni_dvp; 521 ndp->ni_startdir = ndp->ni_dvp;
522 ndp->ni_dvp = NULL; 522 ndp->ni_dvp = NULL;
523 } 523 }
524 if (!lockleaf) 524 if (!lockleaf)
525 cnp->cn_flags &= ~LOCKLEAF; 525 cnp->cn_flags &= ~LOCKLEAF;
526 526
527out: 527out:
528 if (error) { 528 if (error) {
529 nfsvno_relpathbuf(ndp); 529 nfsvno_relpathbuf(ndp);
530 ndp->ni_vp = NULL; 530 ndp->ni_vp = NULL;
531 ndp->ni_dvp = NULL; 531 ndp->ni_dvp = NULL;
532 ndp->ni_startdir = NULL; 532 ndp->ni_startdir = NULL;
533 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 533 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
534 ndp->ni_dvp = NULL; 534 ndp->ni_dvp = NULL;
535 } 535 }
536 536
537out1: 537out1:
538 NFSEXITCODE2(error, nd); 538 NFSEXITCODE2(error, nd);
539 return (error); 539 return (error);
540} 540}
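
A userland model of the symlink step in the loop above: the link text replaces the component just resolved and the unresolved remainder is appended behind it, under the same MAXPATHLEN bound (the path strings are invented):

#include <stdio.h>
#include <string.h>

#define MAXPATHLEN 1024

/*
 * Returns 0 on success, -1 where the kernel would return ENOENT
 * (empty link) or ENAMETOOLONG.
 */
static int
splice_link(const char *link, const char *rest, char *out, size_t outsz)
{
    size_t linklen = strlen(link);
    size_t restlen = strlen(rest);

    if (linklen == 0)
        return -1;                              /* empty link: ENOENT */
    if (linklen + restlen + 1 >= MAXPATHLEN ||
        linklen + restlen + 1 > outsz)
        return -1;                              /* ENAMETOOLONG */
    memcpy(out, link, linklen);
    memcpy(out + linklen, rest, restlen + 1);   /* include the NUL */
    return 0;
}

int
main(void)
{
    char buf[MAXPATHLEN];

    if (splice_link("subdir/target", "/leaf", buf, sizeof(buf)) == 0)
        printf("%s\n", buf);    /* prints subdir/target/leaf */
    return 0;
}
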
541 541
542/* 542/*
 543 * Set up a pathname buffer and return a pointer to it and, optionally, 543 * Set up a pathname buffer and return a pointer to it and, optionally,
544 * set a hash pointer. 544 * set a hash pointer.
545 */ 545 */
546void 546void
547nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 547nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
548{ 548{
549 struct componentname *cnp = &ndp->ni_cnd; 549 struct componentname *cnp = &ndp->ni_cnd;
550 550
551 cnp->cn_flags |= (NOMACCHECK | HASBUF); 551 cnp->cn_flags |= (NOMACCHECK | HASBUF);
552 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 552 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
553 if (hashpp != NULL) 553 if (hashpp != NULL)
554 *hashpp = NULL; 554 *hashpp = NULL;
555 *bufpp = cnp->cn_pnbuf; 555 *bufpp = cnp->cn_pnbuf;
556} 556}
557 557
558/* 558/*
559 * Release the above path buffer, if not released by nfsvno_namei(). 559 * Release the above path buffer, if not released by nfsvno_namei().
560 */ 560 */
561void 561void
562nfsvno_relpathbuf(struct nameidata *ndp) 562nfsvno_relpathbuf(struct nameidata *ndp)
563{ 563{
564 564
565 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) 565 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
566 panic("nfsrelpath"); 566 panic("nfsrelpath");
567 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 567 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
568 ndp->ni_cnd.cn_flags &= ~HASBUF; 568 ndp->ni_cnd.cn_flags &= ~HASBUF;
569} 569}
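
The HASBUF flag makes ownership of the name buffer explicit, so releasing it twice panics immediately instead of corrupting the namei zone. A minimal userland model, with malloc/free and assert standing in for uma_zalloc/uma_zfree and panic (the flag value is invented):

#include <assert.h>
#include <stdlib.h>

#define HASBUF 0x1      /* invented value; only the flag's role matters */

struct nd {
    char *pnbuf;
    int flags;
};

static void
setpathbuf(struct nd *nd)
{
    nd->pnbuf = malloc(1024);
    assert(nd->pnbuf != NULL);
    nd->flags |= HASBUF;
}

static void
relpathbuf(struct nd *nd)
{
    assert(nd->flags & HASBUF); /* panic("nfsrelpath") in the kernel */
    free(nd->pnbuf);
    nd->pnbuf = NULL;
    nd->flags &= ~HASBUF;
}

int
main(void)
{
    struct nd nd = { NULL, 0 };

    setpathbuf(&nd);
    relpathbuf(&nd);
    /* calling relpathbuf(&nd) again would trip the assertion, as intended */
    return 0;
}
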
570 570
571/* 571/*
572 * Readlink vnode op into an mbuf list. 572 * Readlink vnode op into an mbuf list.
573 */ 573 */
574int 574int
575nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, 575nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
576 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 576 struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
577{ 577{
578 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; 578 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
579 struct iovec *ivp = iv; 579 struct iovec *ivp = iv;
580 struct uio io, *uiop = &io; 580 struct uio io, *uiop = &io;
581 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; 581 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
582 int i, len, tlen, error = 0; 582 int i, len, tlen, error = 0;
583 583
584 len = 0; 584 len = 0;
585 i = 0; 585 i = 0;
586 while (len < NFS_MAXPATHLEN) { 586 while (len < NFS_MAXPATHLEN) {
587 NFSMGET(mp); 587 NFSMGET(mp);
588 MCLGET(mp, M_WAITOK); 588 MCLGET(mp, M_WAITOK);
589 mp->m_len = M_SIZE(mp); 589 mp->m_len = M_SIZE(mp);
590 if (len == 0) { 590 if (len == 0) {
591 mp3 = mp2 = mp; 591 mp3 = mp2 = mp;
592 } else { 592 } else {
593 mp2->m_next = mp; 593 mp2->m_next = mp;
594 mp2 = mp; 594 mp2 = mp;
595 } 595 }
596 if ((len + mp->m_len) > NFS_MAXPATHLEN) { 596 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
597 mp->m_len = NFS_MAXPATHLEN - len; 597 mp->m_len = NFS_MAXPATHLEN - len;
598 len = NFS_MAXPATHLEN; 598 len = NFS_MAXPATHLEN;
599 } else { 599 } else {
600 len += mp->m_len; 600 len += mp->m_len;
601 } 601 }
602 ivp->iov_base = mtod(mp, caddr_t); 602 ivp->iov_base = mtod(mp, caddr_t);
603 ivp->iov_len = mp->m_len; 603 ivp->iov_len = mp->m_len;
604 i++; 604 i++;
605 ivp++; 605 ivp++;
606 } 606 }
607 uiop->uio_iov = iv; 607 uiop->uio_iov = iv;
608 uiop->uio_iovcnt = i; 608 uiop->uio_iovcnt = i;
609 uiop->uio_offset = 0; 609 uiop->uio_offset = 0;
610 uiop->uio_resid = len; 610 uiop->uio_resid = len;
611 uiop->uio_rw = UIO_READ; 611 uiop->uio_rw = UIO_READ;
612 uiop->uio_segflg = UIO_SYSSPACE; 612 uiop->uio_segflg = UIO_SYSSPACE;
613 uiop->uio_td = NULL; 613 uiop->uio_td = NULL;
614 error = VOP_READLINK(vp, uiop, cred); 614 error = VOP_READLINK(vp, uiop, cred);
615 if (error) { 615 if (error) {
616 m_freem(mp3); 616 m_freem(mp3);
617 *lenp = 0; 617 *lenp = 0;
618 goto out; 618 goto out;
619 } 619 }
620 if (uiop->uio_resid > 0) { 620 if (uiop->uio_resid > 0) {
621 len -= uiop->uio_resid; 621 len -= uiop->uio_resid;
622 tlen = NFSM_RNDUP(len); 622 tlen = NFSM_RNDUP(len);
623 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); 623 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
624 } 624 }
625 *lenp = len; 625 *lenp = len;
626 *mpp = mp3; 626 *mpp = mp3;
627 *mpendp = mp; 627 *mpendp = mp;
628 628
629out: 629out:
630 NFSEXITCODE(error); 630 NFSEXITCODE(error);
631 return (error); 631 return (error);
632} 632}
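
A model of the iovec setup in nfsvno_readlink(): a chain of fixed-size buffers is described by an iovec array, and the final entry is clipped so the total never exceeds the path-length cap. PATHCAP and CHUNK are stand-ins for NFS_MAXPATHLEN and the mbuf cluster size:

#include <stdio.h>
#include <sys/uio.h>

#define PATHCAP 1024    /* stands in for NFS_MAXPATHLEN */
#define CHUNK 300       /* invented; the kernel uses mbuf cluster sizes */

int
main(void)
{
    static char bufs[(PATHCAP + CHUNK - 1) / CHUNK][CHUNK];
    struct iovec iv[(PATHCAP + CHUNK - 1) / CHUNK];
    int i = 0, len = 0;

    while (len < PATHCAP) {
        int mlen = CHUNK;

        if (len + mlen > PATHCAP)
            mlen = PATHCAP - len;   /* clip the final chunk */
        iv[i].iov_base = bufs[i];
        iv[i].iov_len = mlen;
        len += mlen;
        i++;
    }
    printf("%d iovecs covering %d bytes (last is %zu)\n",
        i, len, iv[i - 1].iov_len);
    return 0;
}
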
633 633
634/* 634/*
635 * Read vnode op call into mbuf list. 635 * Read vnode op call into mbuf list.
636 */ 636 */
637int 637int
638nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 638nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
639 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) 639 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
640{ 640{
641 struct mbuf *m; 641 struct mbuf *m;
642 int i; 642 int i;
643 struct iovec *iv; 643 struct iovec *iv;
644 struct iovec *iv2; 644 struct iovec *iv2;
645 int error = 0, len, left, siz, tlen, ioflag = 0; 645 int error = 0, len, left, siz, tlen, ioflag = 0;
646 struct mbuf *m2 = NULL, *m3; 646 struct mbuf *m2 = NULL, *m3;
647 struct uio io, *uiop = &io; 647 struct uio io, *uiop = &io;
648 struct nfsheur *nh; 648 struct nfsheur *nh;
649 649
650 len = left = NFSM_RNDUP(cnt); 650 len = left = NFSM_RNDUP(cnt);
651 m3 = NULL; 651 m3 = NULL;
652 /* 652 /*
653 * Generate the mbuf list with the uio_iov ref. to it. 653 * Generate the mbuf list with the uio_iov ref. to it.
654 */ 654 */
655 i = 0; 655 i = 0;
656 while (left > 0) { 656 while (left > 0) {
657 NFSMGET(m); 657 NFSMGET(m);
658 MCLGET(m, M_WAITOK); 658 MCLGET(m, M_WAITOK);
659 m->m_len = 0; 659 m->m_len = 0;
660 siz = uimin(M_TRAILINGSPACE(m), left); 660 siz = uimin(M_TRAILINGSPACE(m), left);
661 left -= siz; 661 left -= siz;
662 i++; 662 i++;
663 if (m3) 663 if (m3)
664 m2->m_next = m; 664 m2->m_next = m;
665 else 665 else
666 m3 = m; 666 m3 = m;
667 m2 = m; 667 m2 = m;
668 } 668 }
669 MALLOC(iv, struct iovec *, i * sizeof (struct iovec), 669 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
670 M_TEMP, M_WAITOK); 670 M_TEMP, M_WAITOK);
671 uiop->uio_iov = iv2 = iv; 671 uiop->uio_iov = iv2 = iv;
672 m = m3; 672 m = m3;
673 left = len; 673 left = len;
674 i = 0; 674 i = 0;
675 while (left > 0) { 675 while (left > 0) {
676 if (m == NULL) 676 if (m == NULL)
677 panic("nfsvno_read iov"); 677 panic("nfsvno_read iov");
678 siz = uimin(M_TRAILINGSPACE(m), left); 678 siz = uimin(M_TRAILINGSPACE(m), left);
679 if (siz > 0) { 679 if (siz > 0) {
680 iv->iov_base = mtod(m, caddr_t) + m->m_len; 680 iv->iov_base = mtod(m, caddr_t) + m->m_len;
681 iv->iov_len = siz; 681 iv->iov_len = siz;
682 m->m_len += siz; 682 m->m_len += siz;
683 left -= siz; 683 left -= siz;
684 iv++; 684 iv++;
685 i++; 685 i++;
686 } 686 }
687 m = m->m_next; 687 m = m->m_next;
688 } 688 }
689 uiop->uio_iovcnt = i; 689 uiop->uio_iovcnt = i;
690 uiop->uio_offset = off; 690 uiop->uio_offset = off;
691 uiop->uio_resid = len; 691 uiop->uio_resid = len;
692 uiop->uio_rw = UIO_READ; 692 uiop->uio_rw = UIO_READ;
693 uiop->uio_segflg = UIO_SYSSPACE; 693 uiop->uio_segflg = UIO_SYSSPACE;
694 uiop->uio_td = NULL; 694 uiop->uio_td = NULL;
695 nh = nfsrv_sequential_heuristic(uiop, vp); 695 nh = nfsrv_sequential_heuristic(uiop, vp);
696 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 696 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
697 /* XXX KDM make this more systematic? */ 697 /* XXX KDM make this more systematic? */
698 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 698 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
699 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 699 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
700 FREE((caddr_t)iv2, M_TEMP); 700 FREE((caddr_t)iv2, M_TEMP);
701 if (error) { 701 if (error) {
702 m_freem(m3); 702 m_freem(m3);
703 *mpp = NULL; 703 *mpp = NULL;
704 goto out; 704 goto out;
705 } 705 }
706 nh->nh_nextoff = uiop->uio_offset; 706 nh->nh_nextoff = uiop->uio_offset;
707 tlen = len - uiop->uio_resid; 707 tlen = len - uiop->uio_resid;
708 cnt = cnt < tlen ? cnt : tlen; 708 cnt = cnt < tlen ? cnt : tlen;
709 tlen = NFSM_RNDUP(cnt); 709 tlen = NFSM_RNDUP(cnt);
710 if (tlen == 0) { 710 if (tlen == 0) {
711 m_freem(m3); 711 m_freem(m3);
712 m3 = NULL; 712 m3 = NULL;
713 } else if (len != tlen || tlen != cnt) 713 } else if (len != tlen || tlen != cnt)
714 nfsrv_adj(m3, len - tlen, tlen - cnt); 714 nfsrv_adj(m3, len - tlen, tlen - cnt);
715 *mpp = m3; 715 *mpp = m3;
716 *mpendp = m2; 716 *mpendp = m2;
717 717
718out: 718out:
719 NFSEXITCODE(error); 719 NFSEXITCODE(error);
720 return (error); 720 return (error);
721} 721}
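nfsvno_read() makes two passes over the same chain: the first loop allocates clusters until their trailing space covers the rounded-up count, and the second walks the chain again to build the iovec array that VOP_READ() scatters into. A self-contained sketch of that second pass over plain fixed-size buffers; BUFSZ and build_iovecs() are hypothetical stand-ins, not the mbuf API:

#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

#define BUFSZ	2048		/* stand-in for one cluster's usable space */

/*
 * Hypothetical sketch: describe "len" bytes of read space spread across
 * fixed-size buffers with an iovec array, mirroring the second loop of
 * nfsvno_read().  Returns the iovec count used, or -1 if the buffers
 * cannot cover len.
 */
static int
build_iovecs(char **bufs, int nbufs, int len, struct iovec *iv)
{
	int i, left = len, siz;

	for (i = 0; i < nbufs && left > 0; i++) {
		siz = left < BUFSZ ? left : BUFSZ;
		iv[i].iov_base = bufs[i];
		iv[i].iov_len = siz;
		left -= siz;
	}
	return (left == 0 ? i : -1);
}

int
main(void)
{
	char b0[BUFSZ], b1[BUFSZ];
	char *bufs[2] = { b0, b1 };
	struct iovec iv[2];
	int n = build_iovecs(bufs, 2, 3000, iv);

	printf("iovecs=%d first=%zu second=%zu\n", n,
	    iv[0].iov_len, iv[1].iov_len);	/* 2, 2048, 952 */
	return 0;
}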
722 722
723/* 723/*
724 * Write vnode op from an mbuf list. 724 * Write vnode op from an mbuf list.
725 */ 725 */
726int 726int
727nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, 727nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
728 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 728 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
729{ 729{
730 struct iovec *ivp; 730 struct iovec *ivp;
731 int i, len; 731 int i, len;
732 struct iovec *iv; 732 struct iovec *iv;
733 int ioflags, error; 733 int ioflags, error;
734 struct uio io, *uiop = &io; 734 struct uio io, *uiop = &io;
735 struct nfsheur *nh; 735 struct nfsheur *nh;
736 736
737 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, 737 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
738 M_WAITOK); 738 M_WAITOK);
739 uiop->uio_iov = iv = ivp; 739 uiop->uio_iov = iv = ivp;
740 uiop->uio_iovcnt = cnt; 740 uiop->uio_iovcnt = cnt;
741 i = mtod(mp, caddr_t) + mp->m_len - cp; 741 i = mtod(mp, caddr_t) + mp->m_len - cp;
742 len = retlen; 742 len = retlen;
743 while (len > 0) { 743 while (len > 0) {
744 if (mp == NULL) 744 if (mp == NULL)
745 panic("nfsvno_write"); 745 panic("nfsvno_write");
746 if (i > 0) { 746 if (i > 0) {
747 i = uimin(i, len); 747 i = uimin(i, len);
748 ivp->iov_base = cp; 748 ivp->iov_base = cp;
749 ivp->iov_len = i; 749 ivp->iov_len = i;
750 ivp++; 750 ivp++;
751 len -= i; 751 len -= i;
752 } 752 }
753 mp = mp->m_next; 753 mp = mp->m_next;
754 if (mp) { 754 if (mp) {
755 i = mp->m_len; 755 i = mp->m_len;
756 cp = mtod(mp, caddr_t); 756 cp = mtod(mp, caddr_t);
757 } 757 }
758 } 758 }
759 759
760 if (stable == NFSWRITE_UNSTABLE) 760 if (stable == NFSWRITE_UNSTABLE)
761 ioflags = IO_NODELOCKED; 761 ioflags = IO_NODELOCKED;
762 else 762 else
763 ioflags = (IO_SYNC | IO_NODELOCKED); 763 ioflags = (IO_SYNC | IO_NODELOCKED);
764 uiop->uio_resid = retlen; 764 uiop->uio_resid = retlen;
765 uiop->uio_rw = UIO_WRITE; 765 uiop->uio_rw = UIO_WRITE;
766 uiop->uio_segflg = UIO_SYSSPACE; 766 uiop->uio_segflg = UIO_SYSSPACE;
767 NFSUIOPROC(uiop, p); 767 NFSUIOPROC(uiop, p);
768 uiop->uio_offset = off; 768 uiop->uio_offset = off;
769 nh = nfsrv_sequential_heuristic(uiop, vp); 769 nh = nfsrv_sequential_heuristic(uiop, vp);
770 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 770 ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
771 /* XXX KDM make this more systematic? */ 771 /* XXX KDM make this more systematic? */
772 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 772 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
773 error = VOP_WRITE(vp, uiop, ioflags, cred); 773 error = VOP_WRITE(vp, uiop, ioflags, cred);
774 if (error == 0) 774 if (error == 0)
775 nh->nh_nextoff = uiop->uio_offset; 775 nh->nh_nextoff = uiop->uio_offset;
776 FREE((caddr_t)iv, M_TEMP); 776 FREE((caddr_t)iv, M_TEMP);
777 777
778 NFSEXITCODE(error); 778 NFSEXITCODE(error);
779 return (error); 779 return (error);
780} 780}
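One subtlety versus the read path: the write data may start mid-mbuf at cp, so the first iovec covers only the bytes from cp to the end of that mbuf's valid data, which is what the initial i = mtod(mp, caddr_t) + mp->m_len - cp computes. A tiny sketch of the same pointer arithmetic on an ordinary buffer (names hypothetical):

#include <assert.h>
#include <stdio.h>

/*
 * Hypothetical sketch of the first-buffer computation in nfsvno_write():
 * "base" holds m_len valid bytes and "cp" points somewhere inside them;
 * the bytes still available from this buffer are base + m_len - cp.
 */
static int
remaining_in_buf(char *base, int m_len, char *cp)
{
	assert(cp >= base && cp <= base + m_len);
	return (int)(base + m_len - cp);
}

int
main(void)
{
	char buf[128];

	printf("%d\n", remaining_in_buf(buf, 100, buf + 40));	/* 60 */
	return 0;
}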
781 781
782/* 782/*
783 * Common code for creating a regular file (plus special files for V2). 783 * Common code for creating a regular file (plus special files for V2).
784 */ 784 */
785int 785int
786nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 786nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
787 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 787 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
788 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) 788 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
789{ 789{
790 u_quad_t tempsize; 790 u_quad_t tempsize;
791 int error; 791 int error;
792 792
793 error = nd->nd_repstat; 793 error = nd->nd_repstat;
794 if (!error && ndp->ni_vp == NULL) { 794 if (!error && ndp->ni_vp == NULL) {
795 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 795 if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
796 vrele(ndp->ni_startdir); 796 vrele(ndp->ni_startdir);
797 error = VOP_CREATE(ndp->ni_dvp, 797 error = VOP_CREATE(ndp->ni_dvp,
798 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 798 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
799 vput(ndp->ni_dvp); 799 vput(ndp->ni_dvp);
800 nfsvno_relpathbuf(ndp); 800 nfsvno_relpathbuf(ndp);
801 if (!error) { 801 if (!error) {
802 if (*exclusive_flagp) { 802 if (*exclusive_flagp) {
803 *exclusive_flagp = 0; 803 *exclusive_flagp = 0;
804 NFSVNO_ATTRINIT(nvap); 804 NFSVNO_ATTRINIT(nvap);
805 nvap->na_atime.tv_sec = cverf[0]; 805 nvap->na_atime.tv_sec = cverf[0];
806 nvap->na_atime.tv_nsec = cverf[1]; 806 nvap->na_atime.tv_nsec = cverf[1];
807 error = VOP_SETATTR(ndp->ni_vp, 807 error = VOP_SETATTR(ndp->ni_vp,
808 &nvap->na_vattr, nd->nd_cred); 808 &nvap->na_vattr, nd->nd_cred);
809 if (error != 0) { 809 if (error != 0) {
810 vput(ndp->ni_vp); 810 vput(ndp->ni_vp);
811 ndp->ni_vp = NULL; 811 ndp->ni_vp = NULL;
812 error = NFSERR_NOTSUPP; 812 error = NFSERR_NOTSUPP;
813 } 813 }
814 } 814 }
815 } 815 }
816 /* 816 /*
 817 * NFS V2 only; nfsrvd_mknod() handles this for V3, 817 * NFS V2 only; nfsrvd_mknod() handles this for V3,
 818 * so just get out on an error. 818 * so just get out on an error.
819 */ 819 */
820 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 820 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
821 nvap->na_type == VFIFO) { 821 nvap->na_type == VFIFO) {
822 if (nvap->na_type == VCHR && rdev == 0xffffffff) 822 if (nvap->na_type == VCHR && rdev == 0xffffffff)
823 nvap->na_type = VFIFO; 823 nvap->na_type = VFIFO;
824 if (nvap->na_type != VFIFO && 824 if (nvap->na_type != VFIFO &&
825 (error = priv_check_cred(nd->nd_cred, 825 (error = priv_check_cred(nd->nd_cred,
826 PRIV_VFS_MKNOD_DEV, 0))) { 826 PRIV_VFS_MKNOD_DEV, 0))) {
827 vrele(ndp->ni_startdir); 827 vrele(ndp->ni_startdir);
828 nfsvno_relpathbuf(ndp); 828 nfsvno_relpathbuf(ndp);
829 vput(ndp->ni_dvp); 829 vput(ndp->ni_dvp);
830 goto out; 830 goto out;
831 } 831 }
832 nvap->na_rdev = rdev; 832 nvap->na_rdev = rdev;
833 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 833 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
834 &ndp->ni_cnd, &nvap->na_vattr); 834 &ndp->ni_cnd, &nvap->na_vattr);
835 vput(ndp->ni_dvp); 835 vput(ndp->ni_dvp);
836 nfsvno_relpathbuf(ndp); 836 nfsvno_relpathbuf(ndp);
837 vrele(ndp->ni_startdir); 837 vrele(ndp->ni_startdir);
838 if (error) 838 if (error)
839 goto out; 839 goto out;
840 } else { 840 } else {
841 vrele(ndp->ni_startdir); 841 vrele(ndp->ni_startdir);
842 nfsvno_relpathbuf(ndp); 842 nfsvno_relpathbuf(ndp);
843 vput(ndp->ni_dvp); 843 vput(ndp->ni_dvp);
844 error = ENXIO; 844 error = ENXIO;
845 goto out; 845 goto out;
846 } 846 }
847 *vpp = ndp->ni_vp; 847 *vpp = ndp->ni_vp;
848 } else { 848 } else {
849 /* 849 /*
850 * Handle cases where error is already set and/or 850 * Handle cases where error is already set and/or
851 * the file exists. 851 * the file exists.
852 * 1 - clean up the lookup 852 * 1 - clean up the lookup
853 * 2 - iff !error and na_size set, truncate it 853 * 2 - iff !error and na_size set, truncate it
854 */ 854 */
855 vrele(ndp->ni_startdir); 855 vrele(ndp->ni_startdir);
856 nfsvno_relpathbuf(ndp); 856 nfsvno_relpathbuf(ndp);
857 *vpp = ndp->ni_vp; 857 *vpp = ndp->ni_vp;
858 if (ndp->ni_dvp == *vpp) 858 if (ndp->ni_dvp == *vpp)
859 vrele(ndp->ni_dvp); 859 vrele(ndp->ni_dvp);
860 else 860 else
861 vput(ndp->ni_dvp); 861 vput(ndp->ni_dvp);
862 if (!error && nvap->na_size != VNOVAL) { 862 if (!error && nvap->na_size != VNOVAL) {
863 error = nfsvno_accchk(*vpp, VWRITE, 863 error = nfsvno_accchk(*vpp, VWRITE,
864 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 864 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
865 NFSACCCHK_VPISLOCKED, NULL); 865 NFSACCCHK_VPISLOCKED, NULL);
866 if (!error) { 866 if (!error) {
867 tempsize = nvap->na_size; 867 tempsize = nvap->na_size;
868 NFSVNO_ATTRINIT(nvap); 868 NFSVNO_ATTRINIT(nvap);
869 nvap->na_size = tempsize; 869 nvap->na_size = tempsize;
870 error = VOP_SETATTR(*vpp, 870 error = VOP_SETATTR(*vpp,
871 &nvap->na_vattr, nd->nd_cred); 871 &nvap->na_vattr, nd->nd_cred);
872 } 872 }
873 } 873 }
874 if (error) 874 if (error)
875 vput(*vpp); 875 vput(*vpp);
876 } 876 }
877 877
878out: 878out:
879 NFSEXITCODE(error); 879 NFSEXITCODE(error);
880 return (error); 880 return (error);
881} 881}
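For an exclusive create, the client's 8-byte verifier is persisted above by storing its two words in the new file's atime via VOP_SETATTR(); if the filesystem can't store it, the new vnode is released and NFSERR_NOTSUPP returned. Keeping the verifier in a stable attribute is what lets a retransmitted exclusive CREATE be recognized as a replay. A hedged sketch of that comparison, with illustrative names only:

#include <stdint.h>
#include <time.h>

/*
 * Illustrative only: an exclusive-create verifier is 8 opaque bytes that
 * nfsvno_createsub() stashes in the new file's atime.  A replayed CREATE
 * carrying the same verifier can then be matched against it.
 */
static int
verf_matches(const struct timespec *atime, const int32_t cverf[2])
{
	return (atime->tv_sec == cverf[0] && atime->tv_nsec == cverf[1]);
}

int
main(void)
{
	struct timespec at = { .tv_sec = 0x12345678, .tv_nsec = 0x0abcdef0 };
	int32_t verf[2] = { 0x12345678, 0x0abcdef0 };

	return verf_matches(&at, verf) ? 0 : 1;
}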
882 882
883/* 883/*
884 * Do a mknod vnode op. 884 * Do a mknod vnode op.
885 */ 885 */
886int 886int
887nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 887nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
888 struct thread *p) 888 struct thread *p)
889{ 889{
890 int error = 0; 890 int error = 0;
891 enum vtype vtyp; 891 enum vtype vtyp;
892 892
893 vtyp = nvap->na_type; 893 vtyp = nvap->na_type;
894 /* 894 /*
 895 * Create it iff it doesn't already exist. 895 * Create it iff it doesn't already exist.
896 */ 896 */
897 if (ndp->ni_vp) { 897 if (ndp->ni_vp) {
898 vrele(ndp->ni_startdir); 898 vrele(ndp->ni_startdir);
899 nfsvno_relpathbuf(ndp); 899 nfsvno_relpathbuf(ndp);
900 vput(ndp->ni_dvp); 900 vput(ndp->ni_dvp);
901 vrele(ndp->ni_vp); 901 vrele(ndp->ni_vp);
902 error = EEXIST; 902 error = EEXIST;
903 goto out; 903 goto out;
904 } 904 }
905 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 905 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
906 vrele(ndp->ni_startdir); 906 vrele(ndp->ni_startdir);
907 nfsvno_relpathbuf(ndp); 907 nfsvno_relpathbuf(ndp);
908 vput(ndp->ni_dvp); 908 vput(ndp->ni_dvp);
909 error = NFSERR_BADTYPE; 909 error = NFSERR_BADTYPE;
910 goto out; 910 goto out;
911 } 911 }
912 if (vtyp == VSOCK) { 912 if (vtyp == VSOCK) {
913 vrele(ndp->ni_startdir); 913 vrele(ndp->ni_startdir);
914 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 914 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
915 &ndp->ni_cnd, &nvap->na_vattr); 915 &ndp->ni_cnd, &nvap->na_vattr);
916 vput(ndp->ni_dvp); 916 vput(ndp->ni_dvp);
917 nfsvno_relpathbuf(ndp); 917 nfsvno_relpathbuf(ndp);
918 } else { 918 } else {
919 if (nvap->na_type != VFIFO && 919 if (nvap->na_type != VFIFO &&
920 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { 920 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
921 vrele(ndp->ni_startdir); 921 vrele(ndp->ni_startdir);
922 nfsvno_relpathbuf(ndp); 922 nfsvno_relpathbuf(ndp);
923 vput(ndp->ni_dvp); 923 vput(ndp->ni_dvp);
924 goto out; 924 goto out;
925 } 925 }
926 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 926 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
927 &ndp->ni_cnd, &nvap->na_vattr); 927 &ndp->ni_cnd, &nvap->na_vattr);
928 vput(ndp->ni_dvp); 928 vput(ndp->ni_dvp);
929 nfsvno_relpathbuf(ndp); 929 nfsvno_relpathbuf(ndp);
930 vrele(ndp->ni_startdir); 930 vrele(ndp->ni_startdir);
931 /* 931 /*
932 * Since VOP_MKNOD returns the ni_vp, I can't 932 * Since VOP_MKNOD returns the ni_vp, I can't
933 * see any reason to do the lookup. 933 * see any reason to do the lookup.
934 */ 934 */
935 } 935 }
936 936
937out: 937out:
938 NFSEXITCODE(error); 938 NFSEXITCODE(error);
939 return (error); 939 return (error);
940} 940}
941 941
942/* 942/*
943 * Mkdir vnode op. 943 * Mkdir vnode op.
944 */ 944 */
945int 945int
946nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 946nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
947 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 947 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
948{ 948{
949 int error = 0; 949 int error = 0;
950 950
951 if (ndp->ni_vp != NULL) { 951 if (ndp->ni_vp != NULL) {
952 if (ndp->ni_dvp == ndp->ni_vp) 952 if (ndp->ni_dvp == ndp->ni_vp)
953 vrele(ndp->ni_dvp); 953 vrele(ndp->ni_dvp);
954 else 954 else
955 vput(ndp->ni_dvp); 955 vput(ndp->ni_dvp);
956 vrele(ndp->ni_vp); 956 vrele(ndp->ni_vp);
957 nfsvno_relpathbuf(ndp); 957 nfsvno_relpathbuf(ndp);
958 error = EEXIST; 958 error = EEXIST;
959 goto out; 959 goto out;
960 } 960 }
961 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 961 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
962 &nvap->na_vattr); 962 &nvap->na_vattr);
963 vput(ndp->ni_dvp); 963 vput(ndp->ni_dvp);
964 nfsvno_relpathbuf(ndp); 964 nfsvno_relpathbuf(ndp);
965 965
966out: 966out:
967 NFSEXITCODE(error); 967 NFSEXITCODE(error);
968 return (error); 968 return (error);
969} 969}
970 970
971/* 971/*
972 * symlink vnode op. 972 * symlink vnode op.
973 */ 973 */
974int 974int
975nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 975nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
976 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 976 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
977 struct nfsexstuff *exp) 977 struct nfsexstuff *exp)
978{ 978{
979 int error = 0; 979 int error = 0;
980 980
981 if (ndp->ni_vp) { 981 if (ndp->ni_vp) {
982 vrele(ndp->ni_startdir); 982 vrele(ndp->ni_startdir);
983 nfsvno_relpathbuf(ndp); 983 nfsvno_relpathbuf(ndp);
984 if (ndp->ni_dvp == ndp->ni_vp) 984 if (ndp->ni_dvp == ndp->ni_vp)
985 vrele(ndp->ni_dvp); 985 vrele(ndp->ni_dvp);
986 else 986 else
987 vput(ndp->ni_dvp); 987 vput(ndp->ni_dvp);
988 vrele(ndp->ni_vp); 988 vrele(ndp->ni_vp);
989 error = EEXIST; 989 error = EEXIST;
990 goto out; 990 goto out;
991 } 991 }
992 992
993 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 993 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
994 &nvap->na_vattr, pathcp); 994 &nvap->na_vattr, pathcp);
995 vput(ndp->ni_dvp); 995 vput(ndp->ni_dvp);
996 vrele(ndp->ni_startdir); 996 vrele(ndp->ni_startdir);
997 nfsvno_relpathbuf(ndp); 997 nfsvno_relpathbuf(ndp);
998 /* 998 /*
999 * Although FreeBSD still had the lookup code in 999 * Although FreeBSD still had the lookup code in
1000 * it for 7/current, there doesn't seem to be any 1000 * it for 7/current, there doesn't seem to be any
1001 * point, since VOP_SYMLINK() returns the ni_vp. 1001 * point, since VOP_SYMLINK() returns the ni_vp.
1002 * Just vput it for v2. 1002 * Just vput it for v2.
1003 */ 1003 */
1004 if (!not_v2 && !error) 1004 if (!not_v2 && !error)
1005 vput(ndp->ni_vp); 1005 vput(ndp->ni_vp);
1006 1006
1007out: 1007out:
1008 NFSEXITCODE(error); 1008 NFSEXITCODE(error);
1009 return (error); 1009 return (error);
1010} 1010}
1011 1011
1012/* 1012/*
1013 * Parse symbolic link arguments. 1013 * Parse symbolic link arguments.
1014 * This function has an ugly side effect. It will MALLOC() an area for 1014 * This function has an ugly side effect. It will MALLOC() an area for
 1015 * the symlink and set *pathcpp to point to it, only if it succeeds. 1015 * the symlink and set *pathcpp to point to it, only if it succeeds.
 1016 * So, if it returns with *pathcpp != NULL, that buffer must 1016 * So, if it returns with *pathcpp != NULL, that buffer must
 1017 * be FREE'd later. 1017 * be FREE'd later.
1018 */ 1018 */
1019int 1019int
1020nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1020nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
1021 struct thread *p, char **pathcpp, int *lenp) 1021 struct thread *p, char **pathcpp, int *lenp)
1022{ 1022{
1023 u_int32_t *tl; 1023 u_int32_t *tl;
1024 char *pathcp = NULL; 1024 char *pathcp = NULL;
1025 int error = 0, len; 1025 int error = 0, len;
1026 struct nfsv2_sattr *sp; 1026 struct nfsv2_sattr *sp;
1027 1027
1028 *pathcpp = NULL; 1028 *pathcpp = NULL;
1029 *lenp = 0; 1029 *lenp = 0;
1030 if ((nd->nd_flag & ND_NFSV3) && 1030 if ((nd->nd_flag & ND_NFSV3) &&
1031 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1031 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
1032 goto nfsmout; 1032 goto nfsmout;
1033 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1033 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1034 len = fxdr_unsigned(int, *tl); 1034 len = fxdr_unsigned(int, *tl);
1035 if (len > NFS_MAXPATHLEN || len <= 0) { 1035 if (len > NFS_MAXPATHLEN || len <= 0) {
1036 error = EBADRPC; 1036 error = EBADRPC;
1037 goto nfsmout; 1037 goto nfsmout;
1038 } 1038 }
1039 MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK); 1039 MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
1040 error = nfsrv_mtostr(nd, pathcp, len); 1040 error = nfsrv_mtostr(nd, pathcp, len);
1041 if (error) 1041 if (error)
1042 goto nfsmout; 1042 goto nfsmout;
1043 if (nd->nd_flag & ND_NFSV2) { 1043 if (nd->nd_flag & ND_NFSV2) {
1044 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1044 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1045 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1045 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1046 } 1046 }
1047 *pathcpp = pathcp; 1047 *pathcpp = pathcp;
1048 *lenp = len; 1048 *lenp = len;
1049 NFSEXITCODE2(0, nd); 1049 NFSEXITCODE2(0, nd);
1050 return (0); 1050 return (0);
1051nfsmout: 1051nfsmout:
1052 if (pathcp) 1052 if (pathcp)
1053 free(pathcp, M_TEMP); 1053 free(pathcp, M_TEMP);
1054 NFSEXITCODE2(error, nd); 1054 NFSEXITCODE2(error, nd);
1055 return (error); 1055 return (error);
1056} 1056}
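As the comment above notes, nfsvno_getsymlink() allocates the path buffer only on success and hands ownership to the caller through *pathcpp; every failure path either frees it (the nfsmout label) or never allocated it. A self-contained sketch of the same allocate-only-on-success contract, with hypothetical names rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXPATH	1024		/* stand-in for NFS_MAXPATHLEN */

/*
 * Hypothetical sketch of the ownership contract: on success *outp points
 * at a freshly allocated copy the caller must free(); on failure *outp
 * stays NULL and nothing leaks.
 */
static int
parsepath(const char *src, char **outp, int *lenp)
{
	size_t n = strlen(src);
	char *p;

	*outp = NULL;
	*lenp = 0;
	if (n == 0 || n > MAXPATH)	/* cf. the len <= 0 / > NFS_MAXPATHLEN check */
		return -1;		/* failure: nothing allocated */
	if ((p = malloc(n + 1)) == NULL)
		return -1;
	memcpy(p, src, n + 1);
	*outp = p;			/* success: caller must free(*outp) */
	*lenp = (int)n;
	return 0;
}

int
main(void)
{
	char *path;
	int len;

	if (parsepath("a/symlink/target", &path, &len) == 0) {
		printf("%d %s\n", len, path);
		free(path);		/* caller owns the buffer */
	}
	return 0;
}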
1057 1057
1058/* 1058/*
1059 * Remove a non-directory object. 1059 * Remove a non-directory object.
1060 */ 1060 */
1061int 1061int
1062nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1062nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1063 struct thread *p, struct nfsexstuff *exp) 1063 struct thread *p, struct nfsexstuff *exp)
1064{ 1064{
1065 struct vnode *vp; 1065 struct vnode *vp;
1066 int error = 0; 1066 int error = 0;
1067 1067
1068 vp = ndp->ni_vp; 1068 vp = ndp->ni_vp;
1069 if (vp->v_type == VDIR) 1069 if (vp->v_type == VDIR)
1070 error = NFSERR_ISDIR; 1070 error = NFSERR_ISDIR;
1071 else if (is_v4) 1071 else if (is_v4)
1072 error = nfsrv_checkremove(vp, 1, p); 1072 error = nfsrv_checkremove(vp, 1, p);
1073 if (!error) 1073 if (!error)
1074 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1074 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1075 if (ndp->ni_dvp == vp) 1075 if (ndp->ni_dvp == vp)
1076 vrele(ndp->ni_dvp); 1076 vrele(ndp->ni_dvp);
1077 else 1077 else
1078 vput(ndp->ni_dvp); 1078 vput(ndp->ni_dvp);
1079 vput(vp); 1079 vput(vp);
1080 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1080 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1081 nfsvno_relpathbuf(ndp); 1081 nfsvno_relpathbuf(ndp);
1082 NFSEXITCODE(error); 1082 NFSEXITCODE(error);
1083 return (error); 1083 return (error);
1084} 1084}
1085 1085
1086/* 1086/*
1087 * Remove a directory. 1087 * Remove a directory.
1088 */ 1088 */
1089int 1089int
1090nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1090nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1091 struct thread *p, struct nfsexstuff *exp) 1091 struct thread *p, struct nfsexstuff *exp)
1092{ 1092{
1093 struct vnode *vp; 1093 struct vnode *vp;
1094 int error = 0; 1094 int error = 0;
1095 1095
1096 vp = ndp->ni_vp; 1096 vp = ndp->ni_vp;
1097 if (vp->v_type != VDIR) { 1097 if (vp->v_type != VDIR) {
1098 error = ENOTDIR; 1098 error = ENOTDIR;
1099 goto out; 1099 goto out;
1100 } 1100 }
1101 /* 1101 /*
1102 * No rmdir "." please. 1102 * No rmdir "." please.
1103 */ 1103 */
1104 if (ndp->ni_dvp == vp) { 1104 if (ndp->ni_dvp == vp) {
1105 error = EINVAL; 1105 error = EINVAL;
1106 goto out; 1106 goto out;
1107 } 1107 }
1108 /* 1108 /*
1109 * The root of a mounted filesystem cannot be deleted. 1109 * The root of a mounted filesystem cannot be deleted.
1110 */ 1110 */
1111 if (vp->v_vflag & VV_ROOT) 1111 if (vp->v_vflag & VV_ROOT)
1112 error = EBUSY; 1112 error = EBUSY;
1113out: 1113out:
1114 if (!error) 1114 if (!error)
1115 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1115 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1116 if (ndp->ni_dvp == vp) 1116 if (ndp->ni_dvp == vp)
1117 vrele(ndp->ni_dvp); 1117 vrele(ndp->ni_dvp);
1118 else 1118 else
1119 vput(ndp->ni_dvp); 1119 vput(ndp->ni_dvp);
1120 vput(vp); 1120 vput(vp);
1121 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1121 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1122 nfsvno_relpathbuf(ndp); 1122 nfsvno_relpathbuf(ndp);
1123 NFSEXITCODE(error); 1123 NFSEXITCODE(error);
1124 return (error); 1124 return (error);
1125} 1125}
1126 1126
1127/* 1127/*
1128 * Rename vnode op. 1128 * Rename vnode op.
1129 */ 1129 */
1130int 1130int
1131nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1131nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1132 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1132 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1133{ 1133{
1134 struct vnode *fvp, *tvp, *tdvp; 1134 struct vnode *fvp, *tvp, *tdvp;
1135 int error = 0; 1135 int error = 0;
1136 1136
1137 fvp = fromndp->ni_vp; 1137 fvp = fromndp->ni_vp;
1138 if (ndstat) { 1138 if (ndstat) {