Sun Apr 24 16:23:49 2011 UTC ()
Pull up following revision(s) (requested by tls in ticket #1600):
	sys/nfs/nfs_socket.c: revision 1.189
As suggested by matt@: change the socket buffer reservations for NFS send/receive
to 3 times the maximum RPC size rather than 2 times.  This avoids the nasty TCP
stalls observed at Panix.  Those running really huge NFS rsize/wsize (>64K) will
need to increase sbmax via sysctl.


(riz)
diff -r1.173.4.8 -r1.173.4.9 src/sys/nfs/nfs_socket.c

cvs diff -r1.173.4.8 -r1.173.4.9 src/sys/nfs/nfs_socket.c (switch to unified diff)

--- src/sys/nfs/nfs_socket.c 2011/03/29 19:47:37 1.173.4.8
+++ src/sys/nfs/nfs_socket.c 2011/04/24 16:23:49 1.173.4.9
@@ -1,1347 +1,1347 @@ @@ -1,1347 +1,1347 @@
1/* $NetBSD: nfs_socket.c,v 1.173.4.8 2011/03/29 19:47:37 riz Exp $ */ 1/* $NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 1989, 1991, 1993, 1995 4 * Copyright (c) 1989, 1991, 1993, 1995
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph. 8 * Rick Macklem at The University of Guelph.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors 18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software 19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission. 20 * without specific prior written permission.
21 * 21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE. 32 * SUCH DAMAGE.
33 * 33 *
34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 */ 35 */
36 36
37/* 37/*
38 * Socket operations for use by nfs 38 * Socket operations for use by nfs
39 */ 39 */
40 40
41#include <sys/cdefs.h> 41#include <sys/cdefs.h>
42__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.173.4.8 2011/03/29 19:47:37 riz Exp $"); 42__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $");
43 43
44#include "fs_nfs.h" 44#include "fs_nfs.h"
45#include "opt_nfs.h" 45#include "opt_nfs.h"
46#include "opt_nfsserver.h" 46#include "opt_nfsserver.h"
47#include "opt_mbuftrace.h" 47#include "opt_mbuftrace.h"
48#include "opt_inet.h" 48#include "opt_inet.h"
49 49
50#include <sys/param.h> 50#include <sys/param.h>
51#include <sys/systm.h> 51#include <sys/systm.h>
52#include <sys/evcnt.h> 52#include <sys/evcnt.h>
53#include <sys/callout.h> 53#include <sys/callout.h>
54#include <sys/proc.h> 54#include <sys/proc.h>
55#include <sys/mount.h> 55#include <sys/mount.h>
56#include <sys/kernel.h> 56#include <sys/kernel.h>
57#include <sys/kmem.h> 57#include <sys/kmem.h>
58#include <sys/mbuf.h> 58#include <sys/mbuf.h>
59#include <sys/vnode.h> 59#include <sys/vnode.h>
60#include <sys/domain.h> 60#include <sys/domain.h>
61#include <sys/protosw.h> 61#include <sys/protosw.h>
62#include <sys/socket.h> 62#include <sys/socket.h>
63#include <sys/socketvar.h> 63#include <sys/socketvar.h>
64#include <sys/syslog.h> 64#include <sys/syslog.h>
65#include <sys/tprintf.h> 65#include <sys/tprintf.h>
66#include <sys/namei.h> 66#include <sys/namei.h>
67#include <sys/signal.h> 67#include <sys/signal.h>
68#include <sys/signalvar.h> 68#include <sys/signalvar.h>
69#include <sys/kauth.h> 69#include <sys/kauth.h>
70 70
71#include <netinet/in.h> 71#include <netinet/in.h>
72#include <netinet/tcp.h> 72#include <netinet/tcp.h>
73 73
74#include <nfs/rpcv2.h> 74#include <nfs/rpcv2.h>
75#include <nfs/nfsproto.h> 75#include <nfs/nfsproto.h>
76#include <nfs/nfs.h> 76#include <nfs/nfs.h>
77#include <nfs/xdr_subs.h> 77#include <nfs/xdr_subs.h>
78#include <nfs/nfsm_subs.h> 78#include <nfs/nfsm_subs.h>
79#include <nfs/nfsmount.h> 79#include <nfs/nfsmount.h>
80#include <nfs/nfsnode.h> 80#include <nfs/nfsnode.h>
81#include <nfs/nfsrtt.h> 81#include <nfs/nfsrtt.h>
82#include <nfs/nfs_var.h> 82#include <nfs/nfs_var.h>
83 83
84#ifdef MBUFTRACE 84#ifdef MBUFTRACE
85struct mowner nfs_mowner = MOWNER_INIT("nfs",""); 85struct mowner nfs_mowner = MOWNER_INIT("nfs","");
86#endif 86#endif
87 87
88/* 88/*
89 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 89 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
90 * Use the mean and mean deviation of rtt for the appropriate type of rpc 90 * Use the mean and mean deviation of rtt for the appropriate type of rpc
91 * for the frequent rpcs and a default for the others. 91 * for the frequent rpcs and a default for the others.
92 * The justification for doing "other" this way is that these rpcs 92 * The justification for doing "other" this way is that these rpcs
93 * happen so infrequently that timer est. would probably be stale. 93 * happen so infrequently that timer est. would probably be stale.
94 * Also, since many of these rpcs are 94 * Also, since many of these rpcs are
95 * non-idempotent, a conservative timeout is desired. 95 * non-idempotent, a conservative timeout is desired.
96 * getattr, lookup - A+2D 96 * getattr, lookup - A+2D
97 * read, write - A+4D 97 * read, write - A+4D
98 * other - nm_timeo 98 * other - nm_timeo
99 */ 99 */
100#define NFS_RTO(n, t) \ 100#define NFS_RTO(n, t) \
101 ((t) == 0 ? (n)->nm_timeo : \ 101 ((t) == 0 ? (n)->nm_timeo : \
102 ((t) < 3 ? \ 102 ((t) < 3 ? \
103 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 103 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
104 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 104 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
105#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 105#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
106#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 106#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
107/* 107/*
108 * External data, mostly RPC constants in XDR form 108 * External data, mostly RPC constants in XDR form
109 */ 109 */
110extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, 110extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
111 rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, 111 rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
112 rpc_auth_kerb; 112 rpc_auth_kerb;
113extern u_int32_t nfs_prog; 113extern u_int32_t nfs_prog;
114extern const int nfsv3_procid[NFS_NPROCS]; 114extern const int nfsv3_procid[NFS_NPROCS];
115extern int nfs_ticks; 115extern int nfs_ticks;
116 116
117#ifdef DEBUG 117#ifdef DEBUG
118/* 118/*
119 * Avoid spamming the console with debugging messages. We only print 119 * Avoid spamming the console with debugging messages. We only print
120 * the nfs timer and reply error debugs every 10 seconds. 120 * the nfs timer and reply error debugs every 10 seconds.
121 */ 121 */
122static const struct timeval nfs_err_interval = { 10, 0 }; 122static const struct timeval nfs_err_interval = { 10, 0 };
123static struct timeval nfs_reply_last_err_time; 123static struct timeval nfs_reply_last_err_time;
124static struct timeval nfs_timer_last_err_time; 124static struct timeval nfs_timer_last_err_time;
125#endif 125#endif
126 126
127/* 127/*
128 * Defines which timer to use for the procnum. 128 * Defines which timer to use for the procnum.
129 * 0 - default 129 * 0 - default
130 * 1 - getattr 130 * 1 - getattr
131 * 2 - lookup 131 * 2 - lookup
132 * 3 - read 132 * 3 - read
133 * 4 - write 133 * 4 - write
134 */ 134 */
135static const int proct[NFS_NPROCS] = { 135static const int proct[NFS_NPROCS] = {
136 [NFSPROC_NULL] = 0, 136 [NFSPROC_NULL] = 0,
137 [NFSPROC_GETATTR] = 1, 137 [NFSPROC_GETATTR] = 1,
138 [NFSPROC_SETATTR] = 0, 138 [NFSPROC_SETATTR] = 0,
139 [NFSPROC_LOOKUP] = 2, 139 [NFSPROC_LOOKUP] = 2,
140 [NFSPROC_ACCESS] = 1, 140 [NFSPROC_ACCESS] = 1,
141 [NFSPROC_READLINK] = 3, 141 [NFSPROC_READLINK] = 3,
142 [NFSPROC_READ] = 3, 142 [NFSPROC_READ] = 3,
143 [NFSPROC_WRITE] = 4, 143 [NFSPROC_WRITE] = 4,
144 [NFSPROC_CREATE] = 0, 144 [NFSPROC_CREATE] = 0,
145 [NFSPROC_MKDIR] = 0, 145 [NFSPROC_MKDIR] = 0,
146 [NFSPROC_SYMLINK] = 0, 146 [NFSPROC_SYMLINK] = 0,
147 [NFSPROC_MKNOD] = 0, 147 [NFSPROC_MKNOD] = 0,
148 [NFSPROC_REMOVE] = 0, 148 [NFSPROC_REMOVE] = 0,
149 [NFSPROC_RMDIR] = 0, 149 [NFSPROC_RMDIR] = 0,
150 [NFSPROC_RENAME] = 0, 150 [NFSPROC_RENAME] = 0,
151 [NFSPROC_LINK] = 0, 151 [NFSPROC_LINK] = 0,
152 [NFSPROC_READDIR] = 3, 152 [NFSPROC_READDIR] = 3,
153 [NFSPROC_READDIRPLUS] = 3, 153 [NFSPROC_READDIRPLUS] = 3,
154 [NFSPROC_FSSTAT] = 0, 154 [NFSPROC_FSSTAT] = 0,
155 [NFSPROC_FSINFO] = 0, 155 [NFSPROC_FSINFO] = 0,
156 [NFSPROC_PATHCONF] = 0, 156 [NFSPROC_PATHCONF] = 0,
157 [NFSPROC_COMMIT] = 0, 157 [NFSPROC_COMMIT] = 0,
158 [NFSPROC_NOOP] = 0, 158 [NFSPROC_NOOP] = 0,
159}; 159};
160 160
161/* 161/*
162 * There is a congestion window for outstanding rpcs maintained per mount 162 * There is a congestion window for outstanding rpcs maintained per mount
163 * point. The cwnd size is adjusted in roughly the way that: 163 * point. The cwnd size is adjusted in roughly the way that:
164 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 164 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
165 * SIGCOMM '88". ACM, August 1988. 165 * SIGCOMM '88". ACM, August 1988.
166 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 166 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
167 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 167 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
168 * of rpcs is in progress. 168 * of rpcs is in progress.
169 * (The sent count and cwnd are scaled for integer arith.) 169 * (The sent count and cwnd are scaled for integer arith.)
170 * Variants of "slow start" were tried and were found to be too much of a 170 * Variants of "slow start" were tried and were found to be too much of a
171 * performance hit (ave. rtt 3 times larger), 171 * performance hit (ave. rtt 3 times larger),
172 * I suspect due to the large rtt that nfs rpcs have. 172 * I suspect due to the large rtt that nfs rpcs have.
173 */ 173 */
174#define NFS_CWNDSCALE 256 174#define NFS_CWNDSCALE 256
175#define NFS_MAXCWND (NFS_CWNDSCALE * 32) 175#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
176static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 176static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
177int nfsrtton = 0; 177int nfsrtton = 0;
178struct nfsrtt nfsrtt; 178struct nfsrtt nfsrtt;
179struct nfsreqhead nfs_reqq; 179struct nfsreqhead nfs_reqq;
180static callout_t nfs_timer_ch; 180static callout_t nfs_timer_ch;
181static struct evcnt nfs_timer_ev; 181static struct evcnt nfs_timer_ev;
182static struct evcnt nfs_timer_start_ev; 182static struct evcnt nfs_timer_start_ev;
183static struct evcnt nfs_timer_stop_ev; 183static struct evcnt nfs_timer_stop_ev;
184 184
185#ifdef NFS 185#ifdef NFS
186static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 186static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
187static void nfs_sndunlock(struct nfsmount *); 187static void nfs_sndunlock(struct nfsmount *);
188#endif 188#endif
189static int nfs_rcvlock(struct nfsmount *, struct nfsreq *); 189static int nfs_rcvlock(struct nfsmount *, struct nfsreq *);
190static void nfs_rcvunlock(struct nfsmount *); 190static void nfs_rcvunlock(struct nfsmount *);
191 191
192#if defined(NFSSERVER) 192#if defined(NFSSERVER)
193static void nfsrv_wakenfsd_locked(struct nfssvc_sock *); 193static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
194#endif /* defined(NFSSERVER) */ 194#endif /* defined(NFSSERVER) */
195 195
196/* 196/*
197 * Initialize sockets and congestion for a new NFS connection. 197 * Initialize sockets and congestion for a new NFS connection.
198 * We do not free the sockaddr if error. 198 * We do not free the sockaddr if error.
199 */ 199 */
200int 200int
201nfs_connect(nmp, rep, l) 201nfs_connect(nmp, rep, l)
202 struct nfsmount *nmp; 202 struct nfsmount *nmp;
203 struct nfsreq *rep; 203 struct nfsreq *rep;
204 struct lwp *l; 204 struct lwp *l;
205{ 205{
206 struct socket *so; 206 struct socket *so;
207 int error, rcvreserve, sndreserve; 207 int error, rcvreserve, sndreserve;
208 struct sockaddr *saddr; 208 struct sockaddr *saddr;
209 struct sockaddr_in *sin; 209 struct sockaddr_in *sin;
210#ifdef INET6 210#ifdef INET6
211 struct sockaddr_in6 *sin6; 211 struct sockaddr_in6 *sin6;
212#endif 212#endif
213 struct mbuf *m; 213 struct mbuf *m;
214 int val; 214 int val;
215 215
216 nmp->nm_so = (struct socket *)0; 216 nmp->nm_so = (struct socket *)0;
217 saddr = mtod(nmp->nm_nam, struct sockaddr *); 217 saddr = mtod(nmp->nm_nam, struct sockaddr *);
218 error = socreate(saddr->sa_family, &nmp->nm_so, 218 error = socreate(saddr->sa_family, &nmp->nm_so,
219 nmp->nm_sotype, nmp->nm_soproto, l, NULL); 219 nmp->nm_sotype, nmp->nm_soproto, l, NULL);
220 if (error) 220 if (error)
221 goto bad; 221 goto bad;
222 so = nmp->nm_so; 222 so = nmp->nm_so;
223#ifdef MBUFTRACE 223#ifdef MBUFTRACE
224 so->so_mowner = &nfs_mowner; 224 so->so_mowner = &nfs_mowner;
225 so->so_rcv.sb_mowner = &nfs_mowner; 225 so->so_rcv.sb_mowner = &nfs_mowner;
226 so->so_snd.sb_mowner = &nfs_mowner; 226 so->so_snd.sb_mowner = &nfs_mowner;
227#endif 227#endif
228 nmp->nm_soflags = so->so_proto->pr_flags; 228 nmp->nm_soflags = so->so_proto->pr_flags;
229 229
230 /* 230 /*
231 * Some servers require that the client port be a reserved port number. 231 * Some servers require that the client port be a reserved port number.
232 */ 232 */
233 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { 233 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
234 val = IP_PORTRANGE_LOW; 234 val = IP_PORTRANGE_LOW;
235 235
236 if ((error = so_setsockopt(NULL, so, IPPROTO_IP, IP_PORTRANGE, 236 if ((error = so_setsockopt(NULL, so, IPPROTO_IP, IP_PORTRANGE,
237 &val, sizeof(val)))) 237 &val, sizeof(val))))
238 goto bad; 238 goto bad;
239 m = m_get(M_WAIT, MT_SONAME); 239 m = m_get(M_WAIT, MT_SONAME);
240 MCLAIM(m, so->so_mowner); 240 MCLAIM(m, so->so_mowner);
241 sin = mtod(m, struct sockaddr_in *); 241 sin = mtod(m, struct sockaddr_in *);
242 sin->sin_len = m->m_len = sizeof (struct sockaddr_in); 242 sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
243 sin->sin_family = AF_INET; 243 sin->sin_family = AF_INET;
244 sin->sin_addr.s_addr = INADDR_ANY; 244 sin->sin_addr.s_addr = INADDR_ANY;
245 sin->sin_port = 0; 245 sin->sin_port = 0;
246 error = sobind(so, m, &lwp0); 246 error = sobind(so, m, &lwp0);
247 m_freem(m); 247 m_freem(m);
248 if (error) 248 if (error)
249 goto bad; 249 goto bad;
250 } 250 }
251#ifdef INET6 251#ifdef INET6
252 if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) { 252 if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) {
253 val = IPV6_PORTRANGE_LOW; 253 val = IPV6_PORTRANGE_LOW;
254 254
255 if ((error = so_setsockopt(NULL, so, IPPROTO_IPV6, 255 if ((error = so_setsockopt(NULL, so, IPPROTO_IPV6,
256 IPV6_PORTRANGE, &val, sizeof(val)))) 256 IPV6_PORTRANGE, &val, sizeof(val))))
257 goto bad; 257 goto bad;
258 m = m_get(M_WAIT, MT_SONAME); 258 m = m_get(M_WAIT, MT_SONAME);
259 MCLAIM(m, so->so_mowner); 259 MCLAIM(m, so->so_mowner);
260 sin6 = mtod(m, struct sockaddr_in6 *); 260 sin6 = mtod(m, struct sockaddr_in6 *);
261 sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6); 261 sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6);
262 sin6->sin6_family = AF_INET6; 262 sin6->sin6_family = AF_INET6;
263 sin6->sin6_addr = in6addr_any; 263 sin6->sin6_addr = in6addr_any;
264 sin6->sin6_port = 0; 264 sin6->sin6_port = 0;
265 error = sobind(so, m, &lwp0); 265 error = sobind(so, m, &lwp0);
266 m_freem(m); 266 m_freem(m);
267 if (error) 267 if (error)
268 goto bad; 268 goto bad;
269 } 269 }
270#endif 270#endif
271 271
272 /* 272 /*
273 * Protocols that do not require connections may be optionally left 273 * Protocols that do not require connections may be optionally left
274 * unconnected for servers that reply from a port other than NFS_PORT. 274 * unconnected for servers that reply from a port other than NFS_PORT.
275 */ 275 */
276 solock(so); 276 solock(so);
277 if (nmp->nm_flag & NFSMNT_NOCONN) { 277 if (nmp->nm_flag & NFSMNT_NOCONN) {
278 if (nmp->nm_soflags & PR_CONNREQUIRED) { 278 if (nmp->nm_soflags & PR_CONNREQUIRED) {
279 sounlock(so); 279 sounlock(so);
280 error = ENOTCONN; 280 error = ENOTCONN;
281 goto bad; 281 goto bad;
282 } 282 }
283 } else { 283 } else {
284 error = soconnect(so, nmp->nm_nam, l); 284 error = soconnect(so, nmp->nm_nam, l);
285 if (error) { 285 if (error) {
286 sounlock(so); 286 sounlock(so);
287 goto bad; 287 goto bad;
288 } 288 }
289 289
290 /* 290 /*
291 * Wait for the connection to complete. Cribbed from the 291 * Wait for the connection to complete. Cribbed from the
292 * connect system call but with the wait timing out so 292 * connect system call but with the wait timing out so
293 * that interruptible mounts don't hang here for a long time. 293 * that interruptible mounts don't hang here for a long time.
294 */ 294 */
295 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 295 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
296 (void)sowait(so, false, 2 * hz); 296 (void)sowait(so, false, 2 * hz);
297 if ((so->so_state & SS_ISCONNECTING) && 297 if ((so->so_state & SS_ISCONNECTING) &&
298 so->so_error == 0 && rep && 298 so->so_error == 0 && rep &&
299 (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){ 299 (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){
300 so->so_state &= ~SS_ISCONNECTING; 300 so->so_state &= ~SS_ISCONNECTING;
301 sounlock(so); 301 sounlock(so);
302 goto bad; 302 goto bad;
303 } 303 }
304 } 304 }
305 if (so->so_error) { 305 if (so->so_error) {
306 error = so->so_error; 306 error = so->so_error;
307 so->so_error = 0; 307 so->so_error = 0;
308 sounlock(so); 308 sounlock(so);
309 goto bad; 309 goto bad;
310 } 310 }
311 } 311 }
312 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 312 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
313 so->so_rcv.sb_timeo = (5 * hz); 313 so->so_rcv.sb_timeo = (5 * hz);
314 so->so_snd.sb_timeo = (5 * hz); 314 so->so_snd.sb_timeo = (5 * hz);
315 } else { 315 } else {
316 /* 316 /*
317 * enable receive timeout to detect server crash and reconnect. 317 * enable receive timeout to detect server crash and reconnect.
318 * otherwise, we can be stuck in soreceive forever. 318 * otherwise, we can be stuck in soreceive forever.
319 */ 319 */
320 so->so_rcv.sb_timeo = (5 * hz); 320 so->so_rcv.sb_timeo = (5 * hz);
321 so->so_snd.sb_timeo = 0; 321 so->so_snd.sb_timeo = 0;
322 } 322 }
323 if (nmp->nm_sotype == SOCK_DGRAM) { 323 if (nmp->nm_sotype == SOCK_DGRAM) {
324 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 324 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
325 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 325 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
326 NFS_MAXPKTHDR) * 2; 326 NFS_MAXPKTHDR) * 2;
327 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 327 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
328 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 328 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
329 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 329 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
330 NFS_MAXPKTHDR) * 2; 330 NFS_MAXPKTHDR) * 3;
331 } else { 331 } else {
332 sounlock(so); 332 sounlock(so);
333 if (nmp->nm_sotype != SOCK_STREAM) 333 if (nmp->nm_sotype != SOCK_STREAM)
334 panic("nfscon sotype"); 334 panic("nfscon sotype");
335 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 335 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
336 val = 1; 336 val = 1;
337 so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val, 337 so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val,
338 sizeof(val)); 338 sizeof(val));
339 } 339 }
340 if (so->so_proto->pr_protocol == IPPROTO_TCP) { 340 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
341 val = 1; 341 val = 1;
342 so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val, 342 so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val,
343 sizeof(val)); 343 sizeof(val));
344 } 344 }
345 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 345 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
346 sizeof (u_int32_t)) * 2; 346 sizeof (u_int32_t)) * 3;
347 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 347 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
348 sizeof (u_int32_t)) * 2; 348 sizeof (u_int32_t)) * 3;
349 solock(so); 349 solock(so);
350 } 350 }
351 error = soreserve(so, sndreserve, rcvreserve); 351 error = soreserve(so, sndreserve, rcvreserve);
352 if (error) { 352 if (error) {
353 sounlock(so); 353 sounlock(so);
354 goto bad; 354 goto bad;
355 } 355 }
356 so->so_rcv.sb_flags |= SB_NOINTR; 356 so->so_rcv.sb_flags |= SB_NOINTR;
357 so->so_snd.sb_flags |= SB_NOINTR; 357 so->so_snd.sb_flags |= SB_NOINTR;
358 sounlock(so); 358 sounlock(so);
359 359
360 /* Initialize other non-zero congestion variables */ 360 /* Initialize other non-zero congestion variables */
361 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 361 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
362 NFS_TIMEO << 3; 362 NFS_TIMEO << 3;
363 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 363 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
364 nmp->nm_sdrtt[3] = 0; 364 nmp->nm_sdrtt[3] = 0;
365 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 365 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
366 nmp->nm_sent = 0; 366 nmp->nm_sent = 0;
367 nmp->nm_timeouts = 0; 367 nmp->nm_timeouts = 0;
368 return (0); 368 return (0);
369 369
370bad: 370bad:
371 nfs_disconnect(nmp); 371 nfs_disconnect(nmp);
372 return (error); 372 return (error);
373} 373}
374 374
375/* 375/*
376 * Reconnect routine: 376 * Reconnect routine:
377 * Called when a connection is broken on a reliable protocol. 377 * Called when a connection is broken on a reliable protocol.
378 * - clean up the old socket 378 * - clean up the old socket
379 * - nfs_connect() again 379 * - nfs_connect() again
380 * - set R_MUSTRESEND for all outstanding requests on mount point 380 * - set R_MUSTRESEND for all outstanding requests on mount point
381 * If this fails the mount point is DEAD! 381 * If this fails the mount point is DEAD!
382 * nb: Must be called with the nfs_sndlock() set on the mount point. 382 * nb: Must be called with the nfs_sndlock() set on the mount point.
383 */ 383 */
384int 384int
385nfs_reconnect(struct nfsreq *rep) 385nfs_reconnect(struct nfsreq *rep)
386{ 386{
387 struct nfsreq *rp; 387 struct nfsreq *rp;
388 struct nfsmount *nmp = rep->r_nmp; 388 struct nfsmount *nmp = rep->r_nmp;
389 int error; 389 int error;
390 390
391 nfs_disconnect(nmp); 391 nfs_disconnect(nmp);
392 while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) { 392 while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) {
393 if (error == EINTR || error == ERESTART) 393 if (error == EINTR || error == ERESTART)
394 return (EINTR); 394 return (EINTR);
395 kpause("nfscn2", false, hz, NULL); 395 kpause("nfscn2", false, hz, NULL);
396 } 396 }
397 397
398 /* 398 /*
399 * Loop through outstanding request list and fix up all requests 399 * Loop through outstanding request list and fix up all requests
400 * on old socket. 400 * on old socket.
401 */ 401 */
402 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { 402 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
403 if (rp->r_nmp == nmp) { 403 if (rp->r_nmp == nmp) {
404 if ((rp->r_flags & R_MUSTRESEND) == 0) 404 if ((rp->r_flags & R_MUSTRESEND) == 0)
405 rp->r_flags |= R_MUSTRESEND | R_REXMITTED; 405 rp->r_flags |= R_MUSTRESEND | R_REXMITTED;
406 rp->r_rexmit = 0; 406 rp->r_rexmit = 0;
407 } 407 }
408 } 408 }
409 return (0); 409 return (0);
410} 410}
411 411
412/* 412/*
413 * NFS disconnect. Clean up and unlink. 413 * NFS disconnect. Clean up and unlink.
414 */ 414 */
415void 415void
416nfs_disconnect(nmp) 416nfs_disconnect(nmp)
417 struct nfsmount *nmp; 417 struct nfsmount *nmp;
418{ 418{
419 struct socket *so; 419 struct socket *so;
420 int drain = 0; 420 int drain = 0;
421 421
422 if (nmp->nm_so) { 422 if (nmp->nm_so) {
423 so = nmp->nm_so; 423 so = nmp->nm_so;
424 nmp->nm_so = (struct socket *)0; 424 nmp->nm_so = (struct socket *)0;
425 solock(so); 425 solock(so);
426 soshutdown(so, SHUT_RDWR); 426 soshutdown(so, SHUT_RDWR);
427 sounlock(so); 427 sounlock(so);
428 drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0; 428 drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
429 if (drain) { 429 if (drain) {
430 /* 430 /*
431 * soshutdown() above should wake up the current 431 * soshutdown() above should wake up the current
432 * listener. 432 * listener.
433 * Now wake up those waiting for the receive lock, and 433 * Now wake up those waiting for the receive lock, and
434 * wait for them to go away unhappy, to prevent *nmp 434 * wait for them to go away unhappy, to prevent *nmp
435 * from evaporating while they're sleeping. 435 * from evaporating while they're sleeping.
436 */ 436 */
437 mutex_enter(&nmp->nm_lock); 437 mutex_enter(&nmp->nm_lock);
438 while (nmp->nm_waiters > 0) { 438 while (nmp->nm_waiters > 0) {
439 cv_broadcast(&nmp->nm_rcvcv); 439 cv_broadcast(&nmp->nm_rcvcv);
440 cv_broadcast(&nmp->nm_sndcv); 440 cv_broadcast(&nmp->nm_sndcv);
441 cv_wait(&nmp->nm_disconcv, &nmp->nm_lock); 441 cv_wait(&nmp->nm_disconcv, &nmp->nm_lock);
442 } 442 }
443 mutex_exit(&nmp->nm_lock); 443 mutex_exit(&nmp->nm_lock);
444 } 444 }
445 soclose(so); 445 soclose(so);
446 } 446 }
447#ifdef DIAGNOSTIC 447#ifdef DIAGNOSTIC
448 if (drain && (nmp->nm_waiters > 0)) 448 if (drain && (nmp->nm_waiters > 0))
449 panic("nfs_disconnect: waiters left after drain?"); 449 panic("nfs_disconnect: waiters left after drain?");
450#endif 450#endif
451} 451}
452 452
453void 453void
454nfs_safedisconnect(nmp) 454nfs_safedisconnect(nmp)
455 struct nfsmount *nmp; 455 struct nfsmount *nmp;
456{ 456{
457 struct nfsreq dummyreq; 457 struct nfsreq dummyreq;
458 458
459 memset(&dummyreq, 0, sizeof(dummyreq)); 459 memset(&dummyreq, 0, sizeof(dummyreq));
460 dummyreq.r_nmp = nmp; 460 dummyreq.r_nmp = nmp;
461 nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */ 461 nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */
462 nfs_disconnect(nmp); 462 nfs_disconnect(nmp);
463 nfs_rcvunlock(nmp); 463 nfs_rcvunlock(nmp);
464} 464}
465 465
466/* 466/*
467 * This is the nfs send routine. For connection based socket types, it 467 * This is the nfs send routine. For connection based socket types, it
468 * must be called with an nfs_sndlock() on the socket. 468 * must be called with an nfs_sndlock() on the socket.
469 * "rep == NULL" indicates that it has been called from a server. 469 * "rep == NULL" indicates that it has been called from a server.
470 * For the client side: 470 * For the client side:
471 * - return EINTR if the RPC is terminated, 0 otherwise 471 * - return EINTR if the RPC is terminated, 0 otherwise
472 * - set R_MUSTRESEND if the send fails for any reason 472 * - set R_MUSTRESEND if the send fails for any reason
473 * - do any cleanup required by recoverable socket errors (? ? ?) 473 * - do any cleanup required by recoverable socket errors (? ? ?)
474 * For the server side: 474 * For the server side:
475 * - return EINTR or ERESTART if interrupted by a signal 475 * - return EINTR or ERESTART if interrupted by a signal
476 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 476 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
477 * - do any cleanup required by recoverable socket errors (? ? ?) 477 * - do any cleanup required by recoverable socket errors (? ? ?)
478 */ 478 */
479int 479int
480nfs_send(so, nam, top, rep, l) 480nfs_send(so, nam, top, rep, l)
481 struct socket *so; 481 struct socket *so;
482 struct mbuf *nam; 482 struct mbuf *nam;
483 struct mbuf *top; 483 struct mbuf *top;
484 struct nfsreq *rep; 484 struct nfsreq *rep;
485 struct lwp *l; 485 struct lwp *l;
486{ 486{
487 struct mbuf *sendnam; 487 struct mbuf *sendnam;
488 int error, soflags, flags; 488 int error, soflags, flags;
489 489
490 /* XXX nfs_doio()/nfs_request() calls with rep->r_lwp == NULL */ 490 /* XXX nfs_doio()/nfs_request() calls with rep->r_lwp == NULL */
491 if (l == NULL && rep->r_lwp == NULL) 491 if (l == NULL && rep->r_lwp == NULL)
492 l = curlwp; 492 l = curlwp;
493 493
494 if (rep) { 494 if (rep) {
495 if (rep->r_flags & R_SOFTTERM) { 495 if (rep->r_flags & R_SOFTTERM) {
496 m_freem(top); 496 m_freem(top);
497 return (EINTR); 497 return (EINTR);
498 } 498 }
499 if ((so = rep->r_nmp->nm_so) == NULL) { 499 if ((so = rep->r_nmp->nm_so) == NULL) {
500 rep->r_flags |= R_MUSTRESEND; 500 rep->r_flags |= R_MUSTRESEND;
501 m_freem(top); 501 m_freem(top);
502 return (0); 502 return (0);
503 } 503 }
504 rep->r_flags &= ~R_MUSTRESEND; 504 rep->r_flags &= ~R_MUSTRESEND;
505 soflags = rep->r_nmp->nm_soflags; 505 soflags = rep->r_nmp->nm_soflags;
506 } else 506 } else
507 soflags = so->so_proto->pr_flags; 507 soflags = so->so_proto->pr_flags;
508 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 508 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
509 sendnam = (struct mbuf *)0; 509 sendnam = (struct mbuf *)0;
510 else 510 else
511 sendnam = nam; 511 sendnam = nam;
512 if (so->so_type == SOCK_SEQPACKET) 512 if (so->so_type == SOCK_SEQPACKET)
513 flags = MSG_EOR; 513 flags = MSG_EOR;
514 else 514 else
515 flags = 0; 515 flags = 0;
516 516
517 error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags, l); 517 error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags, l);
518 if (error) { 518 if (error) {
519 if (rep) { 519 if (rep) {
520 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { 520 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
521 /* 521 /*
522 * We're too fast for the network/driver, 522 * We're too fast for the network/driver,
523 * and UDP isn't flowcontrolled. 523 * and UDP isn't flowcontrolled.
524 * We need to resend. This is not fatal, 524 * We need to resend. This is not fatal,
525 * just try again. 525 * just try again.
526 * 526 *
527 * Could be smarter here by doing some sort 527 * Could be smarter here by doing some sort
528 * of a backoff, but this is rare. 528 * of a backoff, but this is rare.
529 */ 529 */
530 rep->r_flags |= R_MUSTRESEND; 530 rep->r_flags |= R_MUSTRESEND;
531 } else { 531 } else {
532 if (error != EPIPE) 532 if (error != EPIPE)
533 log(LOG_INFO, 533 log(LOG_INFO,
534 "nfs send error %d for %s\n", 534 "nfs send error %d for %s\n",
535 error, 535 error,
536 rep->r_nmp->nm_mountp-> 536 rep->r_nmp->nm_mountp->
537 mnt_stat.f_mntfromname); 537 mnt_stat.f_mntfromname);
538 /* 538 /*
539 * Deal with errors for the client side. 539 * Deal with errors for the client side.
540 */ 540 */
541 if (rep->r_flags & R_SOFTTERM) 541 if (rep->r_flags & R_SOFTTERM)
542 error = EINTR; 542 error = EINTR;
543 else if (error != EMSGSIZE) 543 else if (error != EMSGSIZE)
544 rep->r_flags |= R_MUSTRESEND; 544 rep->r_flags |= R_MUSTRESEND;
545 } 545 }
546 } else { 546 } else {
547 /* 547 /*
548 * See above. This error can happen under normal 548 * See above. This error can happen under normal
549 * circumstances and the log is too noisy. 549 * circumstances and the log is too noisy.
550 * The error will still show up in nfsstat. 550 * The error will still show up in nfsstat.
551 */ 551 */
552 if (error != ENOBUFS || so->so_type != SOCK_DGRAM) 552 if (error != ENOBUFS || so->so_type != SOCK_DGRAM)
553 log(LOG_INFO, "nfsd send error %d\n", error); 553 log(LOG_INFO, "nfsd send error %d\n", error);
554 } 554 }
555 555
556 /* 556 /*
557 * Handle any recoverable (soft) socket errors here. (? ? ?) 557 * Handle any recoverable (soft) socket errors here. (? ? ?)
558 */ 558 */
559 if (error != EINTR && error != ERESTART && 559 if (error != EINTR && error != ERESTART &&
560 error != EWOULDBLOCK && error != EPIPE && 560 error != EWOULDBLOCK && error != EPIPE &&
561 error != EMSGSIZE) 561 error != EMSGSIZE)
562 error = 0; 562 error = 0;
563 } 563 }
564 return (error); 564 return (error);
565} 565}
566 566
567#ifdef NFS 567#ifdef NFS
568/* 568/*
569 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 569 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
570 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 570 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
571 * Mark and consolidate the data into a new mbuf list. 571 * Mark and consolidate the data into a new mbuf list.
572 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 572 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
573 * small mbufs. 573 * small mbufs.
574 * For SOCK_STREAM we must be very careful to read an entire record once 574 * For SOCK_STREAM we must be very careful to read an entire record once
575 * we have read any of it, even if the system call has been interrupted. 575 * we have read any of it, even if the system call has been interrupted.
576 */ 576 */
577static int 577static int
578nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp, 578nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
579 struct lwp *l) 579 struct lwp *l)
580{ 580{
581 struct socket *so; 581 struct socket *so;
582 struct uio auio; 582 struct uio auio;
583 struct iovec aio; 583 struct iovec aio;
584 struct mbuf *m; 584 struct mbuf *m;
585 struct mbuf *control; 585 struct mbuf *control;
586 u_int32_t len; 586 u_int32_t len;
587 struct mbuf **getnam; 587 struct mbuf **getnam;
588 int error, sotype, rcvflg; 588 int error, sotype, rcvflg;
589 589
590 /* 590 /*
591 * Set up arguments for soreceive() 591 * Set up arguments for soreceive()
592 */ 592 */
593 *mp = (struct mbuf *)0; 593 *mp = (struct mbuf *)0;
594 *aname = (struct mbuf *)0; 594 *aname = (struct mbuf *)0;
595 sotype = rep->r_nmp->nm_sotype; 595 sotype = rep->r_nmp->nm_sotype;
596 596
597 /* 597 /*
598 * For reliable protocols, lock against other senders/receivers 598 * For reliable protocols, lock against other senders/receivers
599 * in case a reconnect is necessary. 599 * in case a reconnect is necessary.
600 * For SOCK_STREAM, first get the Record Mark to find out how much 600 * For SOCK_STREAM, first get the Record Mark to find out how much
601 * more there is to get. 601 * more there is to get.
602 * We must lock the socket against other receivers 602 * We must lock the socket against other receivers
603 * until we have an entire rpc request/reply. 603 * until we have an entire rpc request/reply.
604 */ 604 */
605 if (sotype != SOCK_DGRAM) { 605 if (sotype != SOCK_DGRAM) {
606 error = nfs_sndlock(rep->r_nmp, rep); 606 error = nfs_sndlock(rep->r_nmp, rep);
607 if (error) 607 if (error)
608 return (error); 608 return (error);
609tryagain: 609tryagain:
610 /* 610 /*
611 * Check for fatal errors and resending request. 611 * Check for fatal errors and resending request.
612 */ 612 */
613 /* 613 /*
614 * Ugh: If a reconnect attempt just happened, nm_so 614 * Ugh: If a reconnect attempt just happened, nm_so
615 * would have changed. NULL indicates a failed 615 * would have changed. NULL indicates a failed
616 * attempt that has essentially shut down this 616 * attempt that has essentially shut down this
617 * mount point. 617 * mount point.
618 */ 618 */
619 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 619 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
620 nfs_sndunlock(rep->r_nmp); 620 nfs_sndunlock(rep->r_nmp);
621 return (EINTR); 621 return (EINTR);
622 } 622 }
623 so = rep->r_nmp->nm_so; 623 so = rep->r_nmp->nm_so;
624 if (!so) { 624 if (!so) {
625 error = nfs_reconnect(rep); 625 error = nfs_reconnect(rep);
626 if (error) { 626 if (error) {
627 nfs_sndunlock(rep->r_nmp); 627 nfs_sndunlock(rep->r_nmp);
628 return (error); 628 return (error);
629 } 629 }
630 goto tryagain; 630 goto tryagain;
631 } 631 }
632 while (rep->r_flags & R_MUSTRESEND) { 632 while (rep->r_flags & R_MUSTRESEND) {
633 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 633 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
634 nfsstats.rpcretries++; 634 nfsstats.rpcretries++;
635 rep->r_rtt = 0; 635 rep->r_rtt = 0;
636 rep->r_flags &= ~R_TIMING; 636 rep->r_flags &= ~R_TIMING;
637 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l); 637 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
638 if (error) { 638 if (error) {
639 if (error == EINTR || error == ERESTART || 639 if (error == EINTR || error == ERESTART ||
640 (error = nfs_reconnect(rep)) != 0) { 640 (error = nfs_reconnect(rep)) != 0) {
641 nfs_sndunlock(rep->r_nmp); 641 nfs_sndunlock(rep->r_nmp);
642 return (error); 642 return (error);
643 } 643 }
644 goto tryagain; 644 goto tryagain;
645 } 645 }
646 } 646 }
647 nfs_sndunlock(rep->r_nmp); 647 nfs_sndunlock(rep->r_nmp);
648 if (sotype == SOCK_STREAM) { 648 if (sotype == SOCK_STREAM) {
649 aio.iov_base = (void *) &len; 649 aio.iov_base = (void *) &len;
650 aio.iov_len = sizeof(u_int32_t); 650 aio.iov_len = sizeof(u_int32_t);
651 auio.uio_iov = &aio; 651 auio.uio_iov = &aio;
652 auio.uio_iovcnt = 1; 652 auio.uio_iovcnt = 1;
653 auio.uio_rw = UIO_READ; 653 auio.uio_rw = UIO_READ;
654 auio.uio_offset = 0; 654 auio.uio_offset = 0;
655 auio.uio_resid = sizeof(u_int32_t); 655 auio.uio_resid = sizeof(u_int32_t);
656 UIO_SETUP_SYSSPACE(&auio); 656 UIO_SETUP_SYSSPACE(&auio);
657 do { 657 do {
658 rcvflg = MSG_WAITALL; 658 rcvflg = MSG_WAITALL;
659 error = (*so->so_receive)(so, (struct mbuf **)0, &auio, 659 error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
660 (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 660 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
661 if (error == EWOULDBLOCK && rep) { 661 if (error == EWOULDBLOCK && rep) {
662 if (rep->r_flags & R_SOFTTERM) 662 if (rep->r_flags & R_SOFTTERM)
663 return (EINTR); 663 return (EINTR);
664 /* 664 /*
665 * if it seems that the server died after it 665 * if it seems that the server died after it
666 * received our request, set EPIPE so that 666 * received our request, set EPIPE so that
667 * we'll reconnect and retransmit requests. 667 * we'll reconnect and retransmit requests.
668 */ 668 */
669 if (rep->r_rexmit >= rep->r_nmp->nm_retry) { 669 if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
670 nfsstats.rpctimeouts++; 670 nfsstats.rpctimeouts++;
671 error = EPIPE; 671 error = EPIPE;
672 } 672 }
673 } 673 }
674 } while (error == EWOULDBLOCK); 674 } while (error == EWOULDBLOCK);
675 if (!error && auio.uio_resid > 0) { 675 if (!error && auio.uio_resid > 0) {
676 /* 676 /*
677 * Don't log a 0 byte receive; it means 677 * Don't log a 0 byte receive; it means
678 * that the socket has been closed, and 678 * that the socket has been closed, and
679 * can happen during normal operation 679 * can happen during normal operation
680 * (forcible unmount or Solaris server). 680 * (forcible unmount or Solaris server).
681 */ 681 */
682 if (auio.uio_resid != sizeof (u_int32_t)) 682 if (auio.uio_resid != sizeof (u_int32_t))
683 log(LOG_INFO, 683 log(LOG_INFO,
684 "short receive (%lu/%lu) from nfs server %s\n", 684 "short receive (%lu/%lu) from nfs server %s\n",
685 (u_long)sizeof(u_int32_t) - auio.uio_resid, 685 (u_long)sizeof(u_int32_t) - auio.uio_resid,
686 (u_long)sizeof(u_int32_t), 686 (u_long)sizeof(u_int32_t),
687 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 687 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
688 error = EPIPE; 688 error = EPIPE;
689 } 689 }
690 if (error) 690 if (error)
691 goto errout; 691 goto errout;
692 len = ntohl(len) & ~0x80000000; 692 len = ntohl(len) & ~0x80000000;
693 /* 693 /*
694 * This is SERIOUS! We are out of sync with the sender 694 * This is SERIOUS! We are out of sync with the sender
695 * and forcing a disconnect/reconnect is all I can do. 695 * and forcing a disconnect/reconnect is all I can do.
696 */ 696 */
697 if (len > NFS_MAXPACKET) { 697 if (len > NFS_MAXPACKET) {
698 log(LOG_ERR, "%s (%d) from nfs server %s\n", 698 log(LOG_ERR, "%s (%d) from nfs server %s\n",
699 "impossible packet length", 699 "impossible packet length",
700 len, 700 len,
701 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 701 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
702 error = EFBIG; 702 error = EFBIG;
703 goto errout; 703 goto errout;
704 } 704 }
705 auio.uio_resid = len; 705 auio.uio_resid = len;
706 do { 706 do {
707 rcvflg = MSG_WAITALL; 707 rcvflg = MSG_WAITALL;
708 error = (*so->so_receive)(so, (struct mbuf **)0, 708 error = (*so->so_receive)(so, (struct mbuf **)0,
709 &auio, mp, (struct mbuf **)0, &rcvflg); 709 &auio, mp, (struct mbuf **)0, &rcvflg);
710 } while (error == EWOULDBLOCK || error == EINTR || 710 } while (error == EWOULDBLOCK || error == EINTR ||
711 error == ERESTART); 711 error == ERESTART);
712 if (!error && auio.uio_resid > 0) { 712 if (!error && auio.uio_resid > 0) {
713 if (len != auio.uio_resid) 713 if (len != auio.uio_resid)
714 log(LOG_INFO, 714 log(LOG_INFO,
715 "short receive (%lu/%d) from nfs server %s\n", 715 "short receive (%lu/%d) from nfs server %s\n",
716 (u_long)len - auio.uio_resid, len, 716 (u_long)len - auio.uio_resid, len,
717 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 717 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
718 error = EPIPE; 718 error = EPIPE;
719 } 719 }
720 } else { 720 } else {
721 /* 721 /*
722 * NB: Since uio_resid is big, MSG_WAITALL is ignored 722 * NB: Since uio_resid is big, MSG_WAITALL is ignored
723 * and soreceive() will return when it has either a 723 * and soreceive() will return when it has either a
724 * control msg or a data msg. 724 * control msg or a data msg.
725 * We have no use for control msg., but must grab them 725 * We have no use for control msg., but must grab them
726 * and then throw them away so we know what is going 726 * and then throw them away so we know what is going
727 * on. 727 * on.
728 */ 728 */
729 auio.uio_resid = len = 100000000; /* Anything Big */ 729 auio.uio_resid = len = 100000000; /* Anything Big */
730 /* not need to setup uio_vmspace */ 730 /* not need to setup uio_vmspace */
731 do { 731 do {
732 rcvflg = 0; 732 rcvflg = 0;
733 error = (*so->so_receive)(so, (struct mbuf **)0, 733 error = (*so->so_receive)(so, (struct mbuf **)0,
734 &auio, mp, &control, &rcvflg); 734 &auio, mp, &control, &rcvflg);
735 if (control) 735 if (control)
736 m_freem(control); 736 m_freem(control);
737 if (error == EWOULDBLOCK && rep) { 737 if (error == EWOULDBLOCK && rep) {
738 if (rep->r_flags & R_SOFTTERM) 738 if (rep->r_flags & R_SOFTTERM)
739 return (EINTR); 739 return (EINTR);
740 } 740 }
741 } while (error == EWOULDBLOCK || 741 } while (error == EWOULDBLOCK ||
742 (!error && *mp == NULL && control)); 742 (!error && *mp == NULL && control));
743 if ((rcvflg & MSG_EOR) == 0) 743 if ((rcvflg & MSG_EOR) == 0)
744 printf("Egad!!\n"); 744 printf("Egad!!\n");
745 if (!error && *mp == NULL) 745 if (!error && *mp == NULL)
746 error = EPIPE; 746 error = EPIPE;
747 len -= auio.uio_resid; 747 len -= auio.uio_resid;
748 } 748 }
749errout: 749errout:
750 if (error && error != EINTR && error != ERESTART) { 750 if (error && error != EINTR && error != ERESTART) {
751 m_freem(*mp); 751 m_freem(*mp);
752 *mp = (struct mbuf *)0; 752 *mp = (struct mbuf *)0;
753 if (error != EPIPE) 753 if (error != EPIPE)
754 log(LOG_INFO, 754 log(LOG_INFO,
755 "receive error %d from nfs server %s\n", 755 "receive error %d from nfs server %s\n",
756 error, 756 error,
757 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 757 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
758 error = nfs_sndlock(rep->r_nmp, rep); 758 error = nfs_sndlock(rep->r_nmp, rep);
759 if (!error) 759 if (!error)
760 error = nfs_reconnect(rep); 760 error = nfs_reconnect(rep);
761 if (!error) 761 if (!error)
762 goto tryagain; 762 goto tryagain;
763 else 763 else
764 nfs_sndunlock(rep->r_nmp); 764 nfs_sndunlock(rep->r_nmp);
765 } 765 }
766 } else { 766 } else {
767 if ((so = rep->r_nmp->nm_so) == NULL) 767 if ((so = rep->r_nmp->nm_so) == NULL)
768 return (EACCES); 768 return (EACCES);
769 if (so->so_state & SS_ISCONNECTED) 769 if (so->so_state & SS_ISCONNECTED)
770 getnam = (struct mbuf **)0; 770 getnam = (struct mbuf **)0;
771 else 771 else
772 getnam = aname; 772 getnam = aname;
773 auio.uio_resid = len = 1000000; 773 auio.uio_resid = len = 1000000;
774 /* not need to setup uio_vmspace */ 774 /* not need to setup uio_vmspace */
775 do { 775 do {
776 rcvflg = 0; 776 rcvflg = 0;
777 error = (*so->so_receive)(so, getnam, &auio, mp, 777 error = (*so->so_receive)(so, getnam, &auio, mp,
778 (struct mbuf **)0, &rcvflg); 778 (struct mbuf **)0, &rcvflg);
779 if (error == EWOULDBLOCK && 779 if (error == EWOULDBLOCK &&
780 (rep->r_flags & R_SOFTTERM)) 780 (rep->r_flags & R_SOFTTERM))
781 return (EINTR); 781 return (EINTR);
782 } while (error == EWOULDBLOCK); 782 } while (error == EWOULDBLOCK);
783 len -= auio.uio_resid; 783 len -= auio.uio_resid;
784 if (!error && *mp == NULL) 784 if (!error && *mp == NULL)
785 error = EPIPE; 785 error = EPIPE;
786 } 786 }
787 if (error) { 787 if (error) {
788 m_freem(*mp); 788 m_freem(*mp);
789 *mp = (struct mbuf *)0; 789 *mp = (struct mbuf *)0;
790 } 790 }
791 return (error); 791 return (error);
792} 792}
793 793
794/* 794/*
795 * Implement receipt of reply on a socket. 795 * Implement receipt of reply on a socket.
796 * We must search through the list of received datagrams matching them 796 * We must search through the list of received datagrams matching them
797 * with outstanding requests using the xid, until ours is found. 797 * with outstanding requests using the xid, until ours is found.
798 */ 798 */
799/* ARGSUSED */ 799/* ARGSUSED */
800static int 800static int
801nfs_reply(struct nfsreq *myrep, struct lwp *lwp) 801nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
802{ 802{
803 struct nfsreq *rep; 803 struct nfsreq *rep;
804 struct nfsmount *nmp = myrep->r_nmp; 804 struct nfsmount *nmp = myrep->r_nmp;
805 int32_t t1; 805 int32_t t1;
806 struct mbuf *mrep, *nam, *md; 806 struct mbuf *mrep, *nam, *md;
807 u_int32_t rxid, *tl; 807 u_int32_t rxid, *tl;
808 char *dpos, *cp2; 808 char *dpos, *cp2;
809 int error; 809 int error;
810 810
811 /* 811 /*
812 * Loop around until we get our own reply 812 * Loop around until we get our own reply
813 */ 813 */
814 for (;;) { 814 for (;;) {
815 /* 815 /*
816 * Lock against other receivers so that I don't get stuck in 816 * Lock against other receivers so that I don't get stuck in
817 * sbwait() after someone else has received my reply for me. 817 * sbwait() after someone else has received my reply for me.
818 * Also necessary for connection based protocols to avoid 818 * Also necessary for connection based protocols to avoid
819 * race conditions during a reconnect. 819 * race conditions during a reconnect.
820 */ 820 */
821 error = nfs_rcvlock(nmp, myrep); 821 error = nfs_rcvlock(nmp, myrep);
822 if (error == EALREADY) 822 if (error == EALREADY)
823 return (0); 823 return (0);
824 if (error) 824 if (error)
825 return (error); 825 return (error);
826 /* 826 /*
827 * Get the next Rpc reply off the socket 827 * Get the next Rpc reply off the socket
828 */ 828 */
829 829
830 mutex_enter(&nmp->nm_lock); 830 mutex_enter(&nmp->nm_lock);
831 nmp->nm_waiters++; 831 nmp->nm_waiters++;
832 mutex_exit(&nmp->nm_lock); 832 mutex_exit(&nmp->nm_lock);
833 833
834 error = nfs_receive(myrep, &nam, &mrep, lwp); 834 error = nfs_receive(myrep, &nam, &mrep, lwp);
835 835
836 mutex_enter(&nmp->nm_lock); 836 mutex_enter(&nmp->nm_lock);
837 nmp->nm_waiters--; 837 nmp->nm_waiters--;
838 cv_signal(&nmp->nm_disconcv); 838 cv_signal(&nmp->nm_disconcv);
839 mutex_exit(&nmp->nm_lock); 839 mutex_exit(&nmp->nm_lock);
840 840
841 if (error) { 841 if (error) {
842 nfs_rcvunlock(nmp); 842 nfs_rcvunlock(nmp);
843 843
844 if (nmp->nm_iflag & NFSMNT_DISMNT) { 844 if (nmp->nm_iflag & NFSMNT_DISMNT) {
845 /* 845 /*
846 * Oops, we're going away now.. 846 * Oops, we're going away now..
847 */ 847 */
848 return error; 848 return error;
849 } 849 }
850 /* 850 /*
851 * Ignore routing errors on connectionless protocols? ? 851 * Ignore routing errors on connectionless protocols? ?
852 */ 852 */
853 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 853 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
854 nmp->nm_so->so_error = 0; 854 nmp->nm_so->so_error = 0;
855#ifdef DEBUG 855#ifdef DEBUG
856 if (ratecheck(&nfs_reply_last_err_time, 856 if (ratecheck(&nfs_reply_last_err_time,
857 &nfs_err_interval)) 857 &nfs_err_interval))
858 printf("%s: ignoring error %d\n", 858 printf("%s: ignoring error %d\n",
859 __func__, error); 859 __func__, error);
860#endif 860#endif
861 continue; 861 continue;
862 } 862 }
863 return (error); 863 return (error);
864 } 864 }
865 if (nam) 865 if (nam)
866 m_freem(nam); 866 m_freem(nam);
867 867
868 /* 868 /*
869 * Get the xid and check that it is an rpc reply 869 * Get the xid and check that it is an rpc reply
870 */ 870 */
871 md = mrep; 871 md = mrep;
872 dpos = mtod(md, void *); 872 dpos = mtod(md, void *);
873 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED); 873 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
874 rxid = *tl++; 874 rxid = *tl++;
875 if (*tl != rpc_reply) { 875 if (*tl != rpc_reply) {
876 nfsstats.rpcinvalid++; 876 nfsstats.rpcinvalid++;
877 m_freem(mrep); 877 m_freem(mrep);
878nfsmout: 878nfsmout:
879 nfs_rcvunlock(nmp); 879 nfs_rcvunlock(nmp);
880 continue; 880 continue;
881 } 881 }
882 882
883 /* 883 /*
884 * Loop through the request list to match up the reply 884 * Loop through the request list to match up the reply
885 * Iff no match, just drop the datagram 885 * Iff no match, just drop the datagram
886 */ 886 */
887 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { 887 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
888 if (rep->r_mrep == NULL && rxid == rep->r_xid) { 888 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
889 /* Found it.. */ 889 /* Found it.. */
890 rep->r_mrep = mrep; 890 rep->r_mrep = mrep;
891 rep->r_md = md; 891 rep->r_md = md;
892 rep->r_dpos = dpos; 892 rep->r_dpos = dpos;
893 if (nfsrtton) { 893 if (nfsrtton) {
894 struct rttl *rt; 894 struct rttl *rt;
895 895
896 rt = &nfsrtt.rttl[nfsrtt.pos]; 896 rt = &nfsrtt.rttl[nfsrtt.pos];
897 rt->proc = rep->r_procnum; 897 rt->proc = rep->r_procnum;
898 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 898 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
899 rt->sent = nmp->nm_sent; 899 rt->sent = nmp->nm_sent;
900 rt->cwnd = nmp->nm_cwnd; 900 rt->cwnd = nmp->nm_cwnd;
901 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 901 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
902 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 902 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
903 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx; 903 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
904 getmicrotime(&rt->tstamp); 904 getmicrotime(&rt->tstamp);
905 if (rep->r_flags & R_TIMING) 905 if (rep->r_flags & R_TIMING)
906 rt->rtt = rep->r_rtt; 906 rt->rtt = rep->r_rtt;
907 else 907 else
908 rt->rtt = 1000000; 908 rt->rtt = 1000000;
909 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 909 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
910 } 910 }
911 /* 911 /*
912 * Update congestion window. 912 * Update congestion window.
913 * Do the additive increase of 913 * Do the additive increase of
914 * one rpc/rtt. 914 * one rpc/rtt.
915 */ 915 */
916 if (nmp->nm_cwnd <= nmp->nm_sent) { 916 if (nmp->nm_cwnd <= nmp->nm_sent) {
917 nmp->nm_cwnd += 917 nmp->nm_cwnd +=
918 (NFS_CWNDSCALE * NFS_CWNDSCALE + 918 (NFS_CWNDSCALE * NFS_CWNDSCALE +
919 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 919 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
920 if (nmp->nm_cwnd > NFS_MAXCWND) 920 if (nmp->nm_cwnd > NFS_MAXCWND)
921 nmp->nm_cwnd = NFS_MAXCWND; 921 nmp->nm_cwnd = NFS_MAXCWND;
922 } 922 }
923 rep->r_flags &= ~R_SENT; 923 rep->r_flags &= ~R_SENT;
924 nmp->nm_sent -= NFS_CWNDSCALE; 924 nmp->nm_sent -= NFS_CWNDSCALE;
925 /* 925 /*
926 * Update rtt using a gain of 0.125 on the mean 926 * Update rtt using a gain of 0.125 on the mean
927 * and a gain of 0.25 on the deviation. 927 * and a gain of 0.25 on the deviation.
928 */ 928 */
929 if (rep->r_flags & R_TIMING) { 929 if (rep->r_flags & R_TIMING) {
930 /* 930 /*
931 * Since the timer resolution of 931 * Since the timer resolution of
932 * NFS_HZ is so course, it can often 932 * NFS_HZ is so course, it can often
933 * result in r_rtt == 0. Since 933 * result in r_rtt == 0. Since
934 * r_rtt == N means that the actual 934 * r_rtt == N means that the actual
935 * rtt is between N+dt and N+2-dt ticks, 935 * rtt is between N+dt and N+2-dt ticks,
936 * add 1. 936 * add 1.
937 */ 937 */
938 t1 = rep->r_rtt + 1; 938 t1 = rep->r_rtt + 1;
939 t1 -= (NFS_SRTT(rep) >> 3); 939 t1 -= (NFS_SRTT(rep) >> 3);
940 NFS_SRTT(rep) += t1; 940 NFS_SRTT(rep) += t1;
941 if (t1 < 0) 941 if (t1 < 0)
942 t1 = -t1; 942 t1 = -t1;
943 t1 -= (NFS_SDRTT(rep) >> 2); 943 t1 -= (NFS_SDRTT(rep) >> 2);
944 NFS_SDRTT(rep) += t1; 944 NFS_SDRTT(rep) += t1;
945 } 945 }
946 nmp->nm_timeouts = 0; 946 nmp->nm_timeouts = 0;
947 break; 947 break;
948 } 948 }
949 } 949 }
950 nfs_rcvunlock(nmp); 950 nfs_rcvunlock(nmp);
951 /* 951 /*
952 * If not matched to a request, drop it. 952 * If not matched to a request, drop it.
953 * If it's mine, get out. 953 * If it's mine, get out.
954 */ 954 */
955 if (rep == 0) { 955 if (rep == 0) {
956 nfsstats.rpcunexpected++; 956 nfsstats.rpcunexpected++;
957 m_freem(mrep); 957 m_freem(mrep);
958 } else if (rep == myrep) { 958 } else if (rep == myrep) {
959 if (rep->r_mrep == NULL) 959 if (rep->r_mrep == NULL)
960 panic("nfsreply nil"); 960 panic("nfsreply nil");
961 return (0); 961 return (0);
962 } 962 }
963 } 963 }
964} 964}
965 965
966/* 966/*
967 * nfs_request - goes something like this 967 * nfs_request - goes something like this
968 * - fill in request struct 968 * - fill in request struct
969 * - links it into list 969 * - links it into list
970 * - calls nfs_send() for first transmit 970 * - calls nfs_send() for first transmit
971 * - calls nfs_receive() to get reply 971 * - calls nfs_receive() to get reply
972 * - break down rpc header and return with nfs reply pointed to 972 * - break down rpc header and return with nfs reply pointed to
973 * by mrep or error 973 * by mrep or error
974 * nb: always frees up mreq mbuf list 974 * nb: always frees up mreq mbuf list
975 */ 975 */
976int 976int
977nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp) 977nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp)
978 struct nfsnode *np; 978 struct nfsnode *np;
979 struct mbuf *mrest; 979 struct mbuf *mrest;
980 int procnum; 980 int procnum;
981 struct lwp *lwp; 981 struct lwp *lwp;
982 kauth_cred_t cred; 982 kauth_cred_t cred;
983 struct mbuf **mrp; 983 struct mbuf **mrp;
984 struct mbuf **mdp; 984 struct mbuf **mdp;
985 char **dposp; 985 char **dposp;
986 int *rexmitp; 986 int *rexmitp;
987{ 987{
988 struct mbuf *m, *mrep; 988 struct mbuf *m, *mrep;
989 struct nfsreq *rep; 989 struct nfsreq *rep;
990 u_int32_t *tl; 990 u_int32_t *tl;
991 int i; 991 int i;
992 struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount); 992 struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
993 struct mbuf *md, *mheadend; 993 struct mbuf *md, *mheadend;
994 char nickv[RPCX_NICKVERF]; 994 char nickv[RPCX_NICKVERF];
995 time_t waituntil; 995 time_t waituntil;
996 char *dpos, *cp2; 996 char *dpos, *cp2;
997 int t1, s, error = 0, mrest_len, auth_len, auth_type; 997 int t1, s, error = 0, mrest_len, auth_len, auth_type;
998 int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0; 998 int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
999 int verf_len, verf_type; 999 int verf_len, verf_type;
1000 u_int32_t xid; 1000 u_int32_t xid;
1001 char *auth_str, *verf_str; 1001 char *auth_str, *verf_str;
1002 NFSKERBKEY_T key; /* save session key */ 1002 NFSKERBKEY_T key; /* save session key */
1003 kauth_cred_t acred; 1003 kauth_cred_t acred;
1004 struct mbuf *mrest_backup = NULL; 1004 struct mbuf *mrest_backup = NULL;
1005 kauth_cred_t origcred = NULL; /* XXX: gcc */ 1005 kauth_cred_t origcred = NULL; /* XXX: gcc */
1006 bool retry_cred = true; 1006 bool retry_cred = true;
1007 bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0; 1007 bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;
1008 1008
1009 if (rexmitp != NULL) 1009 if (rexmitp != NULL)
1010 *rexmitp = 0; 1010 *rexmitp = 0;
1011 1011
1012 acred = kauth_cred_alloc(); 1012 acred = kauth_cred_alloc();
1013 1013
1014tryagain_cred: 1014tryagain_cred:
1015 KASSERT(cred != NULL); 1015 KASSERT(cred != NULL);
1016 rep = kmem_alloc(sizeof(*rep), KM_SLEEP); 1016 rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
1017 rep->r_nmp = nmp; 1017 rep->r_nmp = nmp;
1018 KASSERT(lwp == NULL || lwp == curlwp); 1018 KASSERT(lwp == NULL || lwp == curlwp);
1019 rep->r_lwp = lwp; 1019 rep->r_lwp = lwp;
1020 rep->r_procnum = procnum; 1020 rep->r_procnum = procnum;
1021 i = 0; 1021 i = 0;
1022 m = mrest; 1022 m = mrest;
1023 while (m) { 1023 while (m) {
1024 i += m->m_len; 1024 i += m->m_len;
1025 m = m->m_next; 1025 m = m->m_next;
1026 } 1026 }
1027 mrest_len = i; 1027 mrest_len = i;
1028 1028
1029 /* 1029 /*
1030 * Get the RPC header with authorization. 1030 * Get the RPC header with authorization.
1031 */ 1031 */
1032kerbauth: 1032kerbauth:
1033 verf_str = auth_str = (char *)0; 1033 verf_str = auth_str = (char *)0;
1034 if (nmp->nm_flag & NFSMNT_KERB) { 1034 if (nmp->nm_flag & NFSMNT_KERB) {
1035 verf_str = nickv; 1035 verf_str = nickv;
1036 verf_len = sizeof (nickv); 1036 verf_len = sizeof (nickv);
1037 auth_type = RPCAUTH_KERB4; 1037 auth_type = RPCAUTH_KERB4;
1038 memset((void *)key, 0, sizeof (key)); 1038 memset((void *)key, 0, sizeof (key));
1039 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, 1039 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
1040 &auth_len, verf_str, verf_len)) { 1040 &auth_len, verf_str, verf_len)) {
1041 error = nfs_getauth(nmp, rep, cred, &auth_str, 1041 error = nfs_getauth(nmp, rep, cred, &auth_str,
1042 &auth_len, verf_str, &verf_len, key); 1042 &auth_len, verf_str, &verf_len, key);
1043 if (error) { 1043 if (error) {
1044 kmem_free(rep, sizeof(*rep)); 1044 kmem_free(rep, sizeof(*rep));
1045 m_freem(mrest); 1045 m_freem(mrest);
1046 KASSERT(kauth_cred_getrefcnt(acred) == 1); 1046 KASSERT(kauth_cred_getrefcnt(acred) == 1);
1047 kauth_cred_free(acred); 1047 kauth_cred_free(acred);
1048 return (error); 1048 return (error);
1049 } 1049 }
1050 } 1050 }
1051 retry_cred = false; 1051 retry_cred = false;
1052 } else { 1052 } else {
1053 /* AUTH_UNIX */ 1053 /* AUTH_UNIX */
1054 uid_t uid; 1054 uid_t uid;
1055 gid_t gid; 1055 gid_t gid;
1056 1056
1057 /* 1057 /*
1058 * on the most unix filesystems, permission checks are 1058 * on the most unix filesystems, permission checks are
1059 * done when the file is open(2)'ed. 1059 * done when the file is open(2)'ed.
1060 * ie. once a file is successfully open'ed, 1060 * ie. once a file is successfully open'ed,
1061 * following i/o operations never fail with EACCES. 1061 * following i/o operations never fail with EACCES.
1062 * we try to follow the semantics as far as possible. 1062 * we try to follow the semantics as far as possible.
1063 * 1063 *
1064 * note that we expect that the nfs server always grant 1064 * note that we expect that the nfs server always grant
1065 * accesses by the file's owner. 1065 * accesses by the file's owner.
1066 */ 1066 */
1067 origcred = cred; 1067 origcred = cred;
1068 switch (procnum) { 1068 switch (procnum) {
1069 case NFSPROC_READ: 1069 case NFSPROC_READ:
1070 case NFSPROC_WRITE: 1070 case NFSPROC_WRITE:
1071 case NFSPROC_COMMIT: 1071 case NFSPROC_COMMIT:
1072 uid = np->n_vattr->va_uid; 1072 uid = np->n_vattr->va_uid;
1073 gid = np->n_vattr->va_gid; 1073 gid = np->n_vattr->va_gid;
1074 if (kauth_cred_geteuid(cred) == uid && 1074 if (kauth_cred_geteuid(cred) == uid &&
1075 kauth_cred_getegid(cred) == gid) { 1075 kauth_cred_getegid(cred) == gid) {
1076 retry_cred = false; 1076 retry_cred = false;
1077 break; 1077 break;
1078 } 1078 }
1079 if (use_opencred) 1079 if (use_opencred)
1080 break; 1080 break;
1081 kauth_cred_setuid(acred, uid); 1081 kauth_cred_setuid(acred, uid);
1082 kauth_cred_seteuid(acred, uid); 1082 kauth_cred_seteuid(acred, uid);
1083 kauth_cred_setsvuid(acred, uid); 1083 kauth_cred_setsvuid(acred, uid);
1084 kauth_cred_setgid(acred, gid); 1084 kauth_cred_setgid(acred, gid);
1085 kauth_cred_setegid(acred, gid); 1085 kauth_cred_setegid(acred, gid);
1086 kauth_cred_setsvgid(acred, gid); 1086 kauth_cred_setsvgid(acred, gid);
1087 cred = acred; 1087 cred = acred;
1088 break; 1088 break;
1089 default: 1089 default:
1090 retry_cred = false; 1090 retry_cred = false;
1091 break; 1091 break;
1092 } 1092 }
1093 /* 1093 /*
1094 * backup mbuf chain if we can need it later to retry. 1094 * backup mbuf chain if we can need it later to retry.
1095 * 1095 *
1096 * XXX maybe we can keep a direct reference to 1096 * XXX maybe we can keep a direct reference to
1097 * mrest without doing m_copym, but it's ...ugly. 1097 * mrest without doing m_copym, but it's ...ugly.
1098 */ 1098 */
1099 if (retry_cred) 1099 if (retry_cred)
1100 mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT); 1100 mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
1101 auth_type = RPCAUTH_UNIX; 1101 auth_type = RPCAUTH_UNIX;
1102 /* XXX elad - ngroups */ 1102 /* XXX elad - ngroups */
1103 auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ? 1103 auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
1104 nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) + 1104 nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
1105 5 * NFSX_UNSIGNED; 1105 5 * NFSX_UNSIGNED;
1106 } 1106 }
1107 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, 1107 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1108 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); 1108 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1109 if (auth_str) 1109 if (auth_str)
1110 free(auth_str, M_TEMP); 1110 free(auth_str, M_TEMP);
1111 1111
1112 /* 1112 /*
1113 * For stream protocols, insert a Sun RPC Record Mark. 1113 * For stream protocols, insert a Sun RPC Record Mark.
1114 */ 1114 */
1115 if (nmp->nm_sotype == SOCK_STREAM) { 1115 if (nmp->nm_sotype == SOCK_STREAM) {
1116 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 1116 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1117 *mtod(m, u_int32_t *) = htonl(0x80000000 | 1117 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1118 (m->m_pkthdr.len - NFSX_UNSIGNED)); 1118 (m->m_pkthdr.len - NFSX_UNSIGNED));
1119 } 1119 }
1120 rep->r_mreq = m; 1120 rep->r_mreq = m;
1121 rep->r_xid = xid; 1121 rep->r_xid = xid;
1122tryagain: 1122tryagain:
1123 if (nmp->nm_flag & NFSMNT_SOFT) 1123 if (nmp->nm_flag & NFSMNT_SOFT)
1124 rep->r_retry = nmp->nm_retry; 1124 rep->r_retry = nmp->nm_retry;
1125 else 1125 else
1126 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 1126 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1127 rep->r_rtt = rep->r_rexmit = 0; 1127 rep->r_rtt = rep->r_rexmit = 0;
1128 if (proct[procnum] > 0) 1128 if (proct[procnum] > 0)
1129 rep->r_flags = R_TIMING; 1129 rep->r_flags = R_TIMING;
1130 else 1130 else
1131 rep->r_flags = 0; 1131 rep->r_flags = 0;
1132 rep->r_mrep = NULL; 1132 rep->r_mrep = NULL;
1133 1133
1134 /* 1134 /*
1135 * Do the client side RPC. 1135 * Do the client side RPC.
1136 */ 1136 */
1137 nfsstats.rpcrequests++; 1137 nfsstats.rpcrequests++;
1138 /* 1138 /*
1139 * Chain request into list of outstanding requests. Be sure 1139 * Chain request into list of outstanding requests. Be sure
1140 * to put it LAST so timer finds oldest requests first. 1140 * to put it LAST so timer finds oldest requests first.
1141 */ 1141 */
1142 s = splsoftnet(); 1142 s = splsoftnet();
1143 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); 1143 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1144 nfs_timer_start(); 1144 nfs_timer_start();
1145 1145
1146 /* 1146 /*
1147 * If backing off another request or avoiding congestion, don't 1147 * If backing off another request or avoiding congestion, don't
1148 * send this one now but let timer do it. If not timing a request, 1148 * send this one now but let timer do it. If not timing a request,
1149 * do it now. 1149 * do it now.
1150 */ 1150 */
1151 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 1151 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1152 (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { 1152 (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
1153 splx(s); 1153 splx(s);
1154 if (nmp->nm_soflags & PR_CONNREQUIRED) 1154 if (nmp->nm_soflags & PR_CONNREQUIRED)
1155 error = nfs_sndlock(nmp, rep); 1155 error = nfs_sndlock(nmp, rep);
1156 if (!error) { 1156 if (!error) {
1157 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 1157 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
1158 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp); 1158 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
1159 if (nmp->nm_soflags & PR_CONNREQUIRED) 1159 if (nmp->nm_soflags & PR_CONNREQUIRED)
1160 nfs_sndunlock(nmp); 1160 nfs_sndunlock(nmp);
1161 } 1161 }
1162 s = splsoftnet(); 1162 s = splsoftnet();
1163 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 1163 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
1164 if ((rep->r_flags & R_SENT) == 0) { 1164 if ((rep->r_flags & R_SENT) == 0) {
1165 nmp->nm_sent += NFS_CWNDSCALE; 1165 nmp->nm_sent += NFS_CWNDSCALE;
1166 rep->r_flags |= R_SENT; 1166 rep->r_flags |= R_SENT;
1167 } 1167 }
1168 } 1168 }
1169 splx(s); 1169 splx(s);
1170 } else { 1170 } else {
1171 splx(s); 1171 splx(s);
1172 rep->r_rtt = -1; 1172 rep->r_rtt = -1;
1173 } 1173 }
1174 1174
1175 /* 1175 /*
1176 * Wait for the reply from our send or the timer's. 1176 * Wait for the reply from our send or the timer's.
1177 */ 1177 */
1178 if (!error || error == EPIPE || error == EWOULDBLOCK) 1178 if (!error || error == EPIPE || error == EWOULDBLOCK)
1179 error = nfs_reply(rep, lwp); 1179 error = nfs_reply(rep, lwp);
1180 1180
1181 /* 1181 /*
1182 * RPC done, unlink the request. 1182 * RPC done, unlink the request.
1183 */ 1183 */
1184 s = splsoftnet(); 1184 s = splsoftnet();
1185 TAILQ_REMOVE(&nfs_reqq, rep, r_chain); 1185 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1186 1186
1187 /* 1187 /*
1188 * Decrement the outstanding request count. 1188 * Decrement the outstanding request count.
1189 */ 1189 */
1190 if (rep->r_flags & R_SENT) { 1190 if (rep->r_flags & R_SENT) {
1191 rep->r_flags &= ~R_SENT; /* paranoia */ 1191 rep->r_flags &= ~R_SENT; /* paranoia */
1192 nmp->nm_sent -= NFS_CWNDSCALE; 1192 nmp->nm_sent -= NFS_CWNDSCALE;
1193 } 1193 }
1194 splx(s); 1194 splx(s);
1195 1195
1196 if (rexmitp != NULL) { 1196 if (rexmitp != NULL) {
1197 int rexmit; 1197 int rexmit;
1198 1198
1199 if (nmp->nm_sotype != SOCK_DGRAM) 1199 if (nmp->nm_sotype != SOCK_DGRAM)
1200 rexmit = (rep->r_flags & R_REXMITTED) != 0; 1200 rexmit = (rep->r_flags & R_REXMITTED) != 0;
1201 else 1201 else
1202 rexmit = rep->r_rexmit; 1202 rexmit = rep->r_rexmit;
1203 *rexmitp = rexmit; 1203 *rexmitp = rexmit;
1204 } 1204 }
1205 1205
1206 /* 1206 /*
1207 * If there was a successful reply and a tprintf msg. 1207 * If there was a successful reply and a tprintf msg.
1208 * tprintf a response. 1208 * tprintf a response.
1209 */ 1209 */
1210 if (!error && (rep->r_flags & R_TPRINTFMSG)) 1210 if (!error && (rep->r_flags & R_TPRINTFMSG))
1211 nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname, 1211 nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
1212 "is alive again"); 1212 "is alive again");
1213 mrep = rep->r_mrep; 1213 mrep = rep->r_mrep;
1214 md = rep->r_md; 1214 md = rep->r_md;
1215 dpos = rep->r_dpos; 1215 dpos = rep->r_dpos;
1216 if (error) 1216 if (error)
1217 goto nfsmout; 1217 goto nfsmout;
1218 1218
1219 /* 1219 /*
1220 * break down the rpc header and check if ok 1220 * break down the rpc header and check if ok
1221 */ 1221 */
1222 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1222 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1223 if (*tl++ == rpc_msgdenied) { 1223 if (*tl++ == rpc_msgdenied) {
1224 if (*tl == rpc_mismatch) 1224 if (*tl == rpc_mismatch)
1225 error = EOPNOTSUPP; 1225 error = EOPNOTSUPP;
1226 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 1226 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1227 if (!failed_auth) { 1227 if (!failed_auth) {
1228 failed_auth++; 1228 failed_auth++;
1229 mheadend->m_next = (struct mbuf *)0; 1229 mheadend->m_next = (struct mbuf *)0;
1230 m_freem(mrep); 1230 m_freem(mrep);
1231 m_freem(rep->r_mreq); 1231 m_freem(rep->r_mreq);
1232 goto kerbauth; 1232 goto kerbauth;
1233 } else 1233 } else
1234 error = EAUTH; 1234 error = EAUTH;
1235 } else 1235 } else
1236 error = EACCES; 1236 error = EACCES;
1237 m_freem(mrep); 1237 m_freem(mrep);
1238 goto nfsmout; 1238 goto nfsmout;
1239 } 1239 }
1240 1240
1241 /* 1241 /*
1242 * Grab any Kerberos verifier, otherwise just throw it away. 1242 * Grab any Kerberos verifier, otherwise just throw it away.
1243 */ 1243 */
1244 verf_type = fxdr_unsigned(int, *tl++); 1244 verf_type = fxdr_unsigned(int, *tl++);
1245 i = fxdr_unsigned(int32_t, *tl); 1245 i = fxdr_unsigned(int32_t, *tl);
1246 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { 1246 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1247 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); 1247 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1248 if (error) 1248 if (error)
1249 goto nfsmout; 1249 goto nfsmout;
1250 } else if (i > 0) 1250 } else if (i > 0)
1251 nfsm_adv(nfsm_rndup(i)); 1251 nfsm_adv(nfsm_rndup(i));
1252 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); 1252 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1253 /* 0 == ok */ 1253 /* 0 == ok */
1254 if (*tl == 0) { 1254 if (*tl == 0) {
1255 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); 1255 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1256 if (*tl != 0) { 1256 if (*tl != 0) {
1257 error = fxdr_unsigned(int, *tl); 1257 error = fxdr_unsigned(int, *tl);
1258 switch (error) { 1258 switch (error) {
1259 case NFSERR_PERM: 1259 case NFSERR_PERM:
1260 error = EPERM; 1260 error = EPERM;
1261 break; 1261 break;
1262 1262
1263 case NFSERR_NOENT: 1263 case NFSERR_NOENT:
1264 error = ENOENT; 1264 error = ENOENT;
1265 break; 1265 break;
1266 1266
1267 case NFSERR_IO: 1267 case NFSERR_IO:
1268 error = EIO; 1268 error = EIO;
1269 break; 1269 break;
1270 1270
1271 case NFSERR_NXIO: 1271 case NFSERR_NXIO:
1272 error = ENXIO; 1272 error = ENXIO;
1273 break; 1273 break;
1274 1274
1275 case NFSERR_ACCES: 1275 case NFSERR_ACCES:
1276 error = EACCES; 1276 error = EACCES;
1277 if (!retry_cred) 1277 if (!retry_cred)
1278 break; 1278 break;
1279 m_freem(mrep); 1279 m_freem(mrep);
1280 m_freem(rep->r_mreq); 1280 m_freem(rep->r_mreq);
1281 kmem_free(rep, sizeof(*rep)); 1281 kmem_free(rep, sizeof(*rep));
1282 use_opencred = !use_opencred; 1282 use_opencred = !use_opencred;
1283 if (mrest_backup == NULL) { 1283 if (mrest_backup == NULL) {
1284 /* m_copym failure */ 1284 /* m_copym failure */
1285 KASSERT( 1285 KASSERT(
1286 kauth_cred_getrefcnt(acred) == 1); 1286 kauth_cred_getrefcnt(acred) == 1);
1287 kauth_cred_free(acred); 1287 kauth_cred_free(acred);
1288 return ENOMEM; 1288 return ENOMEM;
1289 } 1289 }
1290 mrest = mrest_backup; 1290 mrest = mrest_backup;
1291 mrest_backup = NULL; 1291 mrest_backup = NULL;
1292 cred = origcred; 1292 cred = origcred;
1293 error = 0; 1293 error = 0;
1294 retry_cred = false; 1294 retry_cred = false;
1295 goto tryagain_cred; 1295 goto tryagain_cred;
1296 1296
1297 case NFSERR_EXIST: 1297 case NFSERR_EXIST:
1298 error = EEXIST; 1298 error = EEXIST;
1299 break; 1299 break;
1300 1300
1301 case NFSERR_XDEV: 1301 case NFSERR_XDEV:
1302 error = EXDEV; 1302 error = EXDEV;
1303 break; 1303 break;
1304 1304
1305 case NFSERR_NODEV: 1305 case NFSERR_NODEV:
1306 error = ENODEV; 1306 error = ENODEV;
1307 break; 1307 break;
1308 1308
1309 case NFSERR_NOTDIR: 1309 case NFSERR_NOTDIR:
1310 error = ENOTDIR; 1310 error = ENOTDIR;
1311 break; 1311 break;
1312 1312
1313 case NFSERR_ISDIR: 1313 case NFSERR_ISDIR:
1314 error = EISDIR; 1314 error = EISDIR;
1315 break; 1315 break;
1316 1316
1317 case NFSERR_INVAL: 1317 case NFSERR_INVAL:
1318 error = EINVAL; 1318 error = EINVAL;
1319 break; 1319 break;
1320 1320
1321 case NFSERR_FBIG: 1321 case NFSERR_FBIG:
1322 error = EFBIG; 1322 error = EFBIG;
1323 break; 1323 break;
1324 1324
1325 case NFSERR_NOSPC: 1325 case NFSERR_NOSPC:
1326 error = ENOSPC; 1326 error = ENOSPC;
1327 break; 1327 break;
1328 1328
1329 case NFSERR_ROFS: 1329 case NFSERR_ROFS:
1330 error = EROFS; 1330 error = EROFS;
1331 break; 1331 break;
1332 1332
1333 case NFSERR_MLINK: 1333 case NFSERR_MLINK:
1334 error = EMLINK; 1334 error = EMLINK;
1335 break; 1335 break;
1336 1336
1337 case NFSERR_TIMEDOUT: 1337 case NFSERR_TIMEDOUT:
1338 error = ETIMEDOUT; 1338 error = ETIMEDOUT;
1339 break; 1339 break;
1340 1340
1341 case NFSERR_NAMETOL: 1341 case NFSERR_NAMETOL:
1342 error = ENAMETOOLONG; 1342 error = ENAMETOOLONG;
1343 break; 1343 break;
1344 1344
1345 case NFSERR_NOTEMPTY: 1345 case NFSERR_NOTEMPTY:
1346 error = ENOTEMPTY; 1346 error = ENOTEMPTY;
1347 break; 1347 break;