| @@ -1,1347 +1,1347 @@ | | | @@ -1,1347 +1,1347 @@ |
1 | /* $NetBSD: nfs_socket.c,v 1.173.4.8 2011/03/29 19:47:37 riz Exp $ */ | | 1 | /* $NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 1989, 1991, 1993, 1995 | | 4 | * Copyright (c) 1989, 1991, 1993, 1995 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to Berkeley by | | 7 | * This code is derived from software contributed to Berkeley by |
8 | * Rick Macklem at The University of Guelph. | | 8 | * Rick Macklem at The University of Guelph. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors | | 18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software | | 19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. | | 20 | * without specific prior written permission. |
21 | * | | 21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. | | 32 | * SUCH DAMAGE. |
33 | * | | 33 | * |
34 | * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 | | 34 | * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 |
35 | */ | | 35 | */ |
36 | | | 36 | |
37 | /* | | 37 | /* |
38 | * Socket operations for use by nfs | | 38 | * Socket operations for use by nfs |
39 | */ | | 39 | */ |
40 | | | 40 | |
41 | #include <sys/cdefs.h> | | 41 | #include <sys/cdefs.h> |
42 | __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.173.4.8 2011/03/29 19:47:37 riz Exp $"); | | 42 | __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $"); |
43 | | | 43 | |
44 | #include "fs_nfs.h" | | 44 | #include "fs_nfs.h" |
45 | #include "opt_nfs.h" | | 45 | #include "opt_nfs.h" |
46 | #include "opt_nfsserver.h" | | 46 | #include "opt_nfsserver.h" |
47 | #include "opt_mbuftrace.h" | | 47 | #include "opt_mbuftrace.h" |
48 | #include "opt_inet.h" | | 48 | #include "opt_inet.h" |
49 | | | 49 | |
50 | #include <sys/param.h> | | 50 | #include <sys/param.h> |
51 | #include <sys/systm.h> | | 51 | #include <sys/systm.h> |
52 | #include <sys/evcnt.h> | | 52 | #include <sys/evcnt.h> |
53 | #include <sys/callout.h> | | 53 | #include <sys/callout.h> |
54 | #include <sys/proc.h> | | 54 | #include <sys/proc.h> |
55 | #include <sys/mount.h> | | 55 | #include <sys/mount.h> |
56 | #include <sys/kernel.h> | | 56 | #include <sys/kernel.h> |
57 | #include <sys/kmem.h> | | 57 | #include <sys/kmem.h> |
58 | #include <sys/mbuf.h> | | 58 | #include <sys/mbuf.h> |
59 | #include <sys/vnode.h> | | 59 | #include <sys/vnode.h> |
60 | #include <sys/domain.h> | | 60 | #include <sys/domain.h> |
61 | #include <sys/protosw.h> | | 61 | #include <sys/protosw.h> |
62 | #include <sys/socket.h> | | 62 | #include <sys/socket.h> |
63 | #include <sys/socketvar.h> | | 63 | #include <sys/socketvar.h> |
64 | #include <sys/syslog.h> | | 64 | #include <sys/syslog.h> |
65 | #include <sys/tprintf.h> | | 65 | #include <sys/tprintf.h> |
66 | #include <sys/namei.h> | | 66 | #include <sys/namei.h> |
67 | #include <sys/signal.h> | | 67 | #include <sys/signal.h> |
68 | #include <sys/signalvar.h> | | 68 | #include <sys/signalvar.h> |
69 | #include <sys/kauth.h> | | 69 | #include <sys/kauth.h> |
70 | | | 70 | |
71 | #include <netinet/in.h> | | 71 | #include <netinet/in.h> |
72 | #include <netinet/tcp.h> | | 72 | #include <netinet/tcp.h> |
73 | | | 73 | |
74 | #include <nfs/rpcv2.h> | | 74 | #include <nfs/rpcv2.h> |
75 | #include <nfs/nfsproto.h> | | 75 | #include <nfs/nfsproto.h> |
76 | #include <nfs/nfs.h> | | 76 | #include <nfs/nfs.h> |
77 | #include <nfs/xdr_subs.h> | | 77 | #include <nfs/xdr_subs.h> |
78 | #include <nfs/nfsm_subs.h> | | 78 | #include <nfs/nfsm_subs.h> |
79 | #include <nfs/nfsmount.h> | | 79 | #include <nfs/nfsmount.h> |
80 | #include <nfs/nfsnode.h> | | 80 | #include <nfs/nfsnode.h> |
81 | #include <nfs/nfsrtt.h> | | 81 | #include <nfs/nfsrtt.h> |
82 | #include <nfs/nfs_var.h> | | 82 | #include <nfs/nfs_var.h> |
83 | | | 83 | |
#ifdef MBUFTRACE
/*
 * Mbuf owner record labelled "nfs".  nfs_connect() attaches it to the
 * socket and to both of its socket buffers when MBUFTRACE is configured.
 */
struct mowner nfs_mowner = MOWNER_INIT("nfs","");
#endif
87 | | | 87 | |
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 *
 * NFS_RTO(n, t): "n" is a pointer to the mount structure and "t" is the
 * timer class (0 selects nm_timeo, 1..4 select slot t-1 of nm_srtt[] and
 * nm_sdrtt[]).  Both arguments are evaluated more than once, so they must
 * be free of side effects.  Every use of an argument is parenthesized so
 * that an expression argument (e.g. "x & 3") indexes the intended slot.
 *
 * NFS_SRTT(r) / NFS_SDRTT(r): the nm_srtt[] / nm_sdrtt[] slot for the
 * procedure of request "r", looked up through proct[].  The expansion is
 * an lvalue.  A procedure whose proct[] entry is 0 would yield index -1,
 * so these must only be applied to requests with a non-zero timer class.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + \
	    (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + \
	    (n)->nm_sdrtt[(t) - 1] + 1)))
#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
/*
 * External data, mostly RPC constants in XDR form
 * (all of these are defined outside this file).
 */
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_int32_t nfs_prog;
extern const int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;

#ifdef DEBUG
/*
 * Avoid spamming the console with debugging messages.  We only print
 * the nfs timer and reply error debugs every 10 seconds.
 * nfs_err_interval is that 10 second interval; the two *_last_err_time
 * variables hold per-message-class timestamps (their users are not in
 * this part of the file).
 */
static const struct timeval nfs_err_interval = { 10, 0 };
static struct timeval nfs_reply_last_err_time;
static struct timeval nfs_timer_last_err_time;
#endif
126 | | | 126 | |
/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 *
 * Indexed by NFS procedure number.  Note that ACCESS shares the getattr
 * timer and READLINK, READDIR and READDIRPLUS share the read timer.
 * NFS_SRTT()/NFS_SDRTT() subtract one from the value found here to index
 * the per-mount nm_srtt[]/nm_sdrtt[] arrays.
 */
static const int proct[NFS_NPROCS] = {
	[NFSPROC_NULL] = 0,
	[NFSPROC_GETATTR] = 1,
	[NFSPROC_SETATTR] = 0,
	[NFSPROC_LOOKUP] = 2,
	[NFSPROC_ACCESS] = 1,
	[NFSPROC_READLINK] = 3,
	[NFSPROC_READ] = 3,
	[NFSPROC_WRITE] = 4,
	[NFSPROC_CREATE] = 0,
	[NFSPROC_MKDIR] = 0,
	[NFSPROC_SYMLINK] = 0,
	[NFSPROC_MKNOD] = 0,
	[NFSPROC_REMOVE] = 0,
	[NFSPROC_RMDIR] = 0,
	[NFSPROC_RENAME] = 0,
	[NFSPROC_LINK] = 0,
	[NFSPROC_READDIR] = 3,
	[NFSPROC_READDIRPLUS] = 3,
	[NFSPROC_FSSTAT] = 0,
	[NFSPROC_FSINFO] = 0,
	[NFSPROC_PATHCONF] = 0,
	[NFSPROC_COMMIT] = 0,
	[NFSPROC_NOOP] = 0,
};
160 | | | 160 | |
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
/* Upper bound of the scaled window; nfs_connect() starts at half of this. */
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* Doubling backoff factors; NOTE(review): consumer is outside this view. */
static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int nfsrtton = 0;
struct nfsrtt nfsrtt;
/* Queue of requests; nfs_reconnect() walks it to mark requests for resend. */
struct nfsreqhead nfs_reqq;
/* Timer callout and its event counters; users are outside this view. */
static callout_t nfs_timer_ch;
static struct evcnt nfs_timer_ev;
static struct evcnt nfs_timer_start_ev;
static struct evcnt nfs_timer_stop_ev;

/* Forward declarations for the send/receive lock helpers defined later. */
#ifdef NFS
static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
static void nfs_sndunlock(struct nfsmount *);
#endif
static int nfs_rcvlock(struct nfsmount *, struct nfsreq *);
static void nfs_rcvunlock(struct nfsmount *);

#if defined(NFSSERVER)
static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
#endif /* defined(NFSSERVER) */
195 | | | 195 | |
196 | /* | | 196 | /* |
197 | * Initialize sockets and congestion for a new NFS connection. | | 197 | * Initialize sockets and congestion for a new NFS connection. |
198 | * We do not free the sockaddr if error. | | 198 | * We do not free the sockaddr if error. |
199 | */ | | 199 | */ |
200 | int | | 200 | int |
201 | nfs_connect(nmp, rep, l) | | 201 | nfs_connect(nmp, rep, l) |
202 | struct nfsmount *nmp; | | 202 | struct nfsmount *nmp; |
203 | struct nfsreq *rep; | | 203 | struct nfsreq *rep; |
204 | struct lwp *l; | | 204 | struct lwp *l; |
205 | { | | 205 | { |
206 | struct socket *so; | | 206 | struct socket *so; |
207 | int error, rcvreserve, sndreserve; | | 207 | int error, rcvreserve, sndreserve; |
208 | struct sockaddr *saddr; | | 208 | struct sockaddr *saddr; |
209 | struct sockaddr_in *sin; | | 209 | struct sockaddr_in *sin; |
210 | #ifdef INET6 | | 210 | #ifdef INET6 |
211 | struct sockaddr_in6 *sin6; | | 211 | struct sockaddr_in6 *sin6; |
212 | #endif | | 212 | #endif |
213 | struct mbuf *m; | | 213 | struct mbuf *m; |
214 | int val; | | 214 | int val; |
215 | | | 215 | |
216 | nmp->nm_so = (struct socket *)0; | | 216 | nmp->nm_so = (struct socket *)0; |
217 | saddr = mtod(nmp->nm_nam, struct sockaddr *); | | 217 | saddr = mtod(nmp->nm_nam, struct sockaddr *); |
218 | error = socreate(saddr->sa_family, &nmp->nm_so, | | 218 | error = socreate(saddr->sa_family, &nmp->nm_so, |
219 | nmp->nm_sotype, nmp->nm_soproto, l, NULL); | | 219 | nmp->nm_sotype, nmp->nm_soproto, l, NULL); |
220 | if (error) | | 220 | if (error) |
221 | goto bad; | | 221 | goto bad; |
222 | so = nmp->nm_so; | | 222 | so = nmp->nm_so; |
223 | #ifdef MBUFTRACE | | 223 | #ifdef MBUFTRACE |
224 | so->so_mowner = &nfs_mowner; | | 224 | so->so_mowner = &nfs_mowner; |
225 | so->so_rcv.sb_mowner = &nfs_mowner; | | 225 | so->so_rcv.sb_mowner = &nfs_mowner; |
226 | so->so_snd.sb_mowner = &nfs_mowner; | | 226 | so->so_snd.sb_mowner = &nfs_mowner; |
227 | #endif | | 227 | #endif |
228 | nmp->nm_soflags = so->so_proto->pr_flags; | | 228 | nmp->nm_soflags = so->so_proto->pr_flags; |
229 | | | 229 | |
230 | /* | | 230 | /* |
231 | * Some servers require that the client port be a reserved port number. | | 231 | * Some servers require that the client port be a reserved port number. |
232 | */ | | 232 | */ |
233 | if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { | | 233 | if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { |
234 | val = IP_PORTRANGE_LOW; | | 234 | val = IP_PORTRANGE_LOW; |
235 | | | 235 | |
236 | if ((error = so_setsockopt(NULL, so, IPPROTO_IP, IP_PORTRANGE, | | 236 | if ((error = so_setsockopt(NULL, so, IPPROTO_IP, IP_PORTRANGE, |
237 | &val, sizeof(val)))) | | 237 | &val, sizeof(val)))) |
238 | goto bad; | | 238 | goto bad; |
239 | m = m_get(M_WAIT, MT_SONAME); | | 239 | m = m_get(M_WAIT, MT_SONAME); |
240 | MCLAIM(m, so->so_mowner); | | 240 | MCLAIM(m, so->so_mowner); |
241 | sin = mtod(m, struct sockaddr_in *); | | 241 | sin = mtod(m, struct sockaddr_in *); |
242 | sin->sin_len = m->m_len = sizeof (struct sockaddr_in); | | 242 | sin->sin_len = m->m_len = sizeof (struct sockaddr_in); |
243 | sin->sin_family = AF_INET; | | 243 | sin->sin_family = AF_INET; |
244 | sin->sin_addr.s_addr = INADDR_ANY; | | 244 | sin->sin_addr.s_addr = INADDR_ANY; |
245 | sin->sin_port = 0; | | 245 | sin->sin_port = 0; |
246 | error = sobind(so, m, &lwp0); | | 246 | error = sobind(so, m, &lwp0); |
247 | m_freem(m); | | 247 | m_freem(m); |
248 | if (error) | | 248 | if (error) |
249 | goto bad; | | 249 | goto bad; |
250 | } | | 250 | } |
251 | #ifdef INET6 | | 251 | #ifdef INET6 |
252 | if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) { | | 252 | if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) { |
253 | val = IPV6_PORTRANGE_LOW; | | 253 | val = IPV6_PORTRANGE_LOW; |
254 | | | 254 | |
255 | if ((error = so_setsockopt(NULL, so, IPPROTO_IPV6, | | 255 | if ((error = so_setsockopt(NULL, so, IPPROTO_IPV6, |
256 | IPV6_PORTRANGE, &val, sizeof(val)))) | | 256 | IPV6_PORTRANGE, &val, sizeof(val)))) |
257 | goto bad; | | 257 | goto bad; |
258 | m = m_get(M_WAIT, MT_SONAME); | | 258 | m = m_get(M_WAIT, MT_SONAME); |
259 | MCLAIM(m, so->so_mowner); | | 259 | MCLAIM(m, so->so_mowner); |
260 | sin6 = mtod(m, struct sockaddr_in6 *); | | 260 | sin6 = mtod(m, struct sockaddr_in6 *); |
261 | sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6); | | 261 | sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6); |
262 | sin6->sin6_family = AF_INET6; | | 262 | sin6->sin6_family = AF_INET6; |
263 | sin6->sin6_addr = in6addr_any; | | 263 | sin6->sin6_addr = in6addr_any; |
264 | sin6->sin6_port = 0; | | 264 | sin6->sin6_port = 0; |
265 | error = sobind(so, m, &lwp0); | | 265 | error = sobind(so, m, &lwp0); |
266 | m_freem(m); | | 266 | m_freem(m); |
267 | if (error) | | 267 | if (error) |
268 | goto bad; | | 268 | goto bad; |
269 | } | | 269 | } |
270 | #endif | | 270 | #endif |
271 | | | 271 | |
272 | /* | | 272 | /* |
273 | * Protocols that do not require connections may be optionally left | | 273 | * Protocols that do not require connections may be optionally left |
274 | * unconnected for servers that reply from a port other than NFS_PORT. | | 274 | * unconnected for servers that reply from a port other than NFS_PORT. |
275 | */ | | 275 | */ |
276 | solock(so); | | 276 | solock(so); |
277 | if (nmp->nm_flag & NFSMNT_NOCONN) { | | 277 | if (nmp->nm_flag & NFSMNT_NOCONN) { |
278 | if (nmp->nm_soflags & PR_CONNREQUIRED) { | | 278 | if (nmp->nm_soflags & PR_CONNREQUIRED) { |
279 | sounlock(so); | | 279 | sounlock(so); |
280 | error = ENOTCONN; | | 280 | error = ENOTCONN; |
281 | goto bad; | | 281 | goto bad; |
282 | } | | 282 | } |
283 | } else { | | 283 | } else { |
284 | error = soconnect(so, nmp->nm_nam, l); | | 284 | error = soconnect(so, nmp->nm_nam, l); |
285 | if (error) { | | 285 | if (error) { |
286 | sounlock(so); | | 286 | sounlock(so); |
287 | goto bad; | | 287 | goto bad; |
288 | } | | 288 | } |
289 | | | 289 | |
290 | /* | | 290 | /* |
291 | * Wait for the connection to complete. Cribbed from the | | 291 | * Wait for the connection to complete. Cribbed from the |
292 | * connect system call but with the wait timing out so | | 292 | * connect system call but with the wait timing out so |
293 | * that interruptible mounts don't hang here for a long time. | | 293 | * that interruptible mounts don't hang here for a long time. |
294 | */ | | 294 | */ |
295 | while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { | | 295 | while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { |
296 | (void)sowait(so, false, 2 * hz); | | 296 | (void)sowait(so, false, 2 * hz); |
297 | if ((so->so_state & SS_ISCONNECTING) && | | 297 | if ((so->so_state & SS_ISCONNECTING) && |
298 | so->so_error == 0 && rep && | | 298 | so->so_error == 0 && rep && |
299 | (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){ | | 299 | (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){ |
300 | so->so_state &= ~SS_ISCONNECTING; | | 300 | so->so_state &= ~SS_ISCONNECTING; |
301 | sounlock(so); | | 301 | sounlock(so); |
302 | goto bad; | | 302 | goto bad; |
303 | } | | 303 | } |
304 | } | | 304 | } |
305 | if (so->so_error) { | | 305 | if (so->so_error) { |
306 | error = so->so_error; | | 306 | error = so->so_error; |
307 | so->so_error = 0; | | 307 | so->so_error = 0; |
308 | sounlock(so); | | 308 | sounlock(so); |
309 | goto bad; | | 309 | goto bad; |
310 | } | | 310 | } |
311 | } | | 311 | } |
312 | if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { | | 312 | if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { |
313 | so->so_rcv.sb_timeo = (5 * hz); | | 313 | so->so_rcv.sb_timeo = (5 * hz); |
314 | so->so_snd.sb_timeo = (5 * hz); | | 314 | so->so_snd.sb_timeo = (5 * hz); |
315 | } else { | | 315 | } else { |
316 | /* | | 316 | /* |
317 | * enable receive timeout to detect server crash and reconnect. | | 317 | * enable receive timeout to detect server crash and reconnect. |
318 | * otherwise, we can be stuck in soreceive forever. | | 318 | * otherwise, we can be stuck in soreceive forever. |
319 | */ | | 319 | */ |
320 | so->so_rcv.sb_timeo = (5 * hz); | | 320 | so->so_rcv.sb_timeo = (5 * hz); |
321 | so->so_snd.sb_timeo = 0; | | 321 | so->so_snd.sb_timeo = 0; |
322 | } | | 322 | } |
323 | if (nmp->nm_sotype == SOCK_DGRAM) { | | 323 | if (nmp->nm_sotype == SOCK_DGRAM) { |
324 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; | | 324 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3; |
325 | rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + | | 325 | rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + |
326 | NFS_MAXPKTHDR) * 2; | | 326 | NFS_MAXPKTHDR) * 2; |
327 | } else if (nmp->nm_sotype == SOCK_SEQPACKET) { | | 327 | } else if (nmp->nm_sotype == SOCK_SEQPACKET) { |
328 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; | | 328 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3; |
329 | rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + | | 329 | rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + |
330 | NFS_MAXPKTHDR) * 2; | | 330 | NFS_MAXPKTHDR) * 3; |
331 | } else { | | 331 | } else { |
332 | sounlock(so); | | 332 | sounlock(so); |
333 | if (nmp->nm_sotype != SOCK_STREAM) | | 333 | if (nmp->nm_sotype != SOCK_STREAM) |
334 | panic("nfscon sotype"); | | 334 | panic("nfscon sotype"); |
335 | if (so->so_proto->pr_flags & PR_CONNREQUIRED) { | | 335 | if (so->so_proto->pr_flags & PR_CONNREQUIRED) { |
336 | val = 1; | | 336 | val = 1; |
337 | so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val, | | 337 | so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val, |
338 | sizeof(val)); | | 338 | sizeof(val)); |
339 | } | | 339 | } |
340 | if (so->so_proto->pr_protocol == IPPROTO_TCP) { | | 340 | if (so->so_proto->pr_protocol == IPPROTO_TCP) { |
341 | val = 1; | | 341 | val = 1; |
342 | so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val, | | 342 | so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val, |
343 | sizeof(val)); | | 343 | sizeof(val)); |
344 | } | | 344 | } |
345 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + | | 345 | sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + |
346 | sizeof (u_int32_t)) * 2; | | 346 | sizeof (u_int32_t)) * 3; |
347 | rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + | | 347 | rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + |
348 | sizeof (u_int32_t)) * 2; | | 348 | sizeof (u_int32_t)) * 3; |
349 | solock(so); | | 349 | solock(so); |
350 | } | | 350 | } |
351 | error = soreserve(so, sndreserve, rcvreserve); | | 351 | error = soreserve(so, sndreserve, rcvreserve); |
352 | if (error) { | | 352 | if (error) { |
353 | sounlock(so); | | 353 | sounlock(so); |
354 | goto bad; | | 354 | goto bad; |
355 | } | | 355 | } |
356 | so->so_rcv.sb_flags |= SB_NOINTR; | | 356 | so->so_rcv.sb_flags |= SB_NOINTR; |
357 | so->so_snd.sb_flags |= SB_NOINTR; | | 357 | so->so_snd.sb_flags |= SB_NOINTR; |
358 | sounlock(so); | | 358 | sounlock(so); |
359 | | | 359 | |
360 | /* Initialize other non-zero congestion variables */ | | 360 | /* Initialize other non-zero congestion variables */ |
361 | nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = | | 361 | nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = |
362 | NFS_TIMEO << 3; | | 362 | NFS_TIMEO << 3; |
363 | nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = | | 363 | nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = |
364 | nmp->nm_sdrtt[3] = 0; | | 364 | nmp->nm_sdrtt[3] = 0; |
365 | nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ | | 365 | nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ |
366 | nmp->nm_sent = 0; | | 366 | nmp->nm_sent = 0; |
367 | nmp->nm_timeouts = 0; | | 367 | nmp->nm_timeouts = 0; |
368 | return (0); | | 368 | return (0); |
369 | | | 369 | |
370 | bad: | | 370 | bad: |
371 | nfs_disconnect(nmp); | | 371 | nfs_disconnect(nmp); |
372 | return (error); | | 372 | return (error); |
373 | } | | 373 | } |
374 | | | 374 | |
375 | /* | | 375 | /* |
376 | * Reconnect routine: | | 376 | * Reconnect routine: |
377 | * Called when a connection is broken on a reliable protocol. | | 377 | * Called when a connection is broken on a reliable protocol. |
378 | * - clean up the old socket | | 378 | * - clean up the old socket |
379 | * - nfs_connect() again | | 379 | * - nfs_connect() again |
380 | * - set R_MUSTRESEND for all outstanding requests on mount point | | 380 | * - set R_MUSTRESEND for all outstanding requests on mount point |
381 | * If this fails the mount point is DEAD! | | 381 | * If this fails the mount point is DEAD! |
382 | * nb: Must be called with the nfs_sndlock() set on the mount point. | | 382 | * nb: Must be called with the nfs_sndlock() set on the mount point. |
383 | */ | | 383 | */ |
384 | int | | 384 | int |
385 | nfs_reconnect(struct nfsreq *rep) | | 385 | nfs_reconnect(struct nfsreq *rep) |
386 | { | | 386 | { |
387 | struct nfsreq *rp; | | 387 | struct nfsreq *rp; |
388 | struct nfsmount *nmp = rep->r_nmp; | | 388 | struct nfsmount *nmp = rep->r_nmp; |
389 | int error; | | 389 | int error; |
390 | | | 390 | |
391 | nfs_disconnect(nmp); | | 391 | nfs_disconnect(nmp); |
392 | while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) { | | 392 | while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) { |
393 | if (error == EINTR || error == ERESTART) | | 393 | if (error == EINTR || error == ERESTART) |
394 | return (EINTR); | | 394 | return (EINTR); |
395 | kpause("nfscn2", false, hz, NULL); | | 395 | kpause("nfscn2", false, hz, NULL); |
396 | } | | 396 | } |
397 | | | 397 | |
398 | /* | | 398 | /* |
399 | * Loop through outstanding request list and fix up all requests | | 399 | * Loop through outstanding request list and fix up all requests |
400 | * on old socket. | | 400 | * on old socket. |
401 | */ | | 401 | */ |
402 | TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { | | 402 | TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { |
403 | if (rp->r_nmp == nmp) { | | 403 | if (rp->r_nmp == nmp) { |
404 | if ((rp->r_flags & R_MUSTRESEND) == 0) | | 404 | if ((rp->r_flags & R_MUSTRESEND) == 0) |
405 | rp->r_flags |= R_MUSTRESEND | R_REXMITTED; | | 405 | rp->r_flags |= R_MUSTRESEND | R_REXMITTED; |
406 | rp->r_rexmit = 0; | | 406 | rp->r_rexmit = 0; |
407 | } | | 407 | } |
408 | } | | 408 | } |
409 | return (0); | | 409 | return (0); |
410 | } | | 410 | } |
411 | | | 411 | |
412 | /* | | 412 | /* |
413 | * NFS disconnect. Clean up and unlink. | | 413 | * NFS disconnect. Clean up and unlink. |
414 | */ | | 414 | */ |
415 | void | | 415 | void |
416 | nfs_disconnect(nmp) | | 416 | nfs_disconnect(nmp) |
417 | struct nfsmount *nmp; | | 417 | struct nfsmount *nmp; |
418 | { | | 418 | { |
419 | struct socket *so; | | 419 | struct socket *so; |
420 | int drain = 0; | | 420 | int drain = 0; |
421 | | | 421 | |
422 | if (nmp->nm_so) { | | 422 | if (nmp->nm_so) { |
423 | so = nmp->nm_so; | | 423 | so = nmp->nm_so; |
424 | nmp->nm_so = (struct socket *)0; | | 424 | nmp->nm_so = (struct socket *)0; |
425 | solock(so); | | 425 | solock(so); |
426 | soshutdown(so, SHUT_RDWR); | | 426 | soshutdown(so, SHUT_RDWR); |
427 | sounlock(so); | | 427 | sounlock(so); |
428 | drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0; | | 428 | drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0; |
429 | if (drain) { | | 429 | if (drain) { |
430 | /* | | 430 | /* |
431 | * soshutdown() above should wake up the current | | 431 | * soshutdown() above should wake up the current |
432 | * listener. | | 432 | * listener. |
433 | * Now wake up those waiting for the receive lock, and | | 433 | * Now wake up those waiting for the receive lock, and |
434 | * wait for them to go away unhappy, to prevent *nmp | | 434 | * wait for them to go away unhappy, to prevent *nmp |
435 | * from evaporating while they're sleeping. | | 435 | * from evaporating while they're sleeping. |
436 | */ | | 436 | */ |
437 | mutex_enter(&nmp->nm_lock); | | 437 | mutex_enter(&nmp->nm_lock); |
438 | while (nmp->nm_waiters > 0) { | | 438 | while (nmp->nm_waiters > 0) { |
439 | cv_broadcast(&nmp->nm_rcvcv); | | 439 | cv_broadcast(&nmp->nm_rcvcv); |
440 | cv_broadcast(&nmp->nm_sndcv); | | 440 | cv_broadcast(&nmp->nm_sndcv); |
441 | cv_wait(&nmp->nm_disconcv, &nmp->nm_lock); | | 441 | cv_wait(&nmp->nm_disconcv, &nmp->nm_lock); |
442 | } | | 442 | } |
443 | mutex_exit(&nmp->nm_lock); | | 443 | mutex_exit(&nmp->nm_lock); |
444 | } | | 444 | } |
445 | soclose(so); | | 445 | soclose(so); |
446 | } | | 446 | } |
447 | #ifdef DIAGNOSTIC | | 447 | #ifdef DIAGNOSTIC |
448 | if (drain && (nmp->nm_waiters > 0)) | | 448 | if (drain && (nmp->nm_waiters > 0)) |
449 | panic("nfs_disconnect: waiters left after drain?"); | | 449 | panic("nfs_disconnect: waiters left after drain?"); |
450 | #endif | | 450 | #endif |
451 | } | | 451 | } |
452 | | | 452 | |
453 | void | | 453 | void |
454 | nfs_safedisconnect(nmp) | | 454 | nfs_safedisconnect(nmp) |
455 | struct nfsmount *nmp; | | 455 | struct nfsmount *nmp; |
456 | { | | 456 | { |
457 | struct nfsreq dummyreq; | | 457 | struct nfsreq dummyreq; |
458 | | | 458 | |
459 | memset(&dummyreq, 0, sizeof(dummyreq)); | | 459 | memset(&dummyreq, 0, sizeof(dummyreq)); |
460 | dummyreq.r_nmp = nmp; | | 460 | dummyreq.r_nmp = nmp; |
461 | nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */ | | 461 | nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */ |
462 | nfs_disconnect(nmp); | | 462 | nfs_disconnect(nmp); |
463 | nfs_rcvunlock(nmp); | | 463 | nfs_rcvunlock(nmp); |
464 | } | | 464 | } |
465 | | | 465 | |
466 | /* | | 466 | /* |
467 | * This is the nfs send routine. For connection based socket types, it | | 467 | * This is the nfs send routine. For connection based socket types, it |
468 | * must be called with an nfs_sndlock() on the socket. | | 468 | * must be called with an nfs_sndlock() on the socket. |
469 | * "rep == NULL" indicates that it has been called from a server. | | 469 | * "rep == NULL" indicates that it has been called from a server. |
470 | * For the client side: | | 470 | * For the client side: |
471 | * - return EINTR if the RPC is terminated, 0 otherwise | | 471 | * - return EINTR if the RPC is terminated, 0 otherwise |
472 | * - set R_MUSTRESEND if the send fails for any reason | | 472 | * - set R_MUSTRESEND if the send fails for any reason |
473 | * - do any cleanup required by recoverable socket errors (? ? ?) | | 473 | * - do any cleanup required by recoverable socket errors (? ? ?) |
474 | * For the server side: | | 474 | * For the server side: |
475 | * - return EINTR or ERESTART if interrupted by a signal | | 475 | * - return EINTR or ERESTART if interrupted by a signal |
476 | * - return EPIPE if a connection is lost for connection based sockets (TCP...) | | 476 | * - return EPIPE if a connection is lost for connection based sockets (TCP...) |
477 | * - do any cleanup required by recoverable socket errors (? ? ?) | | 477 | * - do any cleanup required by recoverable socket errors (? ? ?) |
478 | */ | | 478 | */ |
479 | int | | 479 | int |
480 | nfs_send(so, nam, top, rep, l) | | 480 | nfs_send(so, nam, top, rep, l) |
481 | struct socket *so; | | 481 | struct socket *so; |
482 | struct mbuf *nam; | | 482 | struct mbuf *nam; |
483 | struct mbuf *top; | | 483 | struct mbuf *top; |
484 | struct nfsreq *rep; | | 484 | struct nfsreq *rep; |
485 | struct lwp *l; | | 485 | struct lwp *l; |
486 | { | | 486 | { |
487 | struct mbuf *sendnam; | | 487 | struct mbuf *sendnam; |
488 | int error, soflags, flags; | | 488 | int error, soflags, flags; |
489 | | | 489 | |
490 | /* XXX nfs_doio()/nfs_request() calls with rep->r_lwp == NULL */ | | 490 | /* XXX nfs_doio()/nfs_request() calls with rep->r_lwp == NULL */ |
491 | if (l == NULL && rep->r_lwp == NULL) | | 491 | if (l == NULL && rep->r_lwp == NULL) |
492 | l = curlwp; | | 492 | l = curlwp; |
493 | | | 493 | |
494 | if (rep) { | | 494 | if (rep) { |
495 | if (rep->r_flags & R_SOFTTERM) { | | 495 | if (rep->r_flags & R_SOFTTERM) { |
496 | m_freem(top); | | 496 | m_freem(top); |
497 | return (EINTR); | | 497 | return (EINTR); |
498 | } | | 498 | } |
499 | if ((so = rep->r_nmp->nm_so) == NULL) { | | 499 | if ((so = rep->r_nmp->nm_so) == NULL) { |
500 | rep->r_flags |= R_MUSTRESEND; | | 500 | rep->r_flags |= R_MUSTRESEND; |
501 | m_freem(top); | | 501 | m_freem(top); |
502 | return (0); | | 502 | return (0); |
503 | } | | 503 | } |
504 | rep->r_flags &= ~R_MUSTRESEND; | | 504 | rep->r_flags &= ~R_MUSTRESEND; |
505 | soflags = rep->r_nmp->nm_soflags; | | 505 | soflags = rep->r_nmp->nm_soflags; |
506 | } else | | 506 | } else |
507 | soflags = so->so_proto->pr_flags; | | 507 | soflags = so->so_proto->pr_flags; |
508 | if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) | | 508 | if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) |
509 | sendnam = (struct mbuf *)0; | | 509 | sendnam = (struct mbuf *)0; |
510 | else | | 510 | else |
511 | sendnam = nam; | | 511 | sendnam = nam; |
512 | if (so->so_type == SOCK_SEQPACKET) | | 512 | if (so->so_type == SOCK_SEQPACKET) |
513 | flags = MSG_EOR; | | 513 | flags = MSG_EOR; |
514 | else | | 514 | else |
515 | flags = 0; | | 515 | flags = 0; |
516 | | | 516 | |
517 | error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags, l); | | 517 | error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags, l); |
518 | if (error) { | | 518 | if (error) { |
519 | if (rep) { | | 519 | if (rep) { |
520 | if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { | | 520 | if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { |
521 | /* | | 521 | /* |
522 | * We're too fast for the network/driver, | | 522 | * We're too fast for the network/driver, |
523 | * and UDP isn't flowcontrolled. | | 523 | * and UDP isn't flowcontrolled. |
524 | * We need to resend. This is not fatal, | | 524 | * We need to resend. This is not fatal, |
525 | * just try again. | | 525 | * just try again. |
526 | * | | 526 | * |
527 | * Could be smarter here by doing some sort | | 527 | * Could be smarter here by doing some sort |
528 | * of a backoff, but this is rare. | | 528 | * of a backoff, but this is rare. |
529 | */ | | 529 | */ |
530 | rep->r_flags |= R_MUSTRESEND; | | 530 | rep->r_flags |= R_MUSTRESEND; |
531 | } else { | | 531 | } else { |
532 | if (error != EPIPE) | | 532 | if (error != EPIPE) |
533 | log(LOG_INFO, | | 533 | log(LOG_INFO, |
534 | "nfs send error %d for %s\n", | | 534 | "nfs send error %d for %s\n", |
535 | error, | | 535 | error, |
536 | rep->r_nmp->nm_mountp-> | | 536 | rep->r_nmp->nm_mountp-> |
537 | mnt_stat.f_mntfromname); | | 537 | mnt_stat.f_mntfromname); |
538 | /* | | 538 | /* |
539 | * Deal with errors for the client side. | | 539 | * Deal with errors for the client side. |
540 | */ | | 540 | */ |
541 | if (rep->r_flags & R_SOFTTERM) | | 541 | if (rep->r_flags & R_SOFTTERM) |
542 | error = EINTR; | | 542 | error = EINTR; |
543 | else if (error != EMSGSIZE) | | 543 | else if (error != EMSGSIZE) |
544 | rep->r_flags |= R_MUSTRESEND; | | 544 | rep->r_flags |= R_MUSTRESEND; |
545 | } | | 545 | } |
546 | } else { | | 546 | } else { |
547 | /* | | 547 | /* |
548 | * See above. This error can happen under normal | | 548 | * See above. This error can happen under normal |
549 | * circumstances and the log is too noisy. | | 549 | * circumstances and the log is too noisy. |
550 | * The error will still show up in nfsstat. | | 550 | * The error will still show up in nfsstat. |
551 | */ | | 551 | */ |
552 | if (error != ENOBUFS || so->so_type != SOCK_DGRAM) | | 552 | if (error != ENOBUFS || so->so_type != SOCK_DGRAM) |
553 | log(LOG_INFO, "nfsd send error %d\n", error); | | 553 | log(LOG_INFO, "nfsd send error %d\n", error); |
554 | } | | 554 | } |
555 | | | 555 | |
556 | /* | | 556 | /* |
557 | * Handle any recoverable (soft) socket errors here. (? ? ?) | | 557 | * Handle any recoverable (soft) socket errors here. (? ? ?) |
558 | */ | | 558 | */ |
559 | if (error != EINTR && error != ERESTART && | | 559 | if (error != EINTR && error != ERESTART && |
560 | error != EWOULDBLOCK && error != EPIPE && | | 560 | error != EWOULDBLOCK && error != EPIPE && |
561 | error != EMSGSIZE) | | 561 | error != EMSGSIZE) |
562 | error = 0; | | 562 | error = 0; |
563 | } | | 563 | } |
564 | return (error); | | 564 | return (error); |
565 | } | | 565 | } |
566 | | | 566 | |
567 | #ifdef NFS | | 567 | #ifdef NFS |
568 | /* | | 568 | /* |
569 | * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all | | 569 | * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all |
570 | * done by soreceive(), but for SOCK_STREAM we must deal with the Record | | 570 | * done by soreceive(), but for SOCK_STREAM we must deal with the Record |
571 | * Mark and consolidate the data into a new mbuf list. | | 571 | * Mark and consolidate the data into a new mbuf list. |
572 | * nb: Sometimes TCP passes the data up to soreceive() in long lists of | | 572 | * nb: Sometimes TCP passes the data up to soreceive() in long lists of |
573 | * small mbufs. | | 573 | * small mbufs. |
574 | * For SOCK_STREAM we must be very careful to read an entire record once | | 574 | * For SOCK_STREAM we must be very careful to read an entire record once |
575 | * we have read any of it, even if the system call has been interrupted. | | 575 | * we have read any of it, even if the system call has been interrupted. |
576 | */ | | 576 | */ |
577 | static int | | 577 | static int |
578 | nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp, | | 578 | nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp, |
579 | struct lwp *l) | | 579 | struct lwp *l) |
580 | { | | 580 | { |
581 | struct socket *so; | | 581 | struct socket *so; |
582 | struct uio auio; | | 582 | struct uio auio; |
583 | struct iovec aio; | | 583 | struct iovec aio; |
584 | struct mbuf *m; | | 584 | struct mbuf *m; |
585 | struct mbuf *control; | | 585 | struct mbuf *control; |
586 | u_int32_t len; | | 586 | u_int32_t len; |
587 | struct mbuf **getnam; | | 587 | struct mbuf **getnam; |
588 | int error, sotype, rcvflg; | | 588 | int error, sotype, rcvflg; |
589 | | | 589 | |
590 | /* | | 590 | /* |
591 | * Set up arguments for soreceive() | | 591 | * Set up arguments for soreceive() |
592 | */ | | 592 | */ |
593 | *mp = (struct mbuf *)0; | | 593 | *mp = (struct mbuf *)0; |
594 | *aname = (struct mbuf *)0; | | 594 | *aname = (struct mbuf *)0; |
595 | sotype = rep->r_nmp->nm_sotype; | | 595 | sotype = rep->r_nmp->nm_sotype; |
596 | | | 596 | |
597 | /* | | 597 | /* |
598 | * For reliable protocols, lock against other senders/receivers | | 598 | * For reliable protocols, lock against other senders/receivers |
599 | * in case a reconnect is necessary. | | 599 | * in case a reconnect is necessary. |
600 | * For SOCK_STREAM, first get the Record Mark to find out how much | | 600 | * For SOCK_STREAM, first get the Record Mark to find out how much |
601 | * more there is to get. | | 601 | * more there is to get. |
602 | * We must lock the socket against other receivers | | 602 | * We must lock the socket against other receivers |
603 | * until we have an entire rpc request/reply. | | 603 | * until we have an entire rpc request/reply. |
604 | */ | | 604 | */ |
605 | if (sotype != SOCK_DGRAM) { | | 605 | if (sotype != SOCK_DGRAM) { |
606 | error = nfs_sndlock(rep->r_nmp, rep); | | 606 | error = nfs_sndlock(rep->r_nmp, rep); |
607 | if (error) | | 607 | if (error) |
608 | return (error); | | 608 | return (error); |
609 | tryagain: | | 609 | tryagain: |
610 | /* | | 610 | /* |
611 | * Check for fatal errors and resending request. | | 611 | * Check for fatal errors and resending request. |
612 | */ | | 612 | */ |
613 | /* | | 613 | /* |
614 | * Ugh: If a reconnect attempt just happened, nm_so | | 614 | * Ugh: If a reconnect attempt just happened, nm_so |
615 | * would have changed. NULL indicates a failed | | 615 | * would have changed. NULL indicates a failed |
616 | * attempt that has essentially shut down this | | 616 | * attempt that has essentially shut down this |
617 | * mount point. | | 617 | * mount point. |
618 | */ | | 618 | */ |
619 | if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { | | 619 | if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { |
620 | nfs_sndunlock(rep->r_nmp); | | 620 | nfs_sndunlock(rep->r_nmp); |
621 | return (EINTR); | | 621 | return (EINTR); |
622 | } | | 622 | } |
623 | so = rep->r_nmp->nm_so; | | 623 | so = rep->r_nmp->nm_so; |
624 | if (!so) { | | 624 | if (!so) { |
625 | error = nfs_reconnect(rep); | | 625 | error = nfs_reconnect(rep); |
626 | if (error) { | | 626 | if (error) { |
627 | nfs_sndunlock(rep->r_nmp); | | 627 | nfs_sndunlock(rep->r_nmp); |
628 | return (error); | | 628 | return (error); |
629 | } | | 629 | } |
630 | goto tryagain; | | 630 | goto tryagain; |
631 | } | | 631 | } |
632 | while (rep->r_flags & R_MUSTRESEND) { | | 632 | while (rep->r_flags & R_MUSTRESEND) { |
633 | m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); | | 633 | m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); |
634 | nfsstats.rpcretries++; | | 634 | nfsstats.rpcretries++; |
635 | rep->r_rtt = 0; | | 635 | rep->r_rtt = 0; |
636 | rep->r_flags &= ~R_TIMING; | | 636 | rep->r_flags &= ~R_TIMING; |
637 | error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l); | | 637 | error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l); |
638 | if (error) { | | 638 | if (error) { |
639 | if (error == EINTR || error == ERESTART || | | 639 | if (error == EINTR || error == ERESTART || |
640 | (error = nfs_reconnect(rep)) != 0) { | | 640 | (error = nfs_reconnect(rep)) != 0) { |
641 | nfs_sndunlock(rep->r_nmp); | | 641 | nfs_sndunlock(rep->r_nmp); |
642 | return (error); | | 642 | return (error); |
643 | } | | 643 | } |
644 | goto tryagain; | | 644 | goto tryagain; |
645 | } | | 645 | } |
646 | } | | 646 | } |
647 | nfs_sndunlock(rep->r_nmp); | | 647 | nfs_sndunlock(rep->r_nmp); |
648 | if (sotype == SOCK_STREAM) { | | 648 | if (sotype == SOCK_STREAM) { |
649 | aio.iov_base = (void *) &len; | | 649 | aio.iov_base = (void *) &len; |
650 | aio.iov_len = sizeof(u_int32_t); | | 650 | aio.iov_len = sizeof(u_int32_t); |
651 | auio.uio_iov = &aio; | | 651 | auio.uio_iov = &aio; |
652 | auio.uio_iovcnt = 1; | | 652 | auio.uio_iovcnt = 1; |
653 | auio.uio_rw = UIO_READ; | | 653 | auio.uio_rw = UIO_READ; |
654 | auio.uio_offset = 0; | | 654 | auio.uio_offset = 0; |
655 | auio.uio_resid = sizeof(u_int32_t); | | 655 | auio.uio_resid = sizeof(u_int32_t); |
656 | UIO_SETUP_SYSSPACE(&auio); | | 656 | UIO_SETUP_SYSSPACE(&auio); |
657 | do { | | 657 | do { |
658 | rcvflg = MSG_WAITALL; | | 658 | rcvflg = MSG_WAITALL; |
659 | error = (*so->so_receive)(so, (struct mbuf **)0, &auio, | | 659 | error = (*so->so_receive)(so, (struct mbuf **)0, &auio, |
660 | (struct mbuf **)0, (struct mbuf **)0, &rcvflg); | | 660 | (struct mbuf **)0, (struct mbuf **)0, &rcvflg); |
661 | if (error == EWOULDBLOCK && rep) { | | 661 | if (error == EWOULDBLOCK && rep) { |
662 | if (rep->r_flags & R_SOFTTERM) | | 662 | if (rep->r_flags & R_SOFTTERM) |
663 | return (EINTR); | | 663 | return (EINTR); |
664 | /* | | 664 | /* |
665 | * if it seems that the server died after it | | 665 | * if it seems that the server died after it |
666 | * received our request, set EPIPE so that | | 666 | * received our request, set EPIPE so that |
667 | * we'll reconnect and retransmit requests. | | 667 | * we'll reconnect and retransmit requests. |
668 | */ | | 668 | */ |
669 | if (rep->r_rexmit >= rep->r_nmp->nm_retry) { | | 669 | if (rep->r_rexmit >= rep->r_nmp->nm_retry) { |
670 | nfsstats.rpctimeouts++; | | 670 | nfsstats.rpctimeouts++; |
671 | error = EPIPE; | | 671 | error = EPIPE; |
672 | } | | 672 | } |
673 | } | | 673 | } |
674 | } while (error == EWOULDBLOCK); | | 674 | } while (error == EWOULDBLOCK); |
675 | if (!error && auio.uio_resid > 0) { | | 675 | if (!error && auio.uio_resid > 0) { |
676 | /* | | 676 | /* |
677 | * Don't log a 0 byte receive; it means | | 677 | * Don't log a 0 byte receive; it means |
678 | * that the socket has been closed, and | | 678 | * that the socket has been closed, and |
679 | * can happen during normal operation | | 679 | * can happen during normal operation |
680 | * (forcible unmount or Solaris server). | | 680 | * (forcible unmount or Solaris server). |
681 | */ | | 681 | */ |
682 | if (auio.uio_resid != sizeof (u_int32_t)) | | 682 | if (auio.uio_resid != sizeof (u_int32_t)) |
683 | log(LOG_INFO, | | 683 | log(LOG_INFO, |
684 | "short receive (%lu/%lu) from nfs server %s\n", | | 684 | "short receive (%lu/%lu) from nfs server %s\n", |
685 | (u_long)sizeof(u_int32_t) - auio.uio_resid, | | 685 | (u_long)sizeof(u_int32_t) - auio.uio_resid, |
686 | (u_long)sizeof(u_int32_t), | | 686 | (u_long)sizeof(u_int32_t), |
687 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); | | 687 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); |
688 | error = EPIPE; | | 688 | error = EPIPE; |
689 | } | | 689 | } |
690 | if (error) | | 690 | if (error) |
691 | goto errout; | | 691 | goto errout; |
692 | len = ntohl(len) & ~0x80000000; | | 692 | len = ntohl(len) & ~0x80000000; |
693 | /* | | 693 | /* |
694 | * This is SERIOUS! We are out of sync with the sender | | 694 | * This is SERIOUS! We are out of sync with the sender |
695 | * and forcing a disconnect/reconnect is all I can do. | | 695 | * and forcing a disconnect/reconnect is all I can do. |
696 | */ | | 696 | */ |
697 | if (len > NFS_MAXPACKET) { | | 697 | if (len > NFS_MAXPACKET) { |
698 | log(LOG_ERR, "%s (%d) from nfs server %s\n", | | 698 | log(LOG_ERR, "%s (%d) from nfs server %s\n", |
699 | "impossible packet length", | | 699 | "impossible packet length", |
700 | len, | | 700 | len, |
701 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); | | 701 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); |
702 | error = EFBIG; | | 702 | error = EFBIG; |
703 | goto errout; | | 703 | goto errout; |
704 | } | | 704 | } |
705 | auio.uio_resid = len; | | 705 | auio.uio_resid = len; |
706 | do { | | 706 | do { |
707 | rcvflg = MSG_WAITALL; | | 707 | rcvflg = MSG_WAITALL; |
708 | error = (*so->so_receive)(so, (struct mbuf **)0, | | 708 | error = (*so->so_receive)(so, (struct mbuf **)0, |
709 | &auio, mp, (struct mbuf **)0, &rcvflg); | | 709 | &auio, mp, (struct mbuf **)0, &rcvflg); |
710 | } while (error == EWOULDBLOCK || error == EINTR || | | 710 | } while (error == EWOULDBLOCK || error == EINTR || |
711 | error == ERESTART); | | 711 | error == ERESTART); |
712 | if (!error && auio.uio_resid > 0) { | | 712 | if (!error && auio.uio_resid > 0) { |
713 | if (len != auio.uio_resid) | | 713 | if (len != auio.uio_resid) |
714 | log(LOG_INFO, | | 714 | log(LOG_INFO, |
715 | "short receive (%lu/%d) from nfs server %s\n", | | 715 | "short receive (%lu/%d) from nfs server %s\n", |
716 | (u_long)len - auio.uio_resid, len, | | 716 | (u_long)len - auio.uio_resid, len, |
717 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); | | 717 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); |
718 | error = EPIPE; | | 718 | error = EPIPE; |
719 | } | | 719 | } |
720 | } else { | | 720 | } else { |
721 | /* | | 721 | /* |
722 | * NB: Since uio_resid is big, MSG_WAITALL is ignored | | 722 | * NB: Since uio_resid is big, MSG_WAITALL is ignored |
723 | * and soreceive() will return when it has either a | | 723 | * and soreceive() will return when it has either a |
724 | * control msg or a data msg. | | 724 | * control msg or a data msg. |
725 | * We have no use for control msg., but must grab them | | 725 | * We have no use for control msg., but must grab them |
726 | * and then throw them away so we know what is going | | 726 | * and then throw them away so we know what is going |
727 | * on. | | 727 | * on. |
728 | */ | | 728 | */ |
729 | auio.uio_resid = len = 100000000; /* Anything Big */ | | 729 | auio.uio_resid = len = 100000000; /* Anything Big */ |
730 | /* not need to setup uio_vmspace */ | | 730 | /* not need to setup uio_vmspace */ |
731 | do { | | 731 | do { |
732 | rcvflg = 0; | | 732 | rcvflg = 0; |
733 | error = (*so->so_receive)(so, (struct mbuf **)0, | | 733 | error = (*so->so_receive)(so, (struct mbuf **)0, |
734 | &auio, mp, &control, &rcvflg); | | 734 | &auio, mp, &control, &rcvflg); |
735 | if (control) | | 735 | if (control) |
736 | m_freem(control); | | 736 | m_freem(control); |
737 | if (error == EWOULDBLOCK && rep) { | | 737 | if (error == EWOULDBLOCK && rep) { |
738 | if (rep->r_flags & R_SOFTTERM) | | 738 | if (rep->r_flags & R_SOFTTERM) |
739 | return (EINTR); | | 739 | return (EINTR); |
740 | } | | 740 | } |
741 | } while (error == EWOULDBLOCK || | | 741 | } while (error == EWOULDBLOCK || |
742 | (!error && *mp == NULL && control)); | | 742 | (!error && *mp == NULL && control)); |
743 | if ((rcvflg & MSG_EOR) == 0) | | 743 | if ((rcvflg & MSG_EOR) == 0) |
744 | printf("Egad!!\n"); | | 744 | printf("Egad!!\n"); |
745 | if (!error && *mp == NULL) | | 745 | if (!error && *mp == NULL) |
746 | error = EPIPE; | | 746 | error = EPIPE; |
747 | len -= auio.uio_resid; | | 747 | len -= auio.uio_resid; |
748 | } | | 748 | } |
749 | errout: | | 749 | errout: |
750 | if (error && error != EINTR && error != ERESTART) { | | 750 | if (error && error != EINTR && error != ERESTART) { |
751 | m_freem(*mp); | | 751 | m_freem(*mp); |
752 | *mp = (struct mbuf *)0; | | 752 | *mp = (struct mbuf *)0; |
753 | if (error != EPIPE) | | 753 | if (error != EPIPE) |
754 | log(LOG_INFO, | | 754 | log(LOG_INFO, |
755 | "receive error %d from nfs server %s\n", | | 755 | "receive error %d from nfs server %s\n", |
756 | error, | | 756 | error, |
757 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); | | 757 | rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); |
758 | error = nfs_sndlock(rep->r_nmp, rep); | | 758 | error = nfs_sndlock(rep->r_nmp, rep); |
759 | if (!error) | | 759 | if (!error) |
760 | error = nfs_reconnect(rep); | | 760 | error = nfs_reconnect(rep); |
761 | if (!error) | | 761 | if (!error) |
762 | goto tryagain; | | 762 | goto tryagain; |
763 | else | | 763 | else |
764 | nfs_sndunlock(rep->r_nmp); | | 764 | nfs_sndunlock(rep->r_nmp); |
765 | } | | 765 | } |
766 | } else { | | 766 | } else { |
767 | if ((so = rep->r_nmp->nm_so) == NULL) | | 767 | if ((so = rep->r_nmp->nm_so) == NULL) |
768 | return (EACCES); | | 768 | return (EACCES); |
769 | if (so->so_state & SS_ISCONNECTED) | | 769 | if (so->so_state & SS_ISCONNECTED) |
770 | getnam = (struct mbuf **)0; | | 770 | getnam = (struct mbuf **)0; |
771 | else | | 771 | else |
772 | getnam = aname; | | 772 | getnam = aname; |
773 | auio.uio_resid = len = 1000000; | | 773 | auio.uio_resid = len = 1000000; |
774 | /* not need to setup uio_vmspace */ | | 774 | /* not need to setup uio_vmspace */ |
775 | do { | | 775 | do { |
776 | rcvflg = 0; | | 776 | rcvflg = 0; |
777 | error = (*so->so_receive)(so, getnam, &auio, mp, | | 777 | error = (*so->so_receive)(so, getnam, &auio, mp, |
778 | (struct mbuf **)0, &rcvflg); | | 778 | (struct mbuf **)0, &rcvflg); |
779 | if (error == EWOULDBLOCK && | | 779 | if (error == EWOULDBLOCK && |
780 | (rep->r_flags & R_SOFTTERM)) | | 780 | (rep->r_flags & R_SOFTTERM)) |
781 | return (EINTR); | | 781 | return (EINTR); |
782 | } while (error == EWOULDBLOCK); | | 782 | } while (error == EWOULDBLOCK); |
783 | len -= auio.uio_resid; | | 783 | len -= auio.uio_resid; |
784 | if (!error && *mp == NULL) | | 784 | if (!error && *mp == NULL) |
785 | error = EPIPE; | | 785 | error = EPIPE; |
786 | } | | 786 | } |
787 | if (error) { | | 787 | if (error) { |
788 | m_freem(*mp); | | 788 | m_freem(*mp); |
789 | *mp = (struct mbuf *)0; | | 789 | *mp = (struct mbuf *)0; |
790 | } | | 790 | } |
791 | return (error); | | 791 | return (error); |
792 | } | | 792 | } |
793 | | | 793 | |
794 | /* | | 794 | /* |
795 | * Implement receipt of reply on a socket. | | 795 | * Implement receipt of reply on a socket. |
796 | * We must search through the list of received datagrams matching them | | 796 | * We must search through the list of received datagrams matching them |
797 | * with outstanding requests using the xid, until ours is found. | | 797 | * with outstanding requests using the xid, until ours is found. |
798 | */ | | 798 | */ |
799 | /* ARGSUSED */ | | 799 | /* ARGSUSED */ |
800 | static int | | 800 | static int |
801 | nfs_reply(struct nfsreq *myrep, struct lwp *lwp) | | 801 | nfs_reply(struct nfsreq *myrep, struct lwp *lwp) |
802 | { | | 802 | { |
803 | struct nfsreq *rep; | | 803 | struct nfsreq *rep; |
804 | struct nfsmount *nmp = myrep->r_nmp; | | 804 | struct nfsmount *nmp = myrep->r_nmp; |
805 | int32_t t1; | | 805 | int32_t t1; |
806 | struct mbuf *mrep, *nam, *md; | | 806 | struct mbuf *mrep, *nam, *md; |
807 | u_int32_t rxid, *tl; | | 807 | u_int32_t rxid, *tl; |
808 | char *dpos, *cp2; | | 808 | char *dpos, *cp2; |
809 | int error; | | 809 | int error; |
810 | | | 810 | |
811 | /* | | 811 | /* |
812 | * Loop around until we get our own reply | | 812 | * Loop around until we get our own reply |
813 | */ | | 813 | */ |
814 | for (;;) { | | 814 | for (;;) { |
815 | /* | | 815 | /* |
816 | * Lock against other receivers so that I don't get stuck in | | 816 | * Lock against other receivers so that I don't get stuck in |
817 | * sbwait() after someone else has received my reply for me. | | 817 | * sbwait() after someone else has received my reply for me. |
818 | * Also necessary for connection based protocols to avoid | | 818 | * Also necessary for connection based protocols to avoid |
819 | * race conditions during a reconnect. | | 819 | * race conditions during a reconnect. |
820 | */ | | 820 | */ |
821 | error = nfs_rcvlock(nmp, myrep); | | 821 | error = nfs_rcvlock(nmp, myrep); |
822 | if (error == EALREADY) | | 822 | if (error == EALREADY) |
823 | return (0); | | 823 | return (0); |
824 | if (error) | | 824 | if (error) |
825 | return (error); | | 825 | return (error); |
826 | /* | | 826 | /* |
827 | * Get the next Rpc reply off the socket | | 827 | * Get the next Rpc reply off the socket |
828 | */ | | 828 | */ |
829 | | | 829 | |
830 | mutex_enter(&nmp->nm_lock); | | 830 | mutex_enter(&nmp->nm_lock); |
831 | nmp->nm_waiters++; | | 831 | nmp->nm_waiters++; |
832 | mutex_exit(&nmp->nm_lock); | | 832 | mutex_exit(&nmp->nm_lock); |
833 | | | 833 | |
834 | error = nfs_receive(myrep, &nam, &mrep, lwp); | | 834 | error = nfs_receive(myrep, &nam, &mrep, lwp); |
835 | | | 835 | |
836 | mutex_enter(&nmp->nm_lock); | | 836 | mutex_enter(&nmp->nm_lock); |
837 | nmp->nm_waiters--; | | 837 | nmp->nm_waiters--; |
838 | cv_signal(&nmp->nm_disconcv); | | 838 | cv_signal(&nmp->nm_disconcv); |
839 | mutex_exit(&nmp->nm_lock); | | 839 | mutex_exit(&nmp->nm_lock); |
840 | | | 840 | |
841 | if (error) { | | 841 | if (error) { |
842 | nfs_rcvunlock(nmp); | | 842 | nfs_rcvunlock(nmp); |
843 | | | 843 | |
844 | if (nmp->nm_iflag & NFSMNT_DISMNT) { | | 844 | if (nmp->nm_iflag & NFSMNT_DISMNT) { |
845 | /* | | 845 | /* |
846 | * Oops, we're going away now.. | | 846 | * Oops, we're going away now.. |
847 | */ | | 847 | */ |
848 | return error; | | 848 | return error; |
849 | } | | 849 | } |
850 | /* | | 850 | /* |
851 | * Ignore routing errors on connectionless protocols? ? | | 851 | * Ignore routing errors on connectionless protocols? ? |
852 | */ | | 852 | */ |
853 | if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { | | 853 | if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { |
854 | nmp->nm_so->so_error = 0; | | 854 | nmp->nm_so->so_error = 0; |
855 | #ifdef DEBUG | | 855 | #ifdef DEBUG |
856 | if (ratecheck(&nfs_reply_last_err_time, | | 856 | if (ratecheck(&nfs_reply_last_err_time, |
857 | &nfs_err_interval)) | | 857 | &nfs_err_interval)) |
858 | printf("%s: ignoring error %d\n", | | 858 | printf("%s: ignoring error %d\n", |
859 | __func__, error); | | 859 | __func__, error); |
860 | #endif | | 860 | #endif |
861 | continue; | | 861 | continue; |
862 | } | | 862 | } |
863 | return (error); | | 863 | return (error); |
864 | } | | 864 | } |
865 | if (nam) | | 865 | if (nam) |
866 | m_freem(nam); | | 866 | m_freem(nam); |
867 | | | 867 | |
868 | /* | | 868 | /* |
869 | * Get the xid and check that it is an rpc reply | | 869 | * Get the xid and check that it is an rpc reply |
870 | */ | | 870 | */ |
871 | md = mrep; | | 871 | md = mrep; |
872 | dpos = mtod(md, void *); | | 872 | dpos = mtod(md, void *); |
873 | nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED); | | 873 | nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED); |
874 | rxid = *tl++; | | 874 | rxid = *tl++; |
875 | if (*tl != rpc_reply) { | | 875 | if (*tl != rpc_reply) { |
876 | nfsstats.rpcinvalid++; | | 876 | nfsstats.rpcinvalid++; |
877 | m_freem(mrep); | | 877 | m_freem(mrep); |
878 | nfsmout: | | 878 | nfsmout: |
879 | nfs_rcvunlock(nmp); | | 879 | nfs_rcvunlock(nmp); |
880 | continue; | | 880 | continue; |
881 | } | | 881 | } |
882 | | | 882 | |
883 | /* | | 883 | /* |
884 | * Loop through the request list to match up the reply | | 884 | * Loop through the request list to match up the reply |
885 | * Iff no match, just drop the datagram | | 885 | * Iff no match, just drop the datagram |
886 | */ | | 886 | */ |
887 | TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { | | 887 | TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { |
888 | if (rep->r_mrep == NULL && rxid == rep->r_xid) { | | 888 | if (rep->r_mrep == NULL && rxid == rep->r_xid) { |
889 | /* Found it.. */ | | 889 | /* Found it.. */ |
890 | rep->r_mrep = mrep; | | 890 | rep->r_mrep = mrep; |
891 | rep->r_md = md; | | 891 | rep->r_md = md; |
892 | rep->r_dpos = dpos; | | 892 | rep->r_dpos = dpos; |
893 | if (nfsrtton) { | | 893 | if (nfsrtton) { |
894 | struct rttl *rt; | | 894 | struct rttl *rt; |
895 | | | 895 | |
896 | rt = &nfsrtt.rttl[nfsrtt.pos]; | | 896 | rt = &nfsrtt.rttl[nfsrtt.pos]; |
897 | rt->proc = rep->r_procnum; | | 897 | rt->proc = rep->r_procnum; |
898 | rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); | | 898 | rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); |
899 | rt->sent = nmp->nm_sent; | | 899 | rt->sent = nmp->nm_sent; |
900 | rt->cwnd = nmp->nm_cwnd; | | 900 | rt->cwnd = nmp->nm_cwnd; |
901 | rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; | | 901 | rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; |
902 | rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; | | 902 | rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; |
903 | rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx; | | 903 | rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx; |
904 | getmicrotime(&rt->tstamp); | | 904 | getmicrotime(&rt->tstamp); |
905 | if (rep->r_flags & R_TIMING) | | 905 | if (rep->r_flags & R_TIMING) |
906 | rt->rtt = rep->r_rtt; | | 906 | rt->rtt = rep->r_rtt; |
907 | else | | 907 | else |
908 | rt->rtt = 1000000; | | 908 | rt->rtt = 1000000; |
909 | nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; | | 909 | nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; |
910 | } | | 910 | } |
911 | /* | | 911 | /* |
912 | * Update congestion window. | | 912 | * Update congestion window. |
913 | * Do the additive increase of | | 913 | * Do the additive increase of |
914 | * one rpc/rtt. | | 914 | * one rpc/rtt. |
915 | */ | | 915 | */ |
916 | if (nmp->nm_cwnd <= nmp->nm_sent) { | | 916 | if (nmp->nm_cwnd <= nmp->nm_sent) { |
917 | nmp->nm_cwnd += | | 917 | nmp->nm_cwnd += |
918 | (NFS_CWNDSCALE * NFS_CWNDSCALE + | | 918 | (NFS_CWNDSCALE * NFS_CWNDSCALE + |
919 | (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; | | 919 | (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; |
920 | if (nmp->nm_cwnd > NFS_MAXCWND) | | 920 | if (nmp->nm_cwnd > NFS_MAXCWND) |
921 | nmp->nm_cwnd = NFS_MAXCWND; | | 921 | nmp->nm_cwnd = NFS_MAXCWND; |
922 | } | | 922 | } |
923 | rep->r_flags &= ~R_SENT; | | 923 | rep->r_flags &= ~R_SENT; |
924 | nmp->nm_sent -= NFS_CWNDSCALE; | | 924 | nmp->nm_sent -= NFS_CWNDSCALE; |
925 | /* | | 925 | /* |
926 | * Update rtt using a gain of 0.125 on the mean | | 926 | * Update rtt using a gain of 0.125 on the mean |
927 | * and a gain of 0.25 on the deviation. | | 927 | * and a gain of 0.25 on the deviation. |
928 | */ | | 928 | */ |
929 | if (rep->r_flags & R_TIMING) { | | 929 | if (rep->r_flags & R_TIMING) { |
930 | /* | | 930 | /* |
931 | * Since the timer resolution of | | 931 | * Since the timer resolution of |
932 | * NFS_HZ is so coarse, it can often | | 932 | * NFS_HZ is so coarse, it can often |
933 | * result in r_rtt == 0. Since | | 933 | * result in r_rtt == 0. Since |
934 | * r_rtt == N means that the actual | | 934 | * r_rtt == N means that the actual |
935 | * rtt is between N+dt and N+2-dt ticks, | | 935 | * rtt is between N+dt and N+2-dt ticks, |
936 | * add 1. | | 936 | * add 1. |
937 | */ | | 937 | */ |
938 | t1 = rep->r_rtt + 1; | | 938 | t1 = rep->r_rtt + 1; |
939 | t1 -= (NFS_SRTT(rep) >> 3); | | 939 | t1 -= (NFS_SRTT(rep) >> 3); |
940 | NFS_SRTT(rep) += t1; | | 940 | NFS_SRTT(rep) += t1; |
941 | if (t1 < 0) | | 941 | if (t1 < 0) |
942 | t1 = -t1; | | 942 | t1 = -t1; |
943 | t1 -= (NFS_SDRTT(rep) >> 2); | | 943 | t1 -= (NFS_SDRTT(rep) >> 2); |
944 | NFS_SDRTT(rep) += t1; | | 944 | NFS_SDRTT(rep) += t1; |
945 | } | | 945 | } |
946 | nmp->nm_timeouts = 0; | | 946 | nmp->nm_timeouts = 0; |
947 | break; | | 947 | break; |
948 | } | | 948 | } |
949 | } | | 949 | } |
950 | nfs_rcvunlock(nmp); | | 950 | nfs_rcvunlock(nmp); |
951 | /* | | 951 | /* |
952 | * If not matched to a request, drop it. | | 952 | * If not matched to a request, drop it. |
953 | * If it's mine, get out. | | 953 | * If it's mine, get out. |
954 | */ | | 954 | */ |
955 | if (rep == 0) { | | 955 | if (rep == 0) { |
956 | nfsstats.rpcunexpected++; | | 956 | nfsstats.rpcunexpected++; |
957 | m_freem(mrep); | | 957 | m_freem(mrep); |
958 | } else if (rep == myrep) { | | 958 | } else if (rep == myrep) { |
959 | if (rep->r_mrep == NULL) | | 959 | if (rep->r_mrep == NULL) |
960 | panic("nfsreply nil"); | | 960 | panic("nfsreply nil"); |
961 | return (0); | | 961 | return (0); |
962 | } | | 962 | } |
963 | } | | 963 | } |
964 | } | | 964 | } |
965 | | | 965 | |
966 | /* | | 966 | /* |
967 | * nfs_request - goes something like this | | 967 | * nfs_request - goes something like this |
968 | * - fill in request struct | | 968 | * - fill in request struct |
969 | * - links it into list | | 969 | * - links it into list |
970 | * - calls nfs_send() for first transmit | | 970 | * - calls nfs_send() for first transmit |
971 | * - calls nfs_receive() to get reply | | 971 | * - calls nfs_receive() to get reply |
972 | * - break down rpc header and return with nfs reply pointed to | | 972 | * - break down rpc header and return with nfs reply pointed to |
973 | * by mrep or error | | 973 | * by mrep or error |
974 | * nb: always frees up mreq mbuf list | | 974 | * nb: always frees up mreq mbuf list |
975 | */ | | 975 | */ |
976 | int | | 976 | int |
977 | nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp) | | 977 | nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp) |
978 | struct nfsnode *np; | | 978 | struct nfsnode *np; |
979 | struct mbuf *mrest; | | 979 | struct mbuf *mrest; |
980 | int procnum; | | 980 | int procnum; |
981 | struct lwp *lwp; | | 981 | struct lwp *lwp; |
982 | kauth_cred_t cred; | | 982 | kauth_cred_t cred; |
983 | struct mbuf **mrp; | | 983 | struct mbuf **mrp; |
984 | struct mbuf **mdp; | | 984 | struct mbuf **mdp; |
985 | char **dposp; | | 985 | char **dposp; |
986 | int *rexmitp; | | 986 | int *rexmitp; |
987 | { | | 987 | { |
988 | struct mbuf *m, *mrep; | | 988 | struct mbuf *m, *mrep; |
989 | struct nfsreq *rep; | | 989 | struct nfsreq *rep; |
990 | u_int32_t *tl; | | 990 | u_int32_t *tl; |
991 | int i; | | 991 | int i; |
992 | struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount); | | 992 | struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount); |
993 | struct mbuf *md, *mheadend; | | 993 | struct mbuf *md, *mheadend; |
994 | char nickv[RPCX_NICKVERF]; | | 994 | char nickv[RPCX_NICKVERF]; |
995 | time_t waituntil; | | 995 | time_t waituntil; |
996 | char *dpos, *cp2; | | 996 | char *dpos, *cp2; |
997 | int t1, s, error = 0, mrest_len, auth_len, auth_type; | | 997 | int t1, s, error = 0, mrest_len, auth_len, auth_type; |
998 | int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0; | | 998 | int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0; |
999 | int verf_len, verf_type; | | 999 | int verf_len, verf_type; |
1000 | u_int32_t xid; | | 1000 | u_int32_t xid; |
1001 | char *auth_str, *verf_str; | | 1001 | char *auth_str, *verf_str; |
1002 | NFSKERBKEY_T key; /* save session key */ | | 1002 | NFSKERBKEY_T key; /* save session key */ |
1003 | kauth_cred_t acred; | | 1003 | kauth_cred_t acred; |
1004 | struct mbuf *mrest_backup = NULL; | | 1004 | struct mbuf *mrest_backup = NULL; |
1005 | kauth_cred_t origcred = NULL; /* XXX: gcc */ | | 1005 | kauth_cred_t origcred = NULL; /* XXX: gcc */ |
1006 | bool retry_cred = true; | | 1006 | bool retry_cred = true; |
1007 | bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0; | | 1007 | bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0; |
1008 | | | 1008 | |
1009 | if (rexmitp != NULL) | | 1009 | if (rexmitp != NULL) |
1010 | *rexmitp = 0; | | 1010 | *rexmitp = 0; |
1011 | | | 1011 | |
1012 | acred = kauth_cred_alloc(); | | 1012 | acred = kauth_cred_alloc(); |
1013 | | | 1013 | |
1014 | tryagain_cred: | | 1014 | tryagain_cred: |
1015 | KASSERT(cred != NULL); | | 1015 | KASSERT(cred != NULL); |
1016 | rep = kmem_alloc(sizeof(*rep), KM_SLEEP); | | 1016 | rep = kmem_alloc(sizeof(*rep), KM_SLEEP); |
1017 | rep->r_nmp = nmp; | | 1017 | rep->r_nmp = nmp; |
1018 | KASSERT(lwp == NULL || lwp == curlwp); | | 1018 | KASSERT(lwp == NULL || lwp == curlwp); |
1019 | rep->r_lwp = lwp; | | 1019 | rep->r_lwp = lwp; |
1020 | rep->r_procnum = procnum; | | 1020 | rep->r_procnum = procnum; |
1021 | i = 0; | | 1021 | i = 0; |
1022 | m = mrest; | | 1022 | m = mrest; |
1023 | while (m) { | | 1023 | while (m) { |
1024 | i += m->m_len; | | 1024 | i += m->m_len; |
1025 | m = m->m_next; | | 1025 | m = m->m_next; |
1026 | } | | 1026 | } |
1027 | mrest_len = i; | | 1027 | mrest_len = i; |
1028 | | | 1028 | |
1029 | /* | | 1029 | /* |
1030 | * Get the RPC header with authorization. | | 1030 | * Get the RPC header with authorization. |
1031 | */ | | 1031 | */ |
1032 | kerbauth: | | 1032 | kerbauth: |
1033 | verf_str = auth_str = (char *)0; | | 1033 | verf_str = auth_str = (char *)0; |
1034 | if (nmp->nm_flag & NFSMNT_KERB) { | | 1034 | if (nmp->nm_flag & NFSMNT_KERB) { |
1035 | verf_str = nickv; | | 1035 | verf_str = nickv; |
1036 | verf_len = sizeof (nickv); | | 1036 | verf_len = sizeof (nickv); |
1037 | auth_type = RPCAUTH_KERB4; | | 1037 | auth_type = RPCAUTH_KERB4; |
1038 | memset((void *)key, 0, sizeof (key)); | | 1038 | memset((void *)key, 0, sizeof (key)); |
1039 | if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, | | 1039 | if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, |
1040 | &auth_len, verf_str, verf_len)) { | | 1040 | &auth_len, verf_str, verf_len)) { |
1041 | error = nfs_getauth(nmp, rep, cred, &auth_str, | | 1041 | error = nfs_getauth(nmp, rep, cred, &auth_str, |
1042 | &auth_len, verf_str, &verf_len, key); | | 1042 | &auth_len, verf_str, &verf_len, key); |
1043 | if (error) { | | 1043 | if (error) { |
1044 | kmem_free(rep, sizeof(*rep)); | | 1044 | kmem_free(rep, sizeof(*rep)); |
1045 | m_freem(mrest); | | 1045 | m_freem(mrest); |
1046 | KASSERT(kauth_cred_getrefcnt(acred) == 1); | | 1046 | KASSERT(kauth_cred_getrefcnt(acred) == 1); |
1047 | kauth_cred_free(acred); | | 1047 | kauth_cred_free(acred); |
1048 | return (error); | | 1048 | return (error); |
1049 | } | | 1049 | } |
1050 | } | | 1050 | } |
1051 | retry_cred = false; | | 1051 | retry_cred = false; |
1052 | } else { | | 1052 | } else { |
1053 | /* AUTH_UNIX */ | | 1053 | /* AUTH_UNIX */ |
1054 | uid_t uid; | | 1054 | uid_t uid; |
1055 | gid_t gid; | | 1055 | gid_t gid; |
1056 | | | 1056 | |
1057 | /* | | 1057 | /* |
1058 | * on most unix filesystems, permission checks are | | 1058 | * on most unix filesystems, permission checks are |
1059 | * done when the file is open(2)'ed. | | 1059 | * done when the file is open(2)'ed. |
1060 | * ie. once a file is successfully open'ed, | | 1060 | * ie. once a file is successfully open'ed, |
1061 | * following i/o operations never fail with EACCES. | | 1061 | * following i/o operations never fail with EACCES. |
1062 | * we try to follow the semantics as far as possible. | | 1062 | * we try to follow the semantics as far as possible. |
1063 | * | | 1063 | * |
1064 | * note that we expect the nfs server to always grant | | 1064 | * note that we expect the nfs server to always grant |
1065 | * access to the file's owner. | | 1065 | * access to the file's owner. |
1066 | */ | | 1066 | */ |
1067 | origcred = cred; | | 1067 | origcred = cred; |
1068 | switch (procnum) { | | 1068 | switch (procnum) { |
1069 | case NFSPROC_READ: | | 1069 | case NFSPROC_READ: |
1070 | case NFSPROC_WRITE: | | 1070 | case NFSPROC_WRITE: |
1071 | case NFSPROC_COMMIT: | | 1071 | case NFSPROC_COMMIT: |
1072 | uid = np->n_vattr->va_uid; | | 1072 | uid = np->n_vattr->va_uid; |
1073 | gid = np->n_vattr->va_gid; | | 1073 | gid = np->n_vattr->va_gid; |
1074 | if (kauth_cred_geteuid(cred) == uid && | | 1074 | if (kauth_cred_geteuid(cred) == uid && |
1075 | kauth_cred_getegid(cred) == gid) { | | 1075 | kauth_cred_getegid(cred) == gid) { |
1076 | retry_cred = false; | | 1076 | retry_cred = false; |
1077 | break; | | 1077 | break; |
1078 | } | | 1078 | } |
1079 | if (use_opencred) | | 1079 | if (use_opencred) |
1080 | break; | | 1080 | break; |
1081 | kauth_cred_setuid(acred, uid); | | 1081 | kauth_cred_setuid(acred, uid); |
1082 | kauth_cred_seteuid(acred, uid); | | 1082 | kauth_cred_seteuid(acred, uid); |
1083 | kauth_cred_setsvuid(acred, uid); | | 1083 | kauth_cred_setsvuid(acred, uid); |
1084 | kauth_cred_setgid(acred, gid); | | 1084 | kauth_cred_setgid(acred, gid); |
1085 | kauth_cred_setegid(acred, gid); | | 1085 | kauth_cred_setegid(acred, gid); |
1086 | kauth_cred_setsvgid(acred, gid); | | 1086 | kauth_cred_setsvgid(acred, gid); |
1087 | cred = acred; | | 1087 | cred = acred; |
1088 | break; | | 1088 | break; |
1089 | default: | | 1089 | default: |
1090 | retry_cred = false; | | 1090 | retry_cred = false; |
1091 | break; | | 1091 | break; |
1092 | } | | 1092 | } |
1093 | /* | | 1093 | /* |
1094 | * back up the mbuf chain in case we need it later to retry. | | 1094 | * back up the mbuf chain in case we need it later to retry. |
1095 | * | | 1095 | * |
1096 | * XXX maybe we can keep a direct reference to | | 1096 | * XXX maybe we can keep a direct reference to |
1097 | * mrest without doing m_copym, but it's ...ugly. | | 1097 | * mrest without doing m_copym, but it's ...ugly. |
1098 | */ | | 1098 | */ |
1099 | if (retry_cred) | | 1099 | if (retry_cred) |
1100 | mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT); | | 1100 | mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT); |
1101 | auth_type = RPCAUTH_UNIX; | | 1101 | auth_type = RPCAUTH_UNIX; |
1102 | /* XXX elad - ngroups */ | | 1102 | /* XXX elad - ngroups */ |
1103 | auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ? | | 1103 | auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ? |
1104 | nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) + | | 1104 | nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) + |
1105 | 5 * NFSX_UNSIGNED; | | 1105 | 5 * NFSX_UNSIGNED; |
1106 | } | | 1106 | } |
1107 | m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, | | 1107 | m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, |
1108 | auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); | | 1108 | auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); |
1109 | if (auth_str) | | 1109 | if (auth_str) |
1110 | free(auth_str, M_TEMP); | | 1110 | free(auth_str, M_TEMP); |
1111 | | | 1111 | |
1112 | /* | | 1112 | /* |
1113 | * For stream protocols, insert a Sun RPC Record Mark. | | 1113 | * For stream protocols, insert a Sun RPC Record Mark. |
1114 | */ | | 1114 | */ |
1115 | if (nmp->nm_sotype == SOCK_STREAM) { | | 1115 | if (nmp->nm_sotype == SOCK_STREAM) { |
1116 | M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); | | 1116 | M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); |
1117 | *mtod(m, u_int32_t *) = htonl(0x80000000 | | | 1117 | *mtod(m, u_int32_t *) = htonl(0x80000000 | |
1118 | (m->m_pkthdr.len - NFSX_UNSIGNED)); | | 1118 | (m->m_pkthdr.len - NFSX_UNSIGNED)); |
1119 | } | | 1119 | } |
1120 | rep->r_mreq = m; | | 1120 | rep->r_mreq = m; |
1121 | rep->r_xid = xid; | | 1121 | rep->r_xid = xid; |
1122 | tryagain: | | 1122 | tryagain: |
1123 | if (nmp->nm_flag & NFSMNT_SOFT) | | 1123 | if (nmp->nm_flag & NFSMNT_SOFT) |
1124 | rep->r_retry = nmp->nm_retry; | | 1124 | rep->r_retry = nmp->nm_retry; |
1125 | else | | 1125 | else |
1126 | rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ | | 1126 | rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ |
1127 | rep->r_rtt = rep->r_rexmit = 0; | | 1127 | rep->r_rtt = rep->r_rexmit = 0; |
1128 | if (proct[procnum] > 0) | | 1128 | if (proct[procnum] > 0) |
1129 | rep->r_flags = R_TIMING; | | 1129 | rep->r_flags = R_TIMING; |
1130 | else | | 1130 | else |
1131 | rep->r_flags = 0; | | 1131 | rep->r_flags = 0; |
1132 | rep->r_mrep = NULL; | | 1132 | rep->r_mrep = NULL; |
1133 | | | 1133 | |
1134 | /* | | 1134 | /* |
1135 | * Do the client side RPC. | | 1135 | * Do the client side RPC. |
1136 | */ | | 1136 | */ |
1137 | nfsstats.rpcrequests++; | | 1137 | nfsstats.rpcrequests++; |
1138 | /* | | 1138 | /* |
1139 | * Chain request into list of outstanding requests. Be sure | | 1139 | * Chain request into list of outstanding requests. Be sure |
1140 | * to put it LAST so timer finds oldest requests first. | | 1140 | * to put it LAST so timer finds oldest requests first. |
1141 | */ | | 1141 | */ |
1142 | s = splsoftnet(); | | 1142 | s = splsoftnet(); |
1143 | TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); | | 1143 | TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); |
1144 | nfs_timer_start(); | | 1144 | nfs_timer_start(); |
1145 | | | 1145 | |
1146 | /* | | 1146 | /* |
1147 | * If backing off another request or avoiding congestion, don't | | 1147 | * If backing off another request or avoiding congestion, don't |
1148 | * send this one now but let timer do it. If not timing a request, | | 1148 | * send this one now but let timer do it. If not timing a request, |
1149 | * do it now. | | 1149 | * do it now. |
1150 | */ | | 1150 | */ |
1151 | if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || | | 1151 | if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || |
1152 | (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { | | 1152 | (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { |
1153 | splx(s); | | 1153 | splx(s); |
1154 | if (nmp->nm_soflags & PR_CONNREQUIRED) | | 1154 | if (nmp->nm_soflags & PR_CONNREQUIRED) |
1155 | error = nfs_sndlock(nmp, rep); | | 1155 | error = nfs_sndlock(nmp, rep); |
1156 | if (!error) { | | 1156 | if (!error) { |
1157 | m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); | | 1157 | m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); |
1158 | error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp); | | 1158 | error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp); |
1159 | if (nmp->nm_soflags & PR_CONNREQUIRED) | | 1159 | if (nmp->nm_soflags & PR_CONNREQUIRED) |
1160 | nfs_sndunlock(nmp); | | 1160 | nfs_sndunlock(nmp); |
1161 | } | | 1161 | } |
1162 | s = splsoftnet(); | | 1162 | s = splsoftnet(); |
1163 | if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { | | 1163 | if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { |
1164 | if ((rep->r_flags & R_SENT) == 0) { | | 1164 | if ((rep->r_flags & R_SENT) == 0) { |
1165 | nmp->nm_sent += NFS_CWNDSCALE; | | 1165 | nmp->nm_sent += NFS_CWNDSCALE; |
1166 | rep->r_flags |= R_SENT; | | 1166 | rep->r_flags |= R_SENT; |
1167 | } | | 1167 | } |
1168 | } | | 1168 | } |
1169 | splx(s); | | 1169 | splx(s); |
1170 | } else { | | 1170 | } else { |
1171 | splx(s); | | 1171 | splx(s); |
1172 | rep->r_rtt = -1; | | 1172 | rep->r_rtt = -1; |
1173 | } | | 1173 | } |
1174 | | | 1174 | |
1175 | /* | | 1175 | /* |
1176 | * Wait for the reply from our send or the timer's. | | 1176 | * Wait for the reply from our send or the timer's. |
1177 | */ | | 1177 | */ |
1178 | if (!error || error == EPIPE || error == EWOULDBLOCK) | | 1178 | if (!error || error == EPIPE || error == EWOULDBLOCK) |
1179 | error = nfs_reply(rep, lwp); | | 1179 | error = nfs_reply(rep, lwp); |
1180 | | | 1180 | |
1181 | /* | | 1181 | /* |
1182 | * RPC done, unlink the request. | | 1182 | * RPC done, unlink the request. |
1183 | */ | | 1183 | */ |
1184 | s = splsoftnet(); | | 1184 | s = splsoftnet(); |
1185 | TAILQ_REMOVE(&nfs_reqq, rep, r_chain); | | 1185 | TAILQ_REMOVE(&nfs_reqq, rep, r_chain); |
1186 | | | 1186 | |
1187 | /* | | 1187 | /* |
1188 | * Decrement the outstanding request count. | | 1188 | * Decrement the outstanding request count. |
1189 | */ | | 1189 | */ |
1190 | if (rep->r_flags & R_SENT) { | | 1190 | if (rep->r_flags & R_SENT) { |
1191 | rep->r_flags &= ~R_SENT; /* paranoia */ | | 1191 | rep->r_flags &= ~R_SENT; /* paranoia */ |
1192 | nmp->nm_sent -= NFS_CWNDSCALE; | | 1192 | nmp->nm_sent -= NFS_CWNDSCALE; |
1193 | } | | 1193 | } |
1194 | splx(s); | | 1194 | splx(s); |
1195 | | | 1195 | |
1196 | if (rexmitp != NULL) { | | 1196 | if (rexmitp != NULL) { |
1197 | int rexmit; | | 1197 | int rexmit; |
1198 | | | 1198 | |
1199 | if (nmp->nm_sotype != SOCK_DGRAM) | | 1199 | if (nmp->nm_sotype != SOCK_DGRAM) |
1200 | rexmit = (rep->r_flags & R_REXMITTED) != 0; | | 1200 | rexmit = (rep->r_flags & R_REXMITTED) != 0; |
1201 | else | | 1201 | else |
1202 | rexmit = rep->r_rexmit; | | 1202 | rexmit = rep->r_rexmit; |
1203 | *rexmitp = rexmit; | | 1203 | *rexmitp = rexmit; |
1204 | } | | 1204 | } |
1205 | | | 1205 | |
1206 | /* | | 1206 | /* |
1207 | * If there was a successful reply and a tprintf msg. | | 1207 | * If there was a successful reply and a tprintf msg. |
1208 | * tprintf a response. | | 1208 | * tprintf a response. |
1209 | */ | | 1209 | */ |
1210 | if (!error && (rep->r_flags & R_TPRINTFMSG)) | | 1210 | if (!error && (rep->r_flags & R_TPRINTFMSG)) |
1211 | nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname, | | 1211 | nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname, |
1212 | "is alive again"); | | 1212 | "is alive again"); |
1213 | mrep = rep->r_mrep; | | 1213 | mrep = rep->r_mrep; |
1214 | md = rep->r_md; | | 1214 | md = rep->r_md; |
1215 | dpos = rep->r_dpos; | | 1215 | dpos = rep->r_dpos; |
1216 | if (error) | | 1216 | if (error) |
1217 | goto nfsmout; | | 1217 | goto nfsmout; |
1218 | | | 1218 | |
1219 | /* | | 1219 | /* |
1220 | * break down the rpc header and check if ok | | 1220 | * break down the rpc header and check if ok |
1221 | */ | | 1221 | */ |
1222 | nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); | | 1222 | nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); |
1223 | if (*tl++ == rpc_msgdenied) { | | 1223 | if (*tl++ == rpc_msgdenied) { |
1224 | if (*tl == rpc_mismatch) | | 1224 | if (*tl == rpc_mismatch) |
1225 | error = EOPNOTSUPP; | | 1225 | error = EOPNOTSUPP; |
1226 | else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { | | 1226 | else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { |
1227 | if (!failed_auth) { | | 1227 | if (!failed_auth) { |
1228 | failed_auth++; | | 1228 | failed_auth++; |
1229 | mheadend->m_next = (struct mbuf *)0; | | 1229 | mheadend->m_next = (struct mbuf *)0; |
1230 | m_freem(mrep); | | 1230 | m_freem(mrep); |
1231 | m_freem(rep->r_mreq); | | 1231 | m_freem(rep->r_mreq); |
1232 | goto kerbauth; | | 1232 | goto kerbauth; |
1233 | } else | | 1233 | } else |
1234 | error = EAUTH; | | 1234 | error = EAUTH; |
1235 | } else | | 1235 | } else |
1236 | error = EACCES; | | 1236 | error = EACCES; |
1237 | m_freem(mrep); | | 1237 | m_freem(mrep); |
1238 | goto nfsmout; | | 1238 | goto nfsmout; |
1239 | } | | 1239 | } |
1240 | | | 1240 | |
1241 | /* | | 1241 | /* |
1242 | * Grab any Kerberos verifier, otherwise just throw it away. | | 1242 | * Grab any Kerberos verifier, otherwise just throw it away. |
1243 | */ | | 1243 | */ |
1244 | verf_type = fxdr_unsigned(int, *tl++); | | 1244 | verf_type = fxdr_unsigned(int, *tl++); |
1245 | i = fxdr_unsigned(int32_t, *tl); | | 1245 | i = fxdr_unsigned(int32_t, *tl); |
1246 | if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { | | 1246 | if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { |
1247 | error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); | | 1247 | error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); |
1248 | if (error) | | 1248 | if (error) |
1249 | goto nfsmout; | | 1249 | goto nfsmout; |
1250 | } else if (i > 0) | | 1250 | } else if (i > 0) |
1251 | nfsm_adv(nfsm_rndup(i)); | | 1251 | nfsm_adv(nfsm_rndup(i)); |
1252 | nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); | | 1252 | nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); |
1253 | /* 0 == ok */ | | 1253 | /* 0 == ok */ |
1254 | if (*tl == 0) { | | 1254 | if (*tl == 0) { |
1255 | nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); | | 1255 | nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); |
1256 | if (*tl != 0) { | | 1256 | if (*tl != 0) { |
1257 | error = fxdr_unsigned(int, *tl); | | 1257 | error = fxdr_unsigned(int, *tl); |
1258 | switch (error) { | | 1258 | switch (error) { |
1259 | case NFSERR_PERM: | | 1259 | case NFSERR_PERM: |
1260 | error = EPERM; | | 1260 | error = EPERM; |
1261 | break; | | 1261 | break; |
1262 | | | 1262 | |
1263 | case NFSERR_NOENT: | | 1263 | case NFSERR_NOENT: |
1264 | error = ENOENT; | | 1264 | error = ENOENT; |
1265 | break; | | 1265 | break; |
1266 | | | 1266 | |
1267 | case NFSERR_IO: | | 1267 | case NFSERR_IO: |
1268 | error = EIO; | | 1268 | error = EIO; |
1269 | break; | | 1269 | break; |
1270 | | | 1270 | |
1271 | case NFSERR_NXIO: | | 1271 | case NFSERR_NXIO: |
1272 | error = ENXIO; | | 1272 | error = ENXIO; |
1273 | break; | | 1273 | break; |
1274 | | | 1274 | |
1275 | case NFSERR_ACCES: | | 1275 | case NFSERR_ACCES: |
1276 | error = EACCES; | | 1276 | error = EACCES; |
1277 | if (!retry_cred) | | 1277 | if (!retry_cred) |
1278 | break; | | 1278 | break; |
1279 | m_freem(mrep); | | 1279 | m_freem(mrep); |
1280 | m_freem(rep->r_mreq); | | 1280 | m_freem(rep->r_mreq); |
1281 | kmem_free(rep, sizeof(*rep)); | | 1281 | kmem_free(rep, sizeof(*rep)); |
1282 | use_opencred = !use_opencred; | | 1282 | use_opencred = !use_opencred; |
1283 | if (mrest_backup == NULL) { | | 1283 | if (mrest_backup == NULL) { |
1284 | /* m_copym failure */ | | 1284 | /* m_copym failure */ |
1285 | KASSERT( | | 1285 | KASSERT( |
1286 | kauth_cred_getrefcnt(acred) == 1); | | 1286 | kauth_cred_getrefcnt(acred) == 1); |
1287 | kauth_cred_free(acred); | | 1287 | kauth_cred_free(acred); |
1288 | return ENOMEM; | | 1288 | return ENOMEM; |
1289 | } | | 1289 | } |
1290 | mrest = mrest_backup; | | 1290 | mrest = mrest_backup; |
1291 | mrest_backup = NULL; | | 1291 | mrest_backup = NULL; |
1292 | cred = origcred; | | 1292 | cred = origcred; |
1293 | error = 0; | | 1293 | error = 0; |
1294 | retry_cred = false; | | 1294 | retry_cred = false; |
1295 | goto tryagain_cred; | | 1295 | goto tryagain_cred; |
1296 | | | 1296 | |
1297 | case NFSERR_EXIST: | | 1297 | case NFSERR_EXIST: |
1298 | error = EEXIST; | | 1298 | error = EEXIST; |
1299 | break; | | 1299 | break; |
1300 | | | 1300 | |
1301 | case NFSERR_XDEV: | | 1301 | case NFSERR_XDEV: |
1302 | error = EXDEV; | | 1302 | error = EXDEV; |
1303 | break; | | 1303 | break; |
1304 | | | 1304 | |
1305 | case NFSERR_NODEV: | | 1305 | case NFSERR_NODEV: |
1306 | error = ENODEV; | | 1306 | error = ENODEV; |
1307 | break; | | 1307 | break; |
1308 | | | 1308 | |
1309 | case NFSERR_NOTDIR: | | 1309 | case NFSERR_NOTDIR: |
1310 | error = ENOTDIR; | | 1310 | error = ENOTDIR; |
1311 | break; | | 1311 | break; |
1312 | | | 1312 | |
1313 | case NFSERR_ISDIR: | | 1313 | case NFSERR_ISDIR: |
1314 | error = EISDIR; | | 1314 | error = EISDIR; |
1315 | break; | | 1315 | break; |
1316 | | | 1316 | |
1317 | case NFSERR_INVAL: | | 1317 | case NFSERR_INVAL: |
1318 | error = EINVAL; | | 1318 | error = EINVAL; |
1319 | break; | | 1319 | break; |
1320 | | | 1320 | |
1321 | case NFSERR_FBIG: | | 1321 | case NFSERR_FBIG: |
1322 | error = EFBIG; | | 1322 | error = EFBIG; |
1323 | break; | | 1323 | break; |
1324 | | | 1324 | |
1325 | case NFSERR_NOSPC: | | 1325 | case NFSERR_NOSPC: |
1326 | error = ENOSPC; | | 1326 | error = ENOSPC; |
1327 | break; | | 1327 | break; |
1328 | | | 1328 | |
1329 | case NFSERR_ROFS: | | 1329 | case NFSERR_ROFS: |
1330 | error = EROFS; | | 1330 | error = EROFS; |
1331 | break; | | 1331 | break; |
1332 | | | 1332 | |
1333 | case NFSERR_MLINK: | | 1333 | case NFSERR_MLINK: |
1334 | error = EMLINK; | | 1334 | error = EMLINK; |
1335 | break; | | 1335 | break; |
1336 | | | 1336 | |
1337 | case NFSERR_TIMEDOUT: | | 1337 | case NFSERR_TIMEDOUT: |
1338 | error = ETIMEDOUT; | | 1338 | error = ETIMEDOUT; |
1339 | break; | | 1339 | break; |
1340 | | | 1340 | |
1341 | case NFSERR_NAMETOL: | | 1341 | case NFSERR_NAMETOL: |
1342 | error = ENAMETOOLONG; | | 1342 | error = ENAMETOOLONG; |
1343 | break; | | 1343 | break; |
1344 | | | 1344 | |
1345 | case NFSERR_NOTEMPTY: | | 1345 | case NFSERR_NOTEMPTY: |
1346 | error = ENOTEMPTY; | | 1346 | error = ENOTEMPTY; |
1347 | break; | | 1347 | break; |