Fri Aug 28 17:01:48 2020 UTC ()
Don't cache the sa, because we are dealing with multiple mbufs (from ozaki-r)


(christos)
diff -r1.318 -r1.319 src/sys/netinet/ip_output.c

cvs diff -r1.318 -r1.319 src/sys/netinet/ip_output.c (switch to unified diff)

--- src/sys/netinet/ip_output.c 2020/08/28 06:31:42 1.318
+++ src/sys/netinet/ip_output.c 2020/08/28 17:01:48 1.319
@@ -1,1802 +1,1803 @@ @@ -1,1802 +1,1803 @@
1/* $NetBSD: ip_output.c,v 1.318 2020/08/28 06:31:42 ozaki-r Exp $ */ 1/* $NetBSD: ip_output.c,v 1.319 2020/08/28 17:01:48 christos Exp $ */
2 2
3/* 3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors 15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software 16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission. 17 * without specific prior written permission.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE. 29 * SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved. 34 * All rights reserved.
35 * 35 *
36 * This code is derived from software contributed to The NetBSD Foundation 36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under 37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 * 39 *
40 * Redistribution and use in source and binary forms, with or without 40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions 41 * modification, are permitted provided that the following conditions
42 * are met: 42 * are met:
43 * 1. Redistributions of source code must retain the above copyright 43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer. 44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright 45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the 46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution. 47 * documentation and/or other materials provided with the distribution.
48 * 48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE. 59 * POSSIBILITY OF SUCH DAMAGE.
60 */ 60 */
61 61
62/* 62/*
63 * Copyright (c) 1982, 1986, 1988, 1990, 1993 63 * Copyright (c) 1982, 1986, 1988, 1990, 1993
64 * The Regents of the University of California. All rights reserved. 64 * The Regents of the University of California. All rights reserved.
65 * 65 *
66 * Redistribution and use in source and binary forms, with or without 66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions 67 * modification, are permitted provided that the following conditions
68 * are met: 68 * are met:
69 * 1. Redistributions of source code must retain the above copyright 69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer. 70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright 71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the 72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution. 73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors 74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software 75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission. 76 * without specific prior written permission.
77 * 77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE. 88 * SUCH DAMAGE.
89 * 89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */ 91 */
92 92
93#include <sys/cdefs.h> 93#include <sys/cdefs.h>
94__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.318 2020/08/28 06:31:42 ozaki-r Exp $"); 94__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.319 2020/08/28 17:01:48 christos Exp $");
95 95
96#ifdef _KERNEL_OPT 96#ifdef _KERNEL_OPT
97#include "opt_inet.h" 97#include "opt_inet.h"
98#include "opt_ipsec.h" 98#include "opt_ipsec.h"
99#include "opt_mrouting.h" 99#include "opt_mrouting.h"
100#include "opt_net_mpsafe.h" 100#include "opt_net_mpsafe.h"
101#include "opt_mpls.h" 101#include "opt_mpls.h"
102#endif 102#endif
103 103
104#include "arp.h" 104#include "arp.h"
105 105
106#include <sys/param.h> 106#include <sys/param.h>
107#include <sys/kmem.h> 107#include <sys/kmem.h>
108#include <sys/mbuf.h> 108#include <sys/mbuf.h>
109#include <sys/socket.h> 109#include <sys/socket.h>
110#include <sys/socketvar.h> 110#include <sys/socketvar.h>
111#include <sys/kauth.h> 111#include <sys/kauth.h>
112#include <sys/systm.h> 112#include <sys/systm.h>
113#include <sys/syslog.h> 113#include <sys/syslog.h>
114 114
115#include <net/if.h> 115#include <net/if.h>
116#include <net/if_types.h> 116#include <net/if_types.h>
117#include <net/route.h> 117#include <net/route.h>
118#include <net/pfil.h> 118#include <net/pfil.h>
119 119
120#include <netinet/in.h> 120#include <netinet/in.h>
121#include <netinet/in_systm.h> 121#include <netinet/in_systm.h>
122#include <netinet/ip.h> 122#include <netinet/ip.h>
123#include <netinet/in_pcb.h> 123#include <netinet/in_pcb.h>
124#include <netinet/in_var.h> 124#include <netinet/in_var.h>
125#include <netinet/ip_var.h> 125#include <netinet/ip_var.h>
126#include <netinet/ip_private.h> 126#include <netinet/ip_private.h>
127#include <netinet/in_offload.h> 127#include <netinet/in_offload.h>
128#include <netinet/portalgo.h> 128#include <netinet/portalgo.h>
129#include <netinet/udp.h> 129#include <netinet/udp.h>
130#include <netinet/udp_var.h> 130#include <netinet/udp_var.h>
131 131
132#ifdef INET6 132#ifdef INET6
133#include <netinet6/ip6_var.h> 133#include <netinet6/ip6_var.h>
134#endif 134#endif
135 135
136#ifdef MROUTING 136#ifdef MROUTING
137#include <netinet/ip_mroute.h> 137#include <netinet/ip_mroute.h>
138#endif 138#endif
139 139
140#ifdef IPSEC 140#ifdef IPSEC
141#include <netipsec/ipsec.h> 141#include <netipsec/ipsec.h>
142#include <netipsec/key.h> 142#include <netipsec/key.h>
143#endif 143#endif
144 144
145#ifdef MPLS 145#ifdef MPLS
146#include <netmpls/mpls.h> 146#include <netmpls/mpls.h>
147#include <netmpls/mpls_var.h> 147#include <netmpls/mpls_var.h>
148#endif 148#endif
149 149
150static int ip_pcbopts(struct inpcb *, const struct sockopt *); 150static int ip_pcbopts(struct inpcb *, const struct sockopt *);
151static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 151static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
152static struct ifnet *ip_multicast_if(struct in_addr *, int *); 152static struct ifnet *ip_multicast_if(struct in_addr *, int *);
153static void ip_mloopback(struct ifnet *, struct mbuf *, 153static void ip_mloopback(struct ifnet *, struct mbuf *,
154 const struct sockaddr_in *); 154 const struct sockaddr_in *);
155static int ip_ifaddrvalid(const struct in_ifaddr *); 155static int ip_ifaddrvalid(const struct in_ifaddr *);
156 156
157extern pfil_head_t *inet_pfil_hook; /* XXX */ 157extern pfil_head_t *inet_pfil_hook; /* XXX */
158 158
159int ip_do_loopback_cksum = 0; 159int ip_do_loopback_cksum = 0;
160 160
/*
 * Attach a PACKET_TAG_MPLS tag to an outgoing packet whose route carries
 * an MPLS tag.
 *
 * Nothing is done, and 0 is returned, when the kernel is built without
 * MPLS, when rt is NULL, when the route has no tag or the tag is not
 * AF_MPLS, when the mbuf is flagged M_MCAST or M_BCAST, when ifp is not
 * IFT_ETHER, or when the label read from the route is
 * MPLS_LABEL_IMPLNULL.
 *
 * Returns 0 on success, or ENOMEM when the tag cannot be allocated.
 * The mbuf itself is never freed here.
 */
static int
ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m,
    const struct rtentry *rt)
{
#ifdef MPLS
	union mpls_shim label;
	struct m_tag *tag;

	/* Each test below bails out early, in the order the checks depend on. */
	if (rt == NULL)
		return 0;
	if (rt_gettag(rt) == NULL)
		return 0;
	if (rt_gettag(rt)->sa_family != AF_MPLS)
		return 0;
	if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
		return 0;
	if (ifp->if_type != IFT_ETHER)
		return 0;

	label.s_addr = MPLS_GETSADDR(rt);
	if (label.shim.label == MPLS_LABEL_IMPLNULL)
		return 0;

	/*
	 * XXX tentative solution to tell ether_output
	 * it's MPLS. Need some more efficient solution.
	 */
	tag = m_tag_get(PACKET_TAG_MPLS, sizeof(int) /* dummy */, M_NOWAIT);
	if (tag == NULL)
		return ENOMEM;
	m_tag_prepend(m, tag);
#endif
	return 0;
}
192 192
193/* 193/*
194 * Send an IP packet to a host. 194 * Send an IP packet to a host.
195 */ 195 */
196int 196int
197ip_if_output(struct ifnet * const ifp, struct mbuf * const m, 197ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
198 const struct sockaddr * const dst, const struct rtentry *rt) 198 const struct sockaddr * const dst, const struct rtentry *rt)
199{ 199{
200 int error = 0; 200 int error = 0;
201 201
202 if (rt != NULL) { 202 if (rt != NULL) {
203 error = rt_check_reject_route(rt, ifp); 203 error = rt_check_reject_route(rt, ifp);
204 if (error != 0) { 204 if (error != 0) {
205 IP_STATINC(IP_STAT_RTREJECT); 205 IP_STATINC(IP_STAT_RTREJECT);
206 m_freem(m); 206 m_freem(m);
207 return error; 207 return error;
208 } 208 }
209 } 209 }
210 210
211 error = ip_mark_mpls(ifp, m, rt); 211 error = ip_mark_mpls(ifp, m, rt);
212 if (error != 0) { 212 if (error != 0) {
213 m_freem(m); 213 m_freem(m);
214 return error; 214 return error;
215 } 215 }
216 216
217 error = if_output_lock(ifp, ifp, m, dst, rt); 217 error = if_output_lock(ifp, ifp, m, dst, rt);
218 218
219 return error; 219 return error;
220} 220}
221 221
222/* 222/*
223 * IP output. The packet in mbuf chain m contains a skeletal IP 223 * IP output. The packet in mbuf chain m contains a skeletal IP
224 * header (with len, off, ttl, proto, tos, src, dst). 224 * header (with len, off, ttl, proto, tos, src, dst).
225 * The mbuf chain containing the packet will be freed. 225 * The mbuf chain containing the packet will be freed.
226 * The mbuf opt, if present, will not be freed. 226 * The mbuf opt, if present, will not be freed.
227 */ 227 */
228int 228int
229ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 229ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
230 struct ip_moptions *imo, struct inpcb *inp) 230 struct ip_moptions *imo, struct inpcb *inp)
231{ 231{
232 struct rtentry *rt; 232 struct rtentry *rt;
233 struct ip *ip; 233 struct ip *ip;
234 struct ifnet *ifp, *mifp = NULL; 234 struct ifnet *ifp, *mifp = NULL;
235 struct mbuf *m = m0; 235 struct mbuf *m = m0;
236 int len, hlen, error = 0; 236 int len, hlen, error = 0;
237 struct route iproute; 237 struct route iproute;
238 const struct sockaddr_in *dst; 238 const struct sockaddr_in *dst;
239 struct in_ifaddr *ia = NULL; 239 struct in_ifaddr *ia = NULL;
240 struct ifaddr *ifa; 240 struct ifaddr *ifa;
241 int isbroadcast; 241 int isbroadcast;
242 int sw_csum; 242 int sw_csum;
243 u_long mtu; 243 u_long mtu;
244 bool natt_frag = false; 244 bool natt_frag = false;
245 bool rtmtu_nolock; 245 bool rtmtu_nolock;
246 union { 246 union {
247 struct sockaddr sa; 247 struct sockaddr sa;
248 struct sockaddr_in sin; 248 struct sockaddr_in sin;
249 } udst, usrc; 249 } udst, usrc;
250 struct sockaddr *rdst = &udst.sa; /* real IP destination, as 250 struct sockaddr *rdst = &udst.sa; /* real IP destination, as
251 * opposed to the nexthop 251 * opposed to the nexthop
252 */ 252 */
253 struct psref psref, psref_ia; 253 struct psref psref, psref_ia;
254 int bound; 254 int bound;
255 bool bind_need_restore = false; 255 bool bind_need_restore = false;
256 const struct sockaddr *sa; 256 const struct sockaddr *sa;
257 257
258 len = 0; 258 len = 0;
259 259
260 MCLAIM(m, &ip_tx_mowner); 260 MCLAIM(m, &ip_tx_mowner);
261 261
262 KASSERT((m->m_flags & M_PKTHDR) != 0); 262 KASSERT((m->m_flags & M_PKTHDR) != 0);
263 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0); 263 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0);
264 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) != 264 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) !=
265 (M_CSUM_TCPv4|M_CSUM_UDPv4)); 265 (M_CSUM_TCPv4|M_CSUM_UDPv4));
266 KASSERT(m->m_len >= sizeof(struct ip)); 266 KASSERT(m->m_len >= sizeof(struct ip));
267 267
268 hlen = sizeof(struct ip); 268 hlen = sizeof(struct ip);
269 if (opt) { 269 if (opt) {
270 m = ip_insertoptions(m, opt, &len); 270 m = ip_insertoptions(m, opt, &len);
271 hlen = len; 271 hlen = len;
272 } 272 }
273 ip = mtod(m, struct ip *); 273 ip = mtod(m, struct ip *);
274 274
275 /* 275 /*
276 * Fill in IP header. 276 * Fill in IP header.
277 */ 277 */
278 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 278 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
279 ip->ip_v = IPVERSION; 279 ip->ip_v = IPVERSION;
280 ip->ip_off = htons(0); 280 ip->ip_off = htons(0);
281 /* ip->ip_id filled in after we find out source ia */ 281 /* ip->ip_id filled in after we find out source ia */
282 ip->ip_hl = hlen >> 2; 282 ip->ip_hl = hlen >> 2;
283 IP_STATINC(IP_STAT_LOCALOUT); 283 IP_STATINC(IP_STAT_LOCALOUT);
284 } else { 284 } else {
285 hlen = ip->ip_hl << 2; 285 hlen = ip->ip_hl << 2;
286 } 286 }
287 287
288 /* 288 /*
289 * Route packet. 289 * Route packet.
290 */ 290 */
291 if (ro == NULL) { 291 if (ro == NULL) {
292 memset(&iproute, 0, sizeof(iproute)); 292 memset(&iproute, 0, sizeof(iproute));
293 ro = &iproute; 293 ro = &iproute;
294 } 294 }
295 sockaddr_in_init(&udst.sin, &ip->ip_dst, 0); 295 sockaddr_in_init(&udst.sin, &ip->ip_dst, 0);
296 dst = satocsin(rtcache_getdst(ro)); 296 dst = satocsin(rtcache_getdst(ro));
297 297
298 /* 298 /*
299 * If there is a cached route, check that it is to the same 299 * If there is a cached route, check that it is to the same
300 * destination and is still up. If not, free it and try again. 300 * destination and is still up. If not, free it and try again.
301 * The address family should also be checked in case of sharing 301 * The address family should also be checked in case of sharing
302 * the cache with IPv6. 302 * the cache with IPv6.
303 */ 303 */
304 if (dst && (dst->sin_family != AF_INET || 304 if (dst && (dst->sin_family != AF_INET ||
305 !in_hosteq(dst->sin_addr, ip->ip_dst))) 305 !in_hosteq(dst->sin_addr, ip->ip_dst)))
306 rtcache_free(ro); 306 rtcache_free(ro);
307 307
308 /* XXX must be before rtcache operations */ 308 /* XXX must be before rtcache operations */
309 bound = curlwp_bind(); 309 bound = curlwp_bind();
310 bind_need_restore = true; 310 bind_need_restore = true;
311 311
312 if ((rt = rtcache_validate(ro)) == NULL && 312 if ((rt = rtcache_validate(ro)) == NULL &&
313 (rt = rtcache_update(ro, 1)) == NULL) { 313 (rt = rtcache_update(ro, 1)) == NULL) {
314 dst = &udst.sin; 314 dst = &udst.sin;
315 error = rtcache_setdst(ro, &udst.sa); 315 error = rtcache_setdst(ro, &udst.sa);
316 if (error != 0) { 316 if (error != 0) {
317 IP_STATINC(IP_STAT_ODROPPED); 317 IP_STATINC(IP_STAT_ODROPPED);
318 goto bad; 318 goto bad;
319 } 319 }
320 } 320 }
321 321
322 /* 322 /*
323 * If routing to interface only, short circuit routing lookup. 323 * If routing to interface only, short circuit routing lookup.
324 */ 324 */
325 if (flags & IP_ROUTETOIF) { 325 if (flags & IP_ROUTETOIF) {
326 ifa = ifa_ifwithladdr_psref(sintocsa(dst), &psref_ia); 326 ifa = ifa_ifwithladdr_psref(sintocsa(dst), &psref_ia);
327 if (ifa == NULL) { 327 if (ifa == NULL) {
328 IP_STATINC(IP_STAT_NOROUTE); 328 IP_STATINC(IP_STAT_NOROUTE);
329 error = ENETUNREACH; 329 error = ENETUNREACH;
330 goto bad; 330 goto bad;
331 } 331 }
332 /* ia is already referenced by psref_ia */ 332 /* ia is already referenced by psref_ia */
333 ia = ifatoia(ifa); 333 ia = ifatoia(ifa);
334 334
335 ifp = ia->ia_ifp; 335 ifp = ia->ia_ifp;
336 mtu = ifp->if_mtu; 336 mtu = ifp->if_mtu;
337 ip->ip_ttl = 1; 337 ip->ip_ttl = 1;
338 isbroadcast = in_broadcast(dst->sin_addr, ifp); 338 isbroadcast = in_broadcast(dst->sin_addr, ifp);
339 } else if (((IN_MULTICAST(ip->ip_dst.s_addr) || 339 } else if (((IN_MULTICAST(ip->ip_dst.s_addr) ||
340 ip->ip_dst.s_addr == INADDR_BROADCAST) || 340 ip->ip_dst.s_addr == INADDR_BROADCAST) ||
341 (flags & IP_ROUTETOIFINDEX)) && 341 (flags & IP_ROUTETOIFINDEX)) &&
342 imo != NULL && imo->imo_multicast_if_index != 0) { 342 imo != NULL && imo->imo_multicast_if_index != 0) {
343 ifp = mifp = if_get_byindex(imo->imo_multicast_if_index, &psref); 343 ifp = mifp = if_get_byindex(imo->imo_multicast_if_index, &psref);
344 if (ifp == NULL) { 344 if (ifp == NULL) {
345 IP_STATINC(IP_STAT_NOROUTE); 345 IP_STATINC(IP_STAT_NOROUTE);
346 error = ENETUNREACH; 346 error = ENETUNREACH;
347 goto bad; 347 goto bad;
348 } 348 }
349 mtu = ifp->if_mtu; 349 mtu = ifp->if_mtu;
350 ia = in_get_ia_from_ifp_psref(ifp, &psref_ia); 350 ia = in_get_ia_from_ifp_psref(ifp, &psref_ia);
351 if (ia == NULL) { 351 if (ia == NULL) {
352 IP_STATINC(IP_STAT_IFNOADDR); 352 IP_STATINC(IP_STAT_IFNOADDR);
353 error = EADDRNOTAVAIL; 353 error = EADDRNOTAVAIL;
354 goto bad; 354 goto bad;
355 } 355 }
356 if (IN_MULTICAST(ip->ip_dst.s_addr) || 356 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
357 ip->ip_dst.s_addr == INADDR_BROADCAST) { 357 ip->ip_dst.s_addr == INADDR_BROADCAST) {
358 isbroadcast = 0; 358 isbroadcast = 0;
359 } else { 359 } else {
360 /* IP_ROUTETOIFINDEX */ 360 /* IP_ROUTETOIFINDEX */
361 isbroadcast = in_broadcast(dst->sin_addr, ifp); 361 isbroadcast = in_broadcast(dst->sin_addr, ifp);
362 if ((isbroadcast == 0) && ((ifp->if_flags & 362 if ((isbroadcast == 0) && ((ifp->if_flags &
363 (IFF_LOOPBACK | IFF_POINTOPOINT)) == 0) && 363 (IFF_LOOPBACK | IFF_POINTOPOINT)) == 0) &&
364 (in_direct(dst->sin_addr, ifp) == 0)) { 364 (in_direct(dst->sin_addr, ifp) == 0)) {
365 /* gateway address required */ 365 /* gateway address required */
366 if (rt == NULL) 366 if (rt == NULL)
367 rt = rtcache_init(ro); 367 rt = rtcache_init(ro);
368 if (rt == NULL || rt->rt_ifp != ifp) { 368 if (rt == NULL || rt->rt_ifp != ifp) {
369 IP_STATINC(IP_STAT_NOROUTE); 369 IP_STATINC(IP_STAT_NOROUTE);
370 error = EHOSTUNREACH; 370 error = EHOSTUNREACH;
371 goto bad; 371 goto bad;
372 } 372 }
373 rt->rt_use++; 373 rt->rt_use++;
374 if (rt->rt_flags & RTF_GATEWAY) 374 if (rt->rt_flags & RTF_GATEWAY)
375 dst = satosin(rt->rt_gateway); 375 dst = satosin(rt->rt_gateway);
376 if (rt->rt_flags & RTF_HOST) 376 if (rt->rt_flags & RTF_HOST)
377 isbroadcast = 377 isbroadcast =
378 rt->rt_flags & RTF_BROADCAST; 378 rt->rt_flags & RTF_BROADCAST;
379 } 379 }
380 } 380 }
381 } else { 381 } else {
382 if (rt == NULL) 382 if (rt == NULL)
383 rt = rtcache_init(ro); 383 rt = rtcache_init(ro);
384 if (rt == NULL) { 384 if (rt == NULL) {
385 IP_STATINC(IP_STAT_NOROUTE); 385 IP_STATINC(IP_STAT_NOROUTE);
386 error = EHOSTUNREACH; 386 error = EHOSTUNREACH;
387 goto bad; 387 goto bad;
388 } 388 }
389 if (ifa_is_destroying(rt->rt_ifa)) { 389 if (ifa_is_destroying(rt->rt_ifa)) {
390 rtcache_unref(rt, ro); 390 rtcache_unref(rt, ro);
391 rt = NULL; 391 rt = NULL;
392 IP_STATINC(IP_STAT_NOROUTE); 392 IP_STATINC(IP_STAT_NOROUTE);
393 error = EHOSTUNREACH; 393 error = EHOSTUNREACH;
394 goto bad; 394 goto bad;
395 } 395 }
396 ifa_acquire(rt->rt_ifa, &psref_ia); 396 ifa_acquire(rt->rt_ifa, &psref_ia);
397 ia = ifatoia(rt->rt_ifa); 397 ia = ifatoia(rt->rt_ifa);
398 ifp = rt->rt_ifp; 398 ifp = rt->rt_ifp;
399 if ((mtu = rt->rt_rmx.rmx_mtu) == 0) 399 if ((mtu = rt->rt_rmx.rmx_mtu) == 0)
400 mtu = ifp->if_mtu; 400 mtu = ifp->if_mtu;
401 rt->rt_use++; 401 rt->rt_use++;
402 if (rt->rt_flags & RTF_GATEWAY) 402 if (rt->rt_flags & RTF_GATEWAY)
403 dst = satosin(rt->rt_gateway); 403 dst = satosin(rt->rt_gateway);
404 if (rt->rt_flags & RTF_HOST) 404 if (rt->rt_flags & RTF_HOST)
405 isbroadcast = rt->rt_flags & RTF_BROADCAST; 405 isbroadcast = rt->rt_flags & RTF_BROADCAST;
406 else 406 else
407 isbroadcast = in_broadcast(dst->sin_addr, ifp); 407 isbroadcast = in_broadcast(dst->sin_addr, ifp);
408 } 408 }
409 rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0; 409 rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0;
410 410
411 if (IN_MULTICAST(ip->ip_dst.s_addr) || 411 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
412 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 412 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
413 bool inmgroup; 413 bool inmgroup;
414 414
415 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 415 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
416 M_BCAST : M_MCAST; 416 M_BCAST : M_MCAST;
417 /* 417 /*
418 * See if the caller provided any multicast options 418 * See if the caller provided any multicast options
419 */ 419 */
420 if (imo != NULL) 420 if (imo != NULL)
421 ip->ip_ttl = imo->imo_multicast_ttl; 421 ip->ip_ttl = imo->imo_multicast_ttl;
422 else 422 else
423 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 423 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
424 424
425 /* 425 /*
426 * if we don't know the outgoing ifp yet, we can't generate 426 * if we don't know the outgoing ifp yet, we can't generate
427 * output 427 * output
428 */ 428 */
429 if (!ifp) { 429 if (!ifp) {
430 IP_STATINC(IP_STAT_NOROUTE); 430 IP_STATINC(IP_STAT_NOROUTE);
431 error = ENETUNREACH; 431 error = ENETUNREACH;
432 goto bad; 432 goto bad;
433 } 433 }
434 434
435 /* 435 /*
436 * If the packet is multicast or broadcast, confirm that 436 * If the packet is multicast or broadcast, confirm that
437 * the outgoing interface can transmit it. 437 * the outgoing interface can transmit it.
438 */ 438 */
439 if (((m->m_flags & M_MCAST) && 439 if (((m->m_flags & M_MCAST) &&
440 (ifp->if_flags & IFF_MULTICAST) == 0) || 440 (ifp->if_flags & IFF_MULTICAST) == 0) ||
441 ((m->m_flags & M_BCAST) && 441 ((m->m_flags & M_BCAST) &&
442 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) { 442 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) {
443 IP_STATINC(IP_STAT_NOROUTE); 443 IP_STATINC(IP_STAT_NOROUTE);
444 error = ENETUNREACH; 444 error = ENETUNREACH;
445 goto bad; 445 goto bad;
446 } 446 }
447 /* 447 /*
448 * If source address not specified yet, use an address 448 * If source address not specified yet, use an address
449 * of outgoing interface. 449 * of outgoing interface.
450 */ 450 */
451 if (in_nullhost(ip->ip_src)) { 451 if (in_nullhost(ip->ip_src)) {
452 struct in_ifaddr *xia; 452 struct in_ifaddr *xia;
453 struct ifaddr *xifa; 453 struct ifaddr *xifa;
454 struct psref _psref; 454 struct psref _psref;
455 455
456 xia = in_get_ia_from_ifp_psref(ifp, &_psref); 456 xia = in_get_ia_from_ifp_psref(ifp, &_psref);
457 if (!xia) { 457 if (!xia) {
458 IP_STATINC(IP_STAT_IFNOADDR); 458 IP_STATINC(IP_STAT_IFNOADDR);
459 error = EADDRNOTAVAIL; 459 error = EADDRNOTAVAIL;
460 goto bad; 460 goto bad;
461 } 461 }
462 xifa = &xia->ia_ifa; 462 xifa = &xia->ia_ifa;
463 if (xifa->ifa_getifa != NULL) { 463 if (xifa->ifa_getifa != NULL) {
464 ia4_release(xia, &_psref); 464 ia4_release(xia, &_psref);
465 /* FIXME ifa_getifa is NOMPSAFE */ 465 /* FIXME ifa_getifa is NOMPSAFE */
466 xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst)); 466 xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
467 if (xia == NULL) { 467 if (xia == NULL) {
468 IP_STATINC(IP_STAT_IFNOADDR); 468 IP_STATINC(IP_STAT_IFNOADDR);
469 error = EADDRNOTAVAIL; 469 error = EADDRNOTAVAIL;
470 goto bad; 470 goto bad;
471 } 471 }
472 ia4_acquire(xia, &_psref); 472 ia4_acquire(xia, &_psref);
473 } 473 }
474 ip->ip_src = xia->ia_addr.sin_addr; 474 ip->ip_src = xia->ia_addr.sin_addr;
475 ia4_release(xia, &_psref); 475 ia4_release(xia, &_psref);
476 } 476 }
477 477
478 inmgroup = in_multi_group(ip->ip_dst, ifp, flags); 478 inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
479 if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) { 479 if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
480 /* 480 /*
481 * If we belong to the destination multicast group 481 * If we belong to the destination multicast group
482 * on the outgoing interface, and the caller did not 482 * on the outgoing interface, and the caller did not
483 * forbid loopback, loop back a copy. 483 * forbid loopback, loop back a copy.
484 */ 484 */
485 ip_mloopback(ifp, m, &udst.sin); 485 ip_mloopback(ifp, m, &udst.sin);
486 } 486 }
487#ifdef MROUTING 487#ifdef MROUTING
488 else { 488 else {
489 /* 489 /*
490 * If we are acting as a multicast router, perform 490 * If we are acting as a multicast router, perform
491 * multicast forwarding as if the packet had just 491 * multicast forwarding as if the packet had just
492 * arrived on the interface to which we are about 492 * arrived on the interface to which we are about
493 * to send. The multicast forwarding function 493 * to send. The multicast forwarding function
494 * recursively calls this function, using the 494 * recursively calls this function, using the
495 * IP_FORWARDING flag to prevent infinite recursion. 495 * IP_FORWARDING flag to prevent infinite recursion.
496 * 496 *
497 * Multicasts that are looped back by ip_mloopback(), 497 * Multicasts that are looped back by ip_mloopback(),
498 * above, will be forwarded by the ip_input() routine, 498 * above, will be forwarded by the ip_input() routine,
499 * if necessary. 499 * if necessary.
500 */ 500 */
501 extern struct socket *ip_mrouter; 501 extern struct socket *ip_mrouter;
502 502
503 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 503 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
504 if (ip_mforward(m, ifp) != 0) { 504 if (ip_mforward(m, ifp) != 0) {
505 m_freem(m); 505 m_freem(m);
506 goto done; 506 goto done;
507 } 507 }
508 } 508 }
509 } 509 }
510#endif 510#endif
511 /* 511 /*
512 * Multicasts with a time-to-live of zero may be looped- 512 * Multicasts with a time-to-live of zero may be looped-
513 * back, above, but must not be transmitted on a network. 513 * back, above, but must not be transmitted on a network.
514 * Also, multicasts addressed to the loopback interface 514 * Also, multicasts addressed to the loopback interface
515 * are not sent -- the above call to ip_mloopback() will 515 * are not sent -- the above call to ip_mloopback() will
516 * loop back a copy if this host actually belongs to the 516 * loop back a copy if this host actually belongs to the
517 * destination group on the loopback interface. 517 * destination group on the loopback interface.
518 */ 518 */
519 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 519 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
520 IP_STATINC(IP_STAT_ODROPPED); 520 IP_STATINC(IP_STAT_ODROPPED);
521 m_freem(m); 521 m_freem(m);
522 goto done; 522 goto done;
523 } 523 }
524 goto sendit; 524 goto sendit;
525 } 525 }
526 526
527 /* 527 /*
528 * If source address not specified yet, use address 528 * If source address not specified yet, use address
529 * of outgoing interface. 529 * of outgoing interface.
530 */ 530 */
531 if (in_nullhost(ip->ip_src)) { 531 if (in_nullhost(ip->ip_src)) {
532 struct ifaddr *xifa; 532 struct ifaddr *xifa;
533 533
534 xifa = &ia->ia_ifa; 534 xifa = &ia->ia_ifa;
535 if (xifa->ifa_getifa != NULL) { 535 if (xifa->ifa_getifa != NULL) {
536 ia4_release(ia, &psref_ia); 536 ia4_release(ia, &psref_ia);
537 /* FIXME ifa_getifa is NOMPSAFE */ 537 /* FIXME ifa_getifa is NOMPSAFE */
538 ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst)); 538 ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
539 if (ia == NULL) { 539 if (ia == NULL) {
540 error = EADDRNOTAVAIL; 540 error = EADDRNOTAVAIL;
541 goto bad; 541 goto bad;
542 } 542 }
543 ia4_acquire(ia, &psref_ia); 543 ia4_acquire(ia, &psref_ia);
544 } 544 }
545 ip->ip_src = ia->ia_addr.sin_addr; 545 ip->ip_src = ia->ia_addr.sin_addr;
546 } 546 }
547 547
548 /* 548 /*
549 * Packets with Class-D address as source are not valid per 549 * Packets with Class-D address as source are not valid per
550 * RFC1112. 550 * RFC1112.
551 */ 551 */
552 if (IN_MULTICAST(ip->ip_src.s_addr)) { 552 if (IN_MULTICAST(ip->ip_src.s_addr)) {
553 IP_STATINC(IP_STAT_ODROPPED); 553 IP_STATINC(IP_STAT_ODROPPED);
554 error = EADDRNOTAVAIL; 554 error = EADDRNOTAVAIL;
555 goto bad; 555 goto bad;
556 } 556 }
557 557
558 /* 558 /*
559 * Look for broadcast address and verify user is allowed to 559 * Look for broadcast address and verify user is allowed to
560 * send such a packet. 560 * send such a packet.
561 */ 561 */
562 if (isbroadcast) { 562 if (isbroadcast) {
563 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 563 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
564 IP_STATINC(IP_STAT_BCASTDENIED); 564 IP_STATINC(IP_STAT_BCASTDENIED);
565 error = EADDRNOTAVAIL; 565 error = EADDRNOTAVAIL;
566 goto bad; 566 goto bad;
567 } 567 }
568 if ((flags & IP_ALLOWBROADCAST) == 0) { 568 if ((flags & IP_ALLOWBROADCAST) == 0) {
569 IP_STATINC(IP_STAT_BCASTDENIED); 569 IP_STATINC(IP_STAT_BCASTDENIED);
570 error = EACCES; 570 error = EACCES;
571 goto bad; 571 goto bad;
572 } 572 }
573 /* don't allow broadcast messages to be fragmented */ 573 /* don't allow broadcast messages to be fragmented */
574 if (ntohs(ip->ip_len) > ifp->if_mtu) { 574 if (ntohs(ip->ip_len) > ifp->if_mtu) {
575 IP_STATINC(IP_STAT_BCASTDENIED); 575 IP_STATINC(IP_STAT_BCASTDENIED);
576 error = EMSGSIZE; 576 error = EMSGSIZE;
577 goto bad; 577 goto bad;
578 } 578 }
579 m->m_flags |= M_BCAST; 579 m->m_flags |= M_BCAST;
580 } else 580 } else
581 m->m_flags &= ~M_BCAST; 581 m->m_flags &= ~M_BCAST;
582 582
583sendit: 583sendit:
584 if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) { 584 if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) {
585 if (m->m_pkthdr.len < IP_MINFRAGSIZE) { 585 if (m->m_pkthdr.len < IP_MINFRAGSIZE) {
586 ip->ip_id = 0; 586 ip->ip_id = 0;
587 } else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { 587 } else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
588 ip->ip_id = ip_newid(ia); 588 ip->ip_id = ip_newid(ia);
589 } else { 589 } else {
590 /* 590 /*
591 * TSO capable interfaces (typically?) increment 591 * TSO capable interfaces (typically?) increment
592 * ip_id for each segment. 592 * ip_id for each segment.
593 * "allocate" enough ids here to increase the chance 593 * "allocate" enough ids here to increase the chance
594 * for them to be unique. 594 * for them to be unique.
595 * 595 *
596 * note that the following calculation is not 596 * note that the following calculation is not
597 * needed to be precise. wasting some ip_id is fine. 597 * needed to be precise. wasting some ip_id is fine.
598 */ 598 */
599 599
600 unsigned int segsz = m->m_pkthdr.segsz; 600 unsigned int segsz = m->m_pkthdr.segsz;
601 unsigned int datasz = ntohs(ip->ip_len) - hlen; 601 unsigned int datasz = ntohs(ip->ip_len) - hlen;
602 unsigned int num = howmany(datasz, segsz); 602 unsigned int num = howmany(datasz, segsz);
603 603
604 ip->ip_id = ip_newid_range(ia, num); 604 ip->ip_id = ip_newid_range(ia, num);
605 } 605 }
606 } 606 }
607 if (ia != NULL) { 607 if (ia != NULL) {
608 ia4_release(ia, &psref_ia); 608 ia4_release(ia, &psref_ia);
609 ia = NULL; 609 ia = NULL;
610 } 610 }
611 611
612 /* 612 /*
613 * If we're doing Path MTU Discovery, we need to set DF unless 613 * If we're doing Path MTU Discovery, we need to set DF unless
614 * the route's MTU is locked. 614 * the route's MTU is locked.
615 */ 615 */
616 if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) { 616 if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) {
617 ip->ip_off |= htons(IP_DF); 617 ip->ip_off |= htons(IP_DF);
618 } 618 }
619 619
620#ifdef IPSEC 620#ifdef IPSEC
621 if (ipsec_used) { 621 if (ipsec_used) {
622 bool ipsec_done = false; 622 bool ipsec_done = false;
623 bool count_drop = false; 623 bool count_drop = false;
624 624
625 /* Perform IPsec processing, if any. */ 625 /* Perform IPsec processing, if any. */
626 error = ipsec4_output(m, inp, flags, &mtu, &natt_frag, 626 error = ipsec4_output(m, inp, flags, &mtu, &natt_frag,
627 &ipsec_done, &count_drop); 627 &ipsec_done, &count_drop);
628 if (count_drop) 628 if (count_drop)
629 IP_STATINC(IP_STAT_IPSECDROP_OUT); 629 IP_STATINC(IP_STAT_IPSECDROP_OUT);
630 if (error || ipsec_done) 630 if (error || ipsec_done)
631 goto done; 631 goto done;
632 } 632 }
633 633
634 if (!ipsec_used || !natt_frag) 634 if (!ipsec_used || !natt_frag)
635#endif 635#endif
636 { 636 {
637 /* 637 /*
638 * Run through list of hooks for output packets. 638 * Run through list of hooks for output packets.
639 */ 639 */
640 error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT); 640 error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT);
641 if (error || m == NULL) { 641 if (error || m == NULL) {
642 IP_STATINC(IP_STAT_PFILDROP_OUT); 642 IP_STATINC(IP_STAT_PFILDROP_OUT);
643 goto done; 643 goto done;
644 } 644 }
645 } 645 }
646 646
647 ip = mtod(m, struct ip *); 647 ip = mtod(m, struct ip *);
648 hlen = ip->ip_hl << 2; 648 hlen = ip->ip_hl << 2;
649 649
650 m->m_pkthdr.csum_data |= hlen << 16; 650 m->m_pkthdr.csum_data |= hlen << 16;
651 651
652 /* 652 /*
653 * search for the source address structure to 653 * search for the source address structure to
654 * maintain output statistics, and verify address 654 * maintain output statistics, and verify address
655 * validity 655 * validity
656 */ 656 */
657 KASSERT(ia == NULL); 657 KASSERT(ia == NULL);
658 sockaddr_in_init(&usrc.sin, &ip->ip_src, 0); 658 sockaddr_in_init(&usrc.sin, &ip->ip_src, 0);
659 ifa = ifaof_ifpforaddr_psref(&usrc.sa, ifp, &psref_ia); 659 ifa = ifaof_ifpforaddr_psref(&usrc.sa, ifp, &psref_ia);
660 if (ifa != NULL) 660 if (ifa != NULL)
661 ia = ifatoia(ifa); 661 ia = ifatoia(ifa);
662 662
663 /* 663 /*
664 * Ensure we only send from a valid address. 664 * Ensure we only send from a valid address.
665 * A NULL address is valid because the packet could be 665 * A NULL address is valid because the packet could be
666 * generated from a packet filter. 666 * generated from a packet filter.
667 */ 667 */
668 if (ia != NULL && (flags & IP_FORWARDING) == 0 && 668 if (ia != NULL && (flags & IP_FORWARDING) == 0 &&
669 (error = ip_ifaddrvalid(ia)) != 0) 669 (error = ip_ifaddrvalid(ia)) != 0)
670 { 670 {
671 ARPLOG(LOG_ERR, 671 ARPLOG(LOG_ERR,
672 "refusing to send from invalid address %s (pid %d)\n", 672 "refusing to send from invalid address %s (pid %d)\n",
673 ARPLOGADDR(&ip->ip_src), curproc->p_pid); 673 ARPLOGADDR(&ip->ip_src), curproc->p_pid);
674 IP_STATINC(IP_STAT_ODROPPED); 674 IP_STATINC(IP_STAT_ODROPPED);
675 if (error == 1) 675 if (error == 1)
676 /* 676 /*
677 * Address exists, but is tentative or detached. 677 * Address exists, but is tentative or detached.
678 * We can't send from it because it's invalid, 678 * We can't send from it because it's invalid,
679 * so we drop the packet. 679 * so we drop the packet.
680 */ 680 */
681 error = 0; 681 error = 0;
682 else 682 else
683 error = EADDRNOTAVAIL; 683 error = EADDRNOTAVAIL;
684 goto bad; 684 goto bad;
685 } 685 }
686 686
687 /* Maybe skip checksums on loopback interfaces. */ 687 /* Maybe skip checksums on loopback interfaces. */
688 if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) { 688 if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
689 m->m_pkthdr.csum_flags |= M_CSUM_IPv4; 689 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
690 } 690 }
691 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx; 691 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
692 692
693 sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst); 
694 
695 /* Need to fragment the packet */ 693 /* Need to fragment the packet */
696 if (ntohs(ip->ip_len) > mtu && 694 if (ntohs(ip->ip_len) > mtu &&
697 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { 695 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
698 goto fragment; 696 goto fragment;
699 } 697 }
700 698
701#if IFA_STATS 699#if IFA_STATS
702 if (ia) 700 if (ia)
703 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len); 701 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
704#endif 702#endif
705 /* 703 /*
706 * Always initialize the sum to 0! Some HW assisted 704 * Always initialize the sum to 0! Some HW assisted
707 * checksumming requires this. 705 * checksumming requires this.
708 */ 706 */
709 ip->ip_sum = 0; 707 ip->ip_sum = 0;
710 708
711 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { 709 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
712 /* 710 /*
713 * Perform any checksums that the hardware can't do 711 * Perform any checksums that the hardware can't do
714 * for us. 712 * for us.
715 * 713 *
716 * XXX Does any hardware require the {th,uh}_sum 714 * XXX Does any hardware require the {th,uh}_sum
717 * XXX fields to be 0? 715 * XXX fields to be 0?
718 */ 716 */
719 if (sw_csum & M_CSUM_IPv4) { 717 if (sw_csum & M_CSUM_IPv4) {
720 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)); 718 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
721 ip->ip_sum = in_cksum(m, hlen); 719 ip->ip_sum = in_cksum(m, hlen);
722 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4; 720 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
723 } 721 }
724 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 722 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
725 if (IN_NEED_CHECKSUM(ifp, 723 if (IN_NEED_CHECKSUM(ifp,
726 sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { 724 sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
727 in_undefer_cksum_tcpudp(m); 725 in_undefer_cksum_tcpudp(m);
728 } 726 }
729 m->m_pkthdr.csum_flags &= 727 m->m_pkthdr.csum_flags &=
730 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 728 ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
731 } 729 }
732 } 730 }
733 731
 732 sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst);
 733
734 /* Send it */ 734 /* Send it */
735 if (__predict_false(sw_csum & M_CSUM_TSOv4)) { 735 if (__predict_false(sw_csum & M_CSUM_TSOv4)) {
736 /* 736 /*
737 * TSO4 is required by a packet, but disabled for 737 * TSO4 is required by a packet, but disabled for
738 * the interface. 738 * the interface.
739 */ 739 */
740 error = ip_tso_output(ifp, m, sa, rt); 740 error = ip_tso_output(ifp, m, sa, rt);
741 } else 741 } else
742 error = ip_if_output(ifp, m, sa, rt); 742 error = ip_if_output(ifp, m, sa, rt);
743 goto done; 743 goto done;
744 744
745fragment: 745fragment:
746 /* 746 /*
747 * We can't use HW checksumming if we're about to fragment the packet. 747 * We can't use HW checksumming if we're about to fragment the packet.
748 * 748 *
749 * XXX Some hardware can do this. 749 * XXX Some hardware can do this.
750 */ 750 */
751 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 751 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
752 if (IN_NEED_CHECKSUM(ifp, 752 if (IN_NEED_CHECKSUM(ifp,
753 m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { 753 m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
754 in_undefer_cksum_tcpudp(m); 754 in_undefer_cksum_tcpudp(m);
755 } 755 }
756 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 756 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
757 } 757 }
758 758
759 /* 759 /*
760 * Too large for interface; fragment if possible. 760 * Too large for interface; fragment if possible.
761 * Must be able to put at least 8 bytes per fragment. 761 * Must be able to put at least 8 bytes per fragment.
762 */ 762 */
763 if (ntohs(ip->ip_off) & IP_DF) { 763 if (ntohs(ip->ip_off) & IP_DF) {
764 if (flags & IP_RETURNMTU) { 764 if (flags & IP_RETURNMTU) {
765 KASSERT(inp != NULL); 765 KASSERT(inp != NULL);
766 inp->inp_errormtu = mtu; 766 inp->inp_errormtu = mtu;
767 } 767 }
768 error = EMSGSIZE; 768 error = EMSGSIZE;
769 IP_STATINC(IP_STAT_CANTFRAG); 769 IP_STATINC(IP_STAT_CANTFRAG);
770 goto bad; 770 goto bad;
771 } 771 }
772 772
773 error = ip_fragment(m, ifp, mtu); 773 error = ip_fragment(m, ifp, mtu);
774 if (error) { 774 if (error) {
775 m = NULL; 775 m = NULL;
776 goto bad; 776 goto bad;
777 } 777 }
778 778
779 for (; m; m = m0) { 779 for (; m; m = m0) {
780 m0 = m->m_nextpkt; 780 m0 = m->m_nextpkt;
781 m->m_nextpkt = NULL; 781 m->m_nextpkt = NULL;
782 if (error) { 782 if (error) {
783 m_freem(m); 783 m_freem(m);
784 continue; 784 continue;
785 } 785 }
786#if IFA_STATS 786#if IFA_STATS
787 if (ia) 787 if (ia)
788 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len); 788 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
789#endif 789#endif
790 /* 790 /*
791 * If we get there, the packet has not been handled by 791 * If we get there, the packet has not been handled by
792 * IPsec whereas it should have. Now that it has been 792 * IPsec whereas it should have. Now that it has been
793 * fragmented, re-inject it in ip_output so that IPsec 793 * fragmented, re-inject it in ip_output so that IPsec
794 * processing can occur. 794 * processing can occur.
795 */ 795 */
796 if (natt_frag) { 796 if (natt_frag) {
797 error = ip_output(m, opt, NULL, 797 error = ip_output(m, opt, NULL,
798 flags | IP_RAWOUTPUT | IP_NOIPNEWID, 798 flags | IP_RAWOUTPUT | IP_NOIPNEWID,
799 imo, inp); 799 imo, inp);
800 } else { 800 } else {
801 KASSERT((m->m_pkthdr.csum_flags & 801 KASSERT((m->m_pkthdr.csum_flags &
802 (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0); 802 (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
803 error = ip_if_output(ifp, m, sa, rt); 803 error = ip_if_output(ifp, m, (m->m_flags & M_MCAST) ?
 804 sintocsa(rdst) : sintocsa(dst), rt);
804 } 805 }
805 } 806 }
806 if (error == 0) { 807 if (error == 0) {
807 IP_STATINC(IP_STAT_FRAGMENTED); 808 IP_STATINC(IP_STAT_FRAGMENTED);
808 } 809 }
809 810
810done: 811done:
811 ia4_release(ia, &psref_ia); 812 ia4_release(ia, &psref_ia);
812 rtcache_unref(rt, ro); 813 rtcache_unref(rt, ro);
813 if (ro == &iproute) { 814 if (ro == &iproute) {
814 rtcache_free(&iproute); 815 rtcache_free(&iproute);
815 } 816 }
816 if (mifp != NULL) { 817 if (mifp != NULL) {
817 if_put(mifp, &psref); 818 if_put(mifp, &psref);
818 } 819 }
819 if (bind_need_restore) 820 if (bind_need_restore)
820 curlwp_bindx(bound); 821 curlwp_bindx(bound);
821 return error; 822 return error;
822 823
823bad: 824bad:
824 m_freem(m); 825 m_freem(m);
825 goto done; 826 goto done;
826} 827}
827 828
828int 829int
829ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 830ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
830{ 831{
831 struct ip *ip, *mhip; 832 struct ip *ip, *mhip;
832 struct mbuf *m0; 833 struct mbuf *m0;
833 int len, hlen, off; 834 int len, hlen, off;
834 int mhlen, firstlen; 835 int mhlen, firstlen;
835 struct mbuf **mnext; 836 struct mbuf **mnext;
836 int sw_csum = m->m_pkthdr.csum_flags; 837 int sw_csum = m->m_pkthdr.csum_flags;
837 int fragments = 0; 838 int fragments = 0;
838 int error = 0; 839 int error = 0;
839 int ipoff, ipflg; 840 int ipoff, ipflg;
840 841
841 ip = mtod(m, struct ip *); 842 ip = mtod(m, struct ip *);
842 hlen = ip->ip_hl << 2; 843 hlen = ip->ip_hl << 2;
843 844
844 /* Preserve the offset and flags. */ 845 /* Preserve the offset and flags. */
845 ipoff = ntohs(ip->ip_off) & IP_OFFMASK; 846 ipoff = ntohs(ip->ip_off) & IP_OFFMASK;
846 ipflg = ntohs(ip->ip_off) & (IP_RF|IP_DF|IP_MF); 847 ipflg = ntohs(ip->ip_off) & (IP_RF|IP_DF|IP_MF);
847 848
848 if (ifp != NULL) 849 if (ifp != NULL)
849 sw_csum &= ~ifp->if_csum_flags_tx; 850 sw_csum &= ~ifp->if_csum_flags_tx;
850 851
851 len = (mtu - hlen) &~ 7; 852 len = (mtu - hlen) &~ 7;
852 if (len < 8) { 853 if (len < 8) {
853 IP_STATINC(IP_STAT_CANTFRAG); 854 IP_STATINC(IP_STAT_CANTFRAG);
854 m_freem(m); 855 m_freem(m);
855 return EMSGSIZE; 856 return EMSGSIZE;
856 } 857 }
857 858
858 firstlen = len; 859 firstlen = len;
859 mnext = &m->m_nextpkt; 860 mnext = &m->m_nextpkt;
860 861
861 /* 862 /*
862 * Loop through length of segment after first fragment, 863 * Loop through length of segment after first fragment,
863 * make new header and copy data of each part and link onto chain. 864 * make new header and copy data of each part and link onto chain.
864 */ 865 */
865 m0 = m; 866 m0 = m;
866 mhlen = sizeof(struct ip); 867 mhlen = sizeof(struct ip);
867 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 868 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
868 MGETHDR(m, M_DONTWAIT, MT_HEADER); 869 MGETHDR(m, M_DONTWAIT, MT_HEADER);
869 if (m == NULL) { 870 if (m == NULL) {
870 error = ENOBUFS; 871 error = ENOBUFS;
871 IP_STATINC(IP_STAT_ODROPPED); 872 IP_STATINC(IP_STAT_ODROPPED);
872 goto sendorfree; 873 goto sendorfree;
873 } 874 }
874 MCLAIM(m, m0->m_owner); 875 MCLAIM(m, m0->m_owner);
875 876
876 *mnext = m; 877 *mnext = m;
877 mnext = &m->m_nextpkt; 878 mnext = &m->m_nextpkt;
878 879
879 m->m_data += max_linkhdr; 880 m->m_data += max_linkhdr;
880 mhip = mtod(m, struct ip *); 881 mhip = mtod(m, struct ip *);
881 *mhip = *ip; 882 *mhip = *ip;
882 883
883 /* we must inherit the flags */ 884 /* we must inherit the flags */
884 m->m_flags |= m0->m_flags & M_COPYFLAGS; 885 m->m_flags |= m0->m_flags & M_COPYFLAGS;
885 886
886 if (hlen > sizeof(struct ip)) { 887 if (hlen > sizeof(struct ip)) {
887 mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip); 888 mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip);
888 mhip->ip_hl = mhlen >> 2; 889 mhip->ip_hl = mhlen >> 2;
889 } 890 }
890 m->m_len = mhlen; 891 m->m_len = mhlen;
891 892
892 mhip->ip_off = ((off - hlen) >> 3) + ipoff; 893 mhip->ip_off = ((off - hlen) >> 3) + ipoff;
893 mhip->ip_off |= ipflg; 894 mhip->ip_off |= ipflg;
894 if (off + len >= ntohs(ip->ip_len)) 895 if (off + len >= ntohs(ip->ip_len))
895 len = ntohs(ip->ip_len) - off; 896 len = ntohs(ip->ip_len) - off;
896 else 897 else
897 mhip->ip_off |= IP_MF; 898 mhip->ip_off |= IP_MF;
898 HTONS(mhip->ip_off); 899 HTONS(mhip->ip_off);
899 900
900 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 901 mhip->ip_len = htons((u_int16_t)(len + mhlen));
901 m->m_next = m_copym(m0, off, len, M_DONTWAIT); 902 m->m_next = m_copym(m0, off, len, M_DONTWAIT);
902 if (m->m_next == NULL) { 903 if (m->m_next == NULL) {
903 error = ENOBUFS; 904 error = ENOBUFS;
904 IP_STATINC(IP_STAT_ODROPPED); 905 IP_STATINC(IP_STAT_ODROPPED);
905 goto sendorfree; 906 goto sendorfree;
906 } 907 }
907 908
908 m->m_pkthdr.len = mhlen + len; 909 m->m_pkthdr.len = mhlen + len;
909 m_reset_rcvif(m); 910 m_reset_rcvif(m);
910 911
911 mhip->ip_sum = 0; 912 mhip->ip_sum = 0;
912 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0); 913 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
913 if (sw_csum & M_CSUM_IPv4) { 914 if (sw_csum & M_CSUM_IPv4) {
914 mhip->ip_sum = in_cksum(m, mhlen); 915 mhip->ip_sum = in_cksum(m, mhlen);
915 } else { 916 } else {
916 /* 917 /*
917 * checksum is hw-offloaded or not necessary. 918 * checksum is hw-offloaded or not necessary.
918 */ 919 */
919 m->m_pkthdr.csum_flags |= 920 m->m_pkthdr.csum_flags |=
920 m0->m_pkthdr.csum_flags & M_CSUM_IPv4; 921 m0->m_pkthdr.csum_flags & M_CSUM_IPv4;
921 m->m_pkthdr.csum_data |= mhlen << 16; 922 m->m_pkthdr.csum_data |= mhlen << 16;
922 KASSERT(!(ifp != NULL && 923 KASSERT(!(ifp != NULL &&
923 IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) || 924 IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
924 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0); 925 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
925 } 926 }
926 IP_STATINC(IP_STAT_OFRAGMENTS); 927 IP_STATINC(IP_STAT_OFRAGMENTS);
927 fragments++; 928 fragments++;
928 } 929 }
929 930
930 /* 931 /*
931 * Update first fragment by trimming what's been copied out 932 * Update first fragment by trimming what's been copied out
932 * and updating header, then send each fragment (in order). 933 * and updating header, then send each fragment (in order).
933 */ 934 */
934 m = m0; 935 m = m0;
935 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 936 m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
936 m->m_pkthdr.len = hlen + firstlen; 937 m->m_pkthdr.len = hlen + firstlen;
937 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 938 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
938 ip->ip_off |= htons(IP_MF); 939 ip->ip_off |= htons(IP_MF);
939 ip->ip_sum = 0; 940 ip->ip_sum = 0;
940 if (sw_csum & M_CSUM_IPv4) { 941 if (sw_csum & M_CSUM_IPv4) {
941 ip->ip_sum = in_cksum(m, hlen); 942 ip->ip_sum = in_cksum(m, hlen);
942 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4; 943 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
943 } else { 944 } else {
944 /* 945 /*
945 * checksum is hw-offloaded or not necessary. 946 * checksum is hw-offloaded or not necessary.
946 */ 947 */
947 KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) || 948 KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
948 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0); 949 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
949 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >= 950 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
950 sizeof(struct ip)); 951 sizeof(struct ip));
951 } 952 }
952 953
953sendorfree: 954sendorfree:
954 /* 955 /*
955 * If there is no room for all the fragments, don't queue 956 * If there is no room for all the fragments, don't queue
956 * any of them. 957 * any of them.
957 */ 958 */
958 if (ifp != NULL) { 959 if (ifp != NULL) {
959 IFQ_LOCK(&ifp->if_snd); 960 IFQ_LOCK(&ifp->if_snd);
960 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments && 961 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
961 error == 0) { 962 error == 0) {
962 error = ENOBUFS; 963 error = ENOBUFS;
963 IP_STATINC(IP_STAT_ODROPPED); 964 IP_STATINC(IP_STAT_ODROPPED);
964 IFQ_INC_DROPS(&ifp->if_snd); 965 IFQ_INC_DROPS(&ifp->if_snd);
965 } 966 }
966 IFQ_UNLOCK(&ifp->if_snd); 967 IFQ_UNLOCK(&ifp->if_snd);
967 } 968 }
968 if (error) { 969 if (error) {
969 for (m = m0; m; m = m0) { 970 for (m = m0; m; m = m0) {
970 m0 = m->m_nextpkt; 971 m0 = m->m_nextpkt;
971 m->m_nextpkt = NULL; 972 m->m_nextpkt = NULL;
972 m_freem(m); 973 m_freem(m);
973 } 974 }
974 } 975 }
975 976
976 return error; 977 return error;
977} 978}
978 979
979/* 980/*
980 * Determine the maximum length of the options to be inserted; 981 * Determine the maximum length of the options to be inserted;
981 * we would far rather allocate too much space rather than too little. 982 * we would far rather allocate too much space rather than too little.
982 */ 983 */
983u_int 984u_int
984ip_optlen(struct inpcb *inp) 985ip_optlen(struct inpcb *inp)
985{ 986{
986 struct mbuf *m = inp->inp_options; 987 struct mbuf *m = inp->inp_options;
987 988
988 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) { 989 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) {
989 return (m->m_len - offsetof(struct ipoption, ipopt_dst)); 990 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
990 } 991 }
991 return 0; 992 return 0;
992} 993}
993 994
994/* 995/*
995 * Insert IP options into preformed packet. 996 * Insert IP options into preformed packet.
996 * Adjust IP destination as required for IP source routing, 997 * Adjust IP destination as required for IP source routing,
997 * as indicated by a non-zero in_addr at the start of the options. 998 * as indicated by a non-zero in_addr at the start of the options.
998 */ 999 */
999static struct mbuf * 1000static struct mbuf *
1000ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 1001ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
1001{ 1002{
1002 struct ipoption *p = mtod(opt, struct ipoption *); 1003 struct ipoption *p = mtod(opt, struct ipoption *);
1003 struct mbuf *n; 1004 struct mbuf *n;
1004 struct ip *ip = mtod(m, struct ip *); 1005 struct ip *ip = mtod(m, struct ip *);
1005 unsigned optlen; 1006 unsigned optlen;
1006 1007
1007 optlen = opt->m_len - sizeof(p->ipopt_dst); 1008 optlen = opt->m_len - sizeof(p->ipopt_dst);
1008 KASSERT(optlen % 4 == 0); 1009 KASSERT(optlen % 4 == 0);
1009 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 1010 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
1010 return m; /* XXX should fail */ 1011 return m; /* XXX should fail */
1011 if (!in_nullhost(p->ipopt_dst)) 1012 if (!in_nullhost(p->ipopt_dst))
1012 ip->ip_dst = p->ipopt_dst; 1013 ip->ip_dst = p->ipopt_dst;
1013 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) { 1014 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
1014 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1015 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1015 if (n == NULL) 1016 if (n == NULL)
1016 return m; 1017 return m;
1017 MCLAIM(n, m->m_owner); 1018 MCLAIM(n, m->m_owner);
1018 m_move_pkthdr(n, m); 1019 m_move_pkthdr(n, m);
1019 m->m_len -= sizeof(struct ip); 1020 m->m_len -= sizeof(struct ip);
1020 m->m_data += sizeof(struct ip); 1021 m->m_data += sizeof(struct ip);
1021 n->m_next = m; 1022 n->m_next = m;
1022 n->m_len = optlen + sizeof(struct ip); 1023 n->m_len = optlen + sizeof(struct ip);
1023 n->m_data += max_linkhdr; 1024 n->m_data += max_linkhdr;
1024 memcpy(mtod(n, void *), ip, sizeof(struct ip)); 1025 memcpy(mtod(n, void *), ip, sizeof(struct ip));
1025 m = n; 1026 m = n;
1026 } else { 1027 } else {
1027 m->m_data -= optlen; 1028 m->m_data -= optlen;
1028 m->m_len += optlen; 1029 m->m_len += optlen;
1029 memmove(mtod(m, void *), ip, sizeof(struct ip)); 1030 memmove(mtod(m, void *), ip, sizeof(struct ip));
1030 } 1031 }
1031 m->m_pkthdr.len += optlen; 1032 m->m_pkthdr.len += optlen;
1032 ip = mtod(m, struct ip *); 1033 ip = mtod(m, struct ip *);
1033 memcpy(ip + 1, p->ipopt_list, optlen); 1034 memcpy(ip + 1, p->ipopt_list, optlen);
1034 *phlen = sizeof(struct ip) + optlen; 1035 *phlen = sizeof(struct ip) + optlen;
1035 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 1036 ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
1036 return m; 1037 return m;
1037} 1038}
1038 1039
1039/* 1040/*
1040 * Copy options from ipsrc to ipdst, omitting those not copied during 1041 * Copy options from ipsrc to ipdst, omitting those not copied during
1041 * fragmentation. 1042 * fragmentation.
1042 */ 1043 */
1043int 1044int
1044ip_optcopy(struct ip *ipsrc, struct ip *ipdst) 1045ip_optcopy(struct ip *ipsrc, struct ip *ipdst)
1045{ 1046{
1046 u_char *cp, *dp; 1047 u_char *cp, *dp;
1047 int opt, optlen, cnt; 1048 int opt, optlen, cnt;
1048 1049
1049 cp = (u_char *)(ipsrc + 1); 1050 cp = (u_char *)(ipsrc + 1);
1050 dp = (u_char *)(ipdst + 1); 1051 dp = (u_char *)(ipdst + 1);
1051 cnt = (ipsrc->ip_hl << 2) - sizeof(struct ip); 1052 cnt = (ipsrc->ip_hl << 2) - sizeof(struct ip);
1052 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1053 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1053 opt = cp[0]; 1054 opt = cp[0];
1054 if (opt == IPOPT_EOL) 1055 if (opt == IPOPT_EOL)
1055 break; 1056 break;
1056 if (opt == IPOPT_NOP) { 1057 if (opt == IPOPT_NOP) {
1057 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1058 /* Preserve for IP mcast tunnel's LSRR alignment. */
1058 *dp++ = IPOPT_NOP; 1059 *dp++ = IPOPT_NOP;
1059 optlen = 1; 1060 optlen = 1;
1060 continue; 1061 continue;
1061 } 1062 }
1062 1063
1063 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp)); 1064 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp));
1064 optlen = cp[IPOPT_OLEN]; 1065 optlen = cp[IPOPT_OLEN];
1065 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt); 1066 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt);
1066 1067
1067 /* Invalid lengths should have been caught by ip_dooptions. */ 1068 /* Invalid lengths should have been caught by ip_dooptions. */
1068 if (optlen > cnt) 1069 if (optlen > cnt)
1069 optlen = cnt; 1070 optlen = cnt;
1070 if (IPOPT_COPIED(opt)) { 1071 if (IPOPT_COPIED(opt)) {
1071 bcopy((void *)cp, (void *)dp, (unsigned)optlen); 1072 bcopy((void *)cp, (void *)dp, (unsigned)optlen);
1072 dp += optlen; 1073 dp += optlen;
1073 } 1074 }
1074 } 1075 }
1075 1076
1076 for (optlen = dp - (u_char *)(ipdst+1); optlen & 0x3; optlen++) { 1077 for (optlen = dp - (u_char *)(ipdst+1); optlen & 0x3; optlen++) {
1077 *dp++ = IPOPT_EOL; 1078 *dp++ = IPOPT_EOL;
1078 } 1079 }
1079 1080
1080 return optlen; 1081 return optlen;
1081} 1082}
1082 1083
1083/* 1084/*
1084 * IP socket option processing. 1085 * IP socket option processing.
1085 */ 1086 */
1086int 1087int
1087ip_ctloutput(int op, struct socket *so, struct sockopt *sopt) 1088ip_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1088{ 1089{
1089 struct inpcb *inp = sotoinpcb(so); 1090 struct inpcb *inp = sotoinpcb(so);
1090 struct ip *ip = &inp->inp_ip; 1091 struct ip *ip = &inp->inp_ip;
1091 int inpflags = inp->inp_flags; 1092 int inpflags = inp->inp_flags;
1092 int optval = 0, error = 0; 1093 int optval = 0, error = 0;
1093 struct in_pktinfo pktinfo; 1094 struct in_pktinfo pktinfo;
1094 1095
1095 KASSERT(solocked(so)); 1096 KASSERT(solocked(so));
1096 1097
1097 if (sopt->sopt_level != IPPROTO_IP) { 1098 if (sopt->sopt_level != IPPROTO_IP) {
1098 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) 1099 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER)
1099 return 0; 1100 return 0;
1100 return ENOPROTOOPT; 1101 return ENOPROTOOPT;
1101 } 1102 }
1102 1103
1103 switch (op) { 1104 switch (op) {
1104 case PRCO_SETOPT: 1105 case PRCO_SETOPT:
1105 switch (sopt->sopt_name) { 1106 switch (sopt->sopt_name) {
1106 case IP_OPTIONS: 1107 case IP_OPTIONS:
1107#ifdef notyet 1108#ifdef notyet
1108 case IP_RETOPTS: 1109 case IP_RETOPTS:
1109#endif 1110#endif
1110 error = ip_pcbopts(inp, sopt); 1111 error = ip_pcbopts(inp, sopt);
1111 break; 1112 break;
1112 1113
1113 case IP_TOS: 1114 case IP_TOS:
1114 case IP_TTL: 1115 case IP_TTL:
1115 case IP_MINTTL: 1116 case IP_MINTTL:
1116 case IP_RECVOPTS: 1117 case IP_RECVOPTS:
1117 case IP_RECVRETOPTS: 1118 case IP_RECVRETOPTS:
1118 case IP_RECVDSTADDR: 1119 case IP_RECVDSTADDR:
1119 case IP_RECVIF: 1120 case IP_RECVIF:
1120 case IP_RECVPKTINFO: 1121 case IP_RECVPKTINFO:
1121 case IP_RECVTTL: 1122 case IP_RECVTTL:
1122 error = sockopt_getint(sopt, &optval); 1123 error = sockopt_getint(sopt, &optval);
1123 if (error) 1124 if (error)
1124 break; 1125 break;
1125 1126
1126 switch (sopt->sopt_name) { 1127 switch (sopt->sopt_name) {
1127 case IP_TOS: 1128 case IP_TOS:
1128 ip->ip_tos = optval; 1129 ip->ip_tos = optval;
1129 break; 1130 break;
1130 1131
1131 case IP_TTL: 1132 case IP_TTL:
1132 ip->ip_ttl = optval; 1133 ip->ip_ttl = optval;
1133 break; 1134 break;
1134 1135
1135 case IP_MINTTL: 1136 case IP_MINTTL:
1136 if (optval > 0 && optval <= MAXTTL) 1137 if (optval > 0 && optval <= MAXTTL)
1137 inp->inp_ip_minttl = optval; 1138 inp->inp_ip_minttl = optval;
1138 else 1139 else
1139 error = EINVAL; 1140 error = EINVAL;
1140 break; 1141 break;
1141#define OPTSET(bit) \ 1142#define OPTSET(bit) \
1142 if (optval) \ 1143 if (optval) \
1143 inpflags |= bit; \ 1144 inpflags |= bit; \
1144 else \ 1145 else \
1145 inpflags &= ~bit; 1146 inpflags &= ~bit;
1146 1147
1147 case IP_RECVOPTS: 1148 case IP_RECVOPTS:
1148 OPTSET(INP_RECVOPTS); 1149 OPTSET(INP_RECVOPTS);
1149 break; 1150 break;
1150 1151
1151 case IP_RECVPKTINFO: 1152 case IP_RECVPKTINFO:
1152 OPTSET(INP_RECVPKTINFO); 1153 OPTSET(INP_RECVPKTINFO);
1153 break; 1154 break;
1154 1155
1155 case IP_RECVRETOPTS: 1156 case IP_RECVRETOPTS:
1156 OPTSET(INP_RECVRETOPTS); 1157 OPTSET(INP_RECVRETOPTS);
1157 break; 1158 break;
1158 1159
1159 case IP_RECVDSTADDR: 1160 case IP_RECVDSTADDR:
1160 OPTSET(INP_RECVDSTADDR); 1161 OPTSET(INP_RECVDSTADDR);
1161 break; 1162 break;
1162 1163
1163 case IP_RECVIF: 1164 case IP_RECVIF:
1164 OPTSET(INP_RECVIF); 1165 OPTSET(INP_RECVIF);
1165 break; 1166 break;
1166 1167
1167 case IP_RECVTTL: 1168 case IP_RECVTTL:
1168 OPTSET(INP_RECVTTL); 1169 OPTSET(INP_RECVTTL);
1169 break; 1170 break;
1170 } 1171 }
1171 break; 1172 break;
1172 case IP_PKTINFO: 1173 case IP_PKTINFO:
1173 error = sockopt_getint(sopt, &optval); 1174 error = sockopt_getint(sopt, &optval);
1174 if (!error) { 1175 if (!error) {
1175 /* Linux compatibility */ 1176 /* Linux compatibility */
1176 OPTSET(INP_RECVPKTINFO); 1177 OPTSET(INP_RECVPKTINFO);
1177 break; 1178 break;
1178 } 1179 }
1179 error = sockopt_get(sopt, &pktinfo, sizeof(pktinfo)); 1180 error = sockopt_get(sopt, &pktinfo, sizeof(pktinfo));
1180 if (error) 1181 if (error)
1181 break; 1182 break;
1182 1183
1183 if (pktinfo.ipi_ifindex == 0) { 1184 if (pktinfo.ipi_ifindex == 0) {
1184 inp->inp_prefsrcip = pktinfo.ipi_addr; 1185 inp->inp_prefsrcip = pktinfo.ipi_addr;
1185 break; 1186 break;
1186 } 1187 }
1187 1188
1188 /* Solaris compatibility */ 1189 /* Solaris compatibility */
1189 struct ifnet *ifp; 1190 struct ifnet *ifp;
1190 struct in_ifaddr *ia; 1191 struct in_ifaddr *ia;
1191 int s; 1192 int s;
1192 1193
1193 /* pick up primary address */ 1194 /* pick up primary address */
1194 s = pserialize_read_enter(); 1195 s = pserialize_read_enter();
1195 ifp = if_byindex(pktinfo.ipi_ifindex); 1196 ifp = if_byindex(pktinfo.ipi_ifindex);
1196 if (ifp == NULL) { 1197 if (ifp == NULL) {
1197 pserialize_read_exit(s); 1198 pserialize_read_exit(s);
1198 error = EADDRNOTAVAIL; 1199 error = EADDRNOTAVAIL;
1199 break; 1200 break;
1200 } 1201 }
1201 ia = in_get_ia_from_ifp(ifp); 1202 ia = in_get_ia_from_ifp(ifp);
1202 if (ia == NULL) { 1203 if (ia == NULL) {
1203 pserialize_read_exit(s); 1204 pserialize_read_exit(s);
1204 error = EADDRNOTAVAIL; 1205 error = EADDRNOTAVAIL;
1205 break; 1206 break;
1206 } 1207 }
1207 inp->inp_prefsrcip = IA_SIN(ia)->sin_addr; 1208 inp->inp_prefsrcip = IA_SIN(ia)->sin_addr;
1208 pserialize_read_exit(s); 1209 pserialize_read_exit(s);
1209 break; 1210 break;
1210 break; 1211 break;
1211#undef OPTSET 1212#undef OPTSET
1212 1213
1213 case IP_MULTICAST_IF: 1214 case IP_MULTICAST_IF:
1214 case IP_MULTICAST_TTL: 1215 case IP_MULTICAST_TTL:
1215 case IP_MULTICAST_LOOP: 1216 case IP_MULTICAST_LOOP:
1216 case IP_ADD_MEMBERSHIP: 1217 case IP_ADD_MEMBERSHIP:
1217 case IP_DROP_MEMBERSHIP: 1218 case IP_DROP_MEMBERSHIP:
1218 error = ip_setmoptions(&inp->inp_moptions, sopt); 1219 error = ip_setmoptions(&inp->inp_moptions, sopt);
1219 break; 1220 break;
1220 1221
1221 case IP_PORTRANGE: 1222 case IP_PORTRANGE:
1222 error = sockopt_getint(sopt, &optval); 1223 error = sockopt_getint(sopt, &optval);
1223 if (error) 1224 if (error)
1224 break; 1225 break;
1225 1226
1226 switch (optval) { 1227 switch (optval) {
1227 case IP_PORTRANGE_DEFAULT: 1228 case IP_PORTRANGE_DEFAULT:
1228 case IP_PORTRANGE_HIGH: 1229 case IP_PORTRANGE_HIGH:
1229 inpflags &= ~(INP_LOWPORT); 1230 inpflags &= ~(INP_LOWPORT);
1230 break; 1231 break;
1231 1232
1232 case IP_PORTRANGE_LOW: 1233 case IP_PORTRANGE_LOW:
1233 inpflags |= INP_LOWPORT; 1234 inpflags |= INP_LOWPORT;
1234 break; 1235 break;
1235 1236
1236 default: 1237 default:
1237 error = EINVAL; 1238 error = EINVAL;
1238 break; 1239 break;
1239 } 1240 }
1240 break; 1241 break;
1241 1242
1242 case IP_PORTALGO: 1243 case IP_PORTALGO:
1243 error = sockopt_getint(sopt, &optval); 1244 error = sockopt_getint(sopt, &optval);
1244 if (error) 1245 if (error)
1245 break; 1246 break;
1246 1247
1247 error = portalgo_algo_index_select( 1248 error = portalgo_algo_index_select(
1248 (struct inpcb_hdr *)inp, optval); 1249 (struct inpcb_hdr *)inp, optval);
1249 break; 1250 break;
1250 1251
1251#if defined(IPSEC) 1252#if defined(IPSEC)
1252 case IP_IPSEC_POLICY: 1253 case IP_IPSEC_POLICY:
1253 if (ipsec_enabled) { 1254 if (ipsec_enabled) {
1254 error = ipsec_set_policy(inp, 1255 error = ipsec_set_policy(inp,
1255 sopt->sopt_data, sopt->sopt_size, 1256 sopt->sopt_data, sopt->sopt_size,
1256 curlwp->l_cred); 1257 curlwp->l_cred);
1257 } else  1258 } else
1258 error = ENOPROTOOPT; 1259 error = ENOPROTOOPT;
1259 break; 1260 break;
1260#endif /* IPSEC */ 1261#endif /* IPSEC */
1261 1262
1262 default: 1263 default:
1263 error = ENOPROTOOPT; 1264 error = ENOPROTOOPT;
1264 break; 1265 break;
1265 } 1266 }
1266 break; 1267 break;
1267 1268
1268 case PRCO_GETOPT: 1269 case PRCO_GETOPT:
1269 switch (sopt->sopt_name) { 1270 switch (sopt->sopt_name) {
1270 case IP_OPTIONS: 1271 case IP_OPTIONS:
1271 case IP_RETOPTS: { 1272 case IP_RETOPTS: {
1272 struct mbuf *mopts = inp->inp_options; 1273 struct mbuf *mopts = inp->inp_options;
1273 1274
1274 if (mopts) { 1275 if (mopts) {
1275 struct mbuf *m; 1276 struct mbuf *m;
1276 1277
1277 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT); 1278 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT);
1278 if (m == NULL) { 1279 if (m == NULL) {
1279 error = ENOBUFS; 1280 error = ENOBUFS;
1280 break; 1281 break;
1281 } 1282 }
1282 error = sockopt_setmbuf(sopt, m); 1283 error = sockopt_setmbuf(sopt, m);
1283 } 1284 }
1284 break; 1285 break;
1285 } 1286 }
1286 case IP_TOS: 1287 case IP_TOS:
1287 case IP_TTL: 1288 case IP_TTL:
1288 case IP_MINTTL: 1289 case IP_MINTTL:
1289 case IP_RECVOPTS: 1290 case IP_RECVOPTS:
1290 case IP_RECVRETOPTS: 1291 case IP_RECVRETOPTS:
1291 case IP_RECVDSTADDR: 1292 case IP_RECVDSTADDR:
1292 case IP_RECVIF: 1293 case IP_RECVIF:
1293 case IP_RECVPKTINFO: 1294 case IP_RECVPKTINFO:
1294 case IP_RECVTTL: 1295 case IP_RECVTTL:
1295 case IP_ERRORMTU: 1296 case IP_ERRORMTU:
1296 switch (sopt->sopt_name) { 1297 switch (sopt->sopt_name) {
1297 case IP_TOS: 1298 case IP_TOS:
1298 optval = ip->ip_tos; 1299 optval = ip->ip_tos;
1299 break; 1300 break;
1300 1301
1301 case IP_TTL: 1302 case IP_TTL:
1302 optval = ip->ip_ttl; 1303 optval = ip->ip_ttl;
1303 break; 1304 break;
1304 1305
1305 case IP_MINTTL: 1306 case IP_MINTTL:
1306 optval = inp->inp_ip_minttl; 1307 optval = inp->inp_ip_minttl;
1307 break; 1308 break;
1308 1309
1309 case IP_ERRORMTU: 1310 case IP_ERRORMTU:
1310 optval = inp->inp_errormtu; 1311 optval = inp->inp_errormtu;
1311 break; 1312 break;
1312 1313
1313#define OPTBIT(bit) (inpflags & bit ? 1 : 0) 1314#define OPTBIT(bit) (inpflags & bit ? 1 : 0)
1314 1315
1315 case IP_RECVOPTS: 1316 case IP_RECVOPTS:
1316 optval = OPTBIT(INP_RECVOPTS); 1317 optval = OPTBIT(INP_RECVOPTS);
1317 break; 1318 break;
1318 1319
1319 case IP_RECVPKTINFO: 1320 case IP_RECVPKTINFO:
1320 optval = OPTBIT(INP_RECVPKTINFO); 1321 optval = OPTBIT(INP_RECVPKTINFO);
1321 break; 1322 break;
1322 1323
1323 case IP_RECVRETOPTS: 1324 case IP_RECVRETOPTS:
1324 optval = OPTBIT(INP_RECVRETOPTS); 1325 optval = OPTBIT(INP_RECVRETOPTS);
1325 break; 1326 break;
1326 1327
1327 case IP_RECVDSTADDR: 1328 case IP_RECVDSTADDR:
1328 optval = OPTBIT(INP_RECVDSTADDR); 1329 optval = OPTBIT(INP_RECVDSTADDR);
1329 break; 1330 break;
1330 1331
1331 case IP_RECVIF: 1332 case IP_RECVIF:
1332 optval = OPTBIT(INP_RECVIF); 1333 optval = OPTBIT(INP_RECVIF);
1333 break; 1334 break;
1334 1335
1335 case IP_RECVTTL: 1336 case IP_RECVTTL:
1336 optval = OPTBIT(INP_RECVTTL); 1337 optval = OPTBIT(INP_RECVTTL);
1337 break; 1338 break;
1338 } 1339 }
1339 error = sockopt_setint(sopt, optval); 1340 error = sockopt_setint(sopt, optval);
1340 break; 1341 break;
1341 1342
1342 case IP_PKTINFO: 1343 case IP_PKTINFO:
1343 switch (sopt->sopt_size) { 1344 switch (sopt->sopt_size) {
1344 case sizeof(int): 1345 case sizeof(int):
1345 /* Linux compatibility */ 1346 /* Linux compatibility */
1346 optval = OPTBIT(INP_RECVPKTINFO); 1347 optval = OPTBIT(INP_RECVPKTINFO);
1347 error = sockopt_setint(sopt, optval); 1348 error = sockopt_setint(sopt, optval);
1348 break; 1349 break;
1349 case sizeof(struct in_pktinfo): 1350 case sizeof(struct in_pktinfo):
1350 /* Solaris compatibility */ 1351 /* Solaris compatibility */
1351 pktinfo.ipi_ifindex = 0; 1352 pktinfo.ipi_ifindex = 0;
1352 pktinfo.ipi_addr = inp->inp_prefsrcip; 1353 pktinfo.ipi_addr = inp->inp_prefsrcip;
1353 error = sockopt_set(sopt, &pktinfo, 1354 error = sockopt_set(sopt, &pktinfo,
1354 sizeof(pktinfo)); 1355 sizeof(pktinfo));
1355 break; 1356 break;
1356 default: 1357 default:
1357 /* 1358 /*
1358 * While size is stuck at 0, and, later, if 1359 * While size is stuck at 0, and, later, if
1359 * the caller doesn't use an exactly sized 1360 * the caller doesn't use an exactly sized
1360 * recipient for the data, default to Linux 1361 * recipient for the data, default to Linux
1361 * compatibility 1362 * compatibility
1362 */ 1363 */
1363 optval = OPTBIT(INP_RECVPKTINFO); 1364 optval = OPTBIT(INP_RECVPKTINFO);
1364 error = sockopt_setint(sopt, optval); 1365 error = sockopt_setint(sopt, optval);
1365 break; 1366 break;
1366 } 1367 }
1367 break; 1368 break;
1368 1369
1369#if 0 /* defined(IPSEC) */ 1370#if 0 /* defined(IPSEC) */
1370 case IP_IPSEC_POLICY: 1371 case IP_IPSEC_POLICY:
1371 { 1372 {
1372 struct mbuf *m = NULL; 1373 struct mbuf *m = NULL;
1373 1374
1374 /* XXX this will return EINVAL as sopt is empty */ 1375 /* XXX this will return EINVAL as sopt is empty */
1375 error = ipsec_get_policy(inp, sopt->sopt_data, 1376 error = ipsec_get_policy(inp, sopt->sopt_data,
1376 sopt->sopt_size, &m); 1377 sopt->sopt_size, &m);
1377 if (error == 0) 1378 if (error == 0)
1378 error = sockopt_setmbuf(sopt, m); 1379 error = sockopt_setmbuf(sopt, m);
1379 break; 1380 break;
1380 } 1381 }
1381#endif /*IPSEC*/ 1382#endif /*IPSEC*/
1382 1383
1383 case IP_MULTICAST_IF: 1384 case IP_MULTICAST_IF:
1384 case IP_MULTICAST_TTL: 1385 case IP_MULTICAST_TTL:
1385 case IP_MULTICAST_LOOP: 1386 case IP_MULTICAST_LOOP:
1386 case IP_ADD_MEMBERSHIP: 1387 case IP_ADD_MEMBERSHIP:
1387 case IP_DROP_MEMBERSHIP: 1388 case IP_DROP_MEMBERSHIP:
1388 error = ip_getmoptions(inp->inp_moptions, sopt); 1389 error = ip_getmoptions(inp->inp_moptions, sopt);
1389 break; 1390 break;
1390 1391
1391 case IP_PORTRANGE: 1392 case IP_PORTRANGE:
1392 if (inpflags & INP_LOWPORT) 1393 if (inpflags & INP_LOWPORT)
1393 optval = IP_PORTRANGE_LOW; 1394 optval = IP_PORTRANGE_LOW;
1394 else 1395 else
1395 optval = IP_PORTRANGE_DEFAULT; 1396 optval = IP_PORTRANGE_DEFAULT;
1396 error = sockopt_setint(sopt, optval); 1397 error = sockopt_setint(sopt, optval);
1397 break; 1398 break;
1398 1399
1399 case IP_PORTALGO: 1400 case IP_PORTALGO:
1400 optval = inp->inp_portalgo; 1401 optval = inp->inp_portalgo;
1401 error = sockopt_setint(sopt, optval); 1402 error = sockopt_setint(sopt, optval);
1402 break; 1403 break;
1403 1404
1404 default: 1405 default:
1405 error = ENOPROTOOPT; 1406 error = ENOPROTOOPT;
1406 break; 1407 break;
1407 } 1408 }
1408 break; 1409 break;
1409 } 1410 }
1410 1411
1411 if (!error) { 1412 if (!error) {
1412 inp->inp_flags = inpflags; 1413 inp->inp_flags = inpflags;
1413 } 1414 }
1414 return error; 1415 return error;
1415} 1416}
1416 1417
1417static int 1418static int
1418ip_pktinfo_prepare(const struct in_pktinfo *pktinfo, struct ip_pktopts *pktopts, 1419ip_pktinfo_prepare(const struct in_pktinfo *pktinfo, struct ip_pktopts *pktopts,
1419 int *flags, kauth_cred_t cred) 1420 int *flags, kauth_cred_t cred)
1420{ 1421{
1421 struct ip_moptions *imo; 1422 struct ip_moptions *imo;
1422 int error = 0; 1423 int error = 0;
1423 bool addrset = false; 1424 bool addrset = false;
1424 1425
1425 if (!in_nullhost(pktinfo->ipi_addr)) { 1426 if (!in_nullhost(pktinfo->ipi_addr)) {
1426 pktopts->ippo_laddr.sin_addr = pktinfo->ipi_addr; 1427 pktopts->ippo_laddr.sin_addr = pktinfo->ipi_addr;
1427 /* EADDRNOTAVAIL? */ 1428 /* EADDRNOTAVAIL? */
1428 error = in_pcbbindableaddr(&pktopts->ippo_laddr, cred); 1429 error = in_pcbbindableaddr(&pktopts->ippo_laddr, cred);
1429 if (error != 0) 1430 if (error != 0)
1430 return error; 1431 return error;
1431 addrset = true; 1432 addrset = true;
1432 } 1433 }
1433 1434
1434 if (pktinfo->ipi_ifindex != 0) { 1435 if (pktinfo->ipi_ifindex != 0) {
1435 if (!addrset) { 1436 if (!addrset) {
1436 struct ifnet *ifp; 1437 struct ifnet *ifp;
1437 struct in_ifaddr *ia; 1438 struct in_ifaddr *ia;
1438 int s; 1439 int s;
1439 1440
1440 /* pick up primary address */ 1441 /* pick up primary address */
1441 s = pserialize_read_enter(); 1442 s = pserialize_read_enter();
1442 ifp = if_byindex(pktinfo->ipi_ifindex); 1443 ifp = if_byindex(pktinfo->ipi_ifindex);
1443 if (ifp == NULL) { 1444 if (ifp == NULL) {
1444 pserialize_read_exit(s); 1445 pserialize_read_exit(s);
1445 return EADDRNOTAVAIL; 1446 return EADDRNOTAVAIL;
1446 } 1447 }
1447 ia = in_get_ia_from_ifp(ifp); 1448 ia = in_get_ia_from_ifp(ifp);
1448 if (ia == NULL) { 1449 if (ia == NULL) {
1449 pserialize_read_exit(s); 1450 pserialize_read_exit(s);
1450 return EADDRNOTAVAIL; 1451 return EADDRNOTAVAIL;
1451 } 1452 }
1452 pktopts->ippo_laddr.sin_addr = IA_SIN(ia)->sin_addr; 1453 pktopts->ippo_laddr.sin_addr = IA_SIN(ia)->sin_addr;
1453 pserialize_read_exit(s); 1454 pserialize_read_exit(s);
1454 } 1455 }
1455 1456
1456 /* 1457 /*
1457 * If specified ipi_ifindex, 1458 * If specified ipi_ifindex,
1458 * use copied or locally initialized ip_moptions. 1459 * use copied or locally initialized ip_moptions.
1459 * Original ip_moptions must not be modified. 1460 * Original ip_moptions must not be modified.
1460 */ 1461 */
1461 imo = &pktopts->ippo_imobuf; /* local buf in pktopts */ 1462 imo = &pktopts->ippo_imobuf; /* local buf in pktopts */
1462 if (pktopts->ippo_imo != NULL) { 1463 if (pktopts->ippo_imo != NULL) {
1463 memcpy(imo, pktopts->ippo_imo, sizeof(*imo)); 1464 memcpy(imo, pktopts->ippo_imo, sizeof(*imo));
1464 } else { 1465 } else {
1465 memset(imo, 0, sizeof(*imo)); 1466 memset(imo, 0, sizeof(*imo));
1466 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1467 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1467 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1468 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1468 } 1469 }
1469 imo->imo_multicast_if_index = pktinfo->ipi_ifindex; 1470 imo->imo_multicast_if_index = pktinfo->ipi_ifindex;
1470 pktopts->ippo_imo = imo; 1471 pktopts->ippo_imo = imo;
1471 *flags |= IP_ROUTETOIFINDEX; 1472 *flags |= IP_ROUTETOIFINDEX;
1472 } 1473 }
1473 return error; 1474 return error;
1474} 1475}
1475 1476
1476/* 1477/*
1477 * Set up IP outgoing packet options. Even if control is NULL, 1478 * Set up IP outgoing packet options. Even if control is NULL,
1478 * pktopts->ippo_laddr and pktopts->ippo_imo are set and used. 1479 * pktopts->ippo_laddr and pktopts->ippo_imo are set and used.
1479 */ 1480 */
1480int 1481int
1481ip_setpktopts(struct mbuf *control, struct ip_pktopts *pktopts, int *flags, 1482ip_setpktopts(struct mbuf *control, struct ip_pktopts *pktopts, int *flags,
1482 struct inpcb *inp, kauth_cred_t cred) 1483 struct inpcb *inp, kauth_cred_t cred)
1483{ 1484{
1484 struct cmsghdr *cm; 1485 struct cmsghdr *cm;
1485 struct in_pktinfo pktinfo; 1486 struct in_pktinfo pktinfo;
1486 int error; 1487 int error;
1487 1488
1488 pktopts->ippo_imo = inp->inp_moptions; 1489 pktopts->ippo_imo = inp->inp_moptions;
1489 1490
1490 struct in_addr *ia = in_nullhost(inp->inp_prefsrcip) ? &inp->inp_laddr : 1491 struct in_addr *ia = in_nullhost(inp->inp_prefsrcip) ? &inp->inp_laddr :
1491 &inp->inp_prefsrcip; 1492 &inp->inp_prefsrcip;
1492 sockaddr_in_init(&pktopts->ippo_laddr, ia, 0); 1493 sockaddr_in_init(&pktopts->ippo_laddr, ia, 0);
1493 1494
1494 if (control == NULL) 1495 if (control == NULL)
1495 return 0; 1496 return 0;
1496 1497
1497 /* 1498 /*
1498 * XXX: Currently, we assume all the optional information is 1499 * XXX: Currently, we assume all the optional information is
1499 * stored in a single mbuf. 1500 * stored in a single mbuf.
1500 */ 1501 */
1501 if (control->m_next) 1502 if (control->m_next)
1502 return EINVAL; 1503 return EINVAL;
1503 1504
1504 for (; control->m_len > 0; 1505 for (; control->m_len > 0;
1505 control->m_data += CMSG_ALIGN(cm->cmsg_len), 1506 control->m_data += CMSG_ALIGN(cm->cmsg_len),
1506 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { 1507 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
1507 cm = mtod(control, struct cmsghdr *); 1508 cm = mtod(control, struct cmsghdr *);
1508 if ((control->m_len < sizeof(*cm)) || 1509 if ((control->m_len < sizeof(*cm)) ||
1509 (cm->cmsg_len == 0) || 1510 (cm->cmsg_len == 0) ||
1510 (cm->cmsg_len > control->m_len)) { 1511 (cm->cmsg_len > control->m_len)) {
1511 return EINVAL; 1512 return EINVAL;
1512 } 1513 }
1513 if (cm->cmsg_level != IPPROTO_IP) 1514 if (cm->cmsg_level != IPPROTO_IP)
1514 continue; 1515 continue;
1515 1516
1516 switch (cm->cmsg_type) { 1517 switch (cm->cmsg_type) {
1517 case IP_PKTINFO: 1518 case IP_PKTINFO:
1518 if (cm->cmsg_len != CMSG_LEN(sizeof(pktinfo))) 1519 if (cm->cmsg_len != CMSG_LEN(sizeof(pktinfo)))
1519 return EINVAL; 1520 return EINVAL;
1520 memcpy(&pktinfo, CMSG_DATA(cm), sizeof(pktinfo)); 1521 memcpy(&pktinfo, CMSG_DATA(cm), sizeof(pktinfo));
1521 error = ip_pktinfo_prepare(&pktinfo, pktopts, flags, 1522 error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
1522 cred); 1523 cred);
1523 if (error) 1524 if (error)
1524 return error; 1525 return error;
1525 break; 1526 break;
1526 case IP_SENDSRCADDR: /* FreeBSD compatibility */ 1527 case IP_SENDSRCADDR: /* FreeBSD compatibility */
1527 if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) 1528 if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
1528 return EINVAL; 1529 return EINVAL;
1529 pktinfo.ipi_ifindex = 0; 1530 pktinfo.ipi_ifindex = 0;
1530 pktinfo.ipi_addr = 1531 pktinfo.ipi_addr =
1531 ((struct in_pktinfo *)CMSG_DATA(cm))->ipi_addr; 1532 ((struct in_pktinfo *)CMSG_DATA(cm))->ipi_addr;
1532 error = ip_pktinfo_prepare(&pktinfo, pktopts, flags, 1533 error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
1533 cred); 1534 cred);
1534 if (error) 1535 if (error)
1535 return error; 1536 return error;
1536 break; 1537 break;
1537 default: 1538 default:
1538 return ENOPROTOOPT; 1539 return ENOPROTOOPT;
1539 } 1540 }
1540 } 1541 }
1541 return 0; 1542 return 0;
1542} 1543}
1543 1544
1544/* 1545/*
1545 * Set up IP options in pcb for insertion in output packets. 1546 * Set up IP options in pcb for insertion in output packets.
1546 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1547 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1547 * with destination address if source routed. 1548 * with destination address if source routed.
1548 */ 1549 */
1549static int 1550static int
1550ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt) 1551ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt)
1551{ 1552{
1552 struct mbuf *m; 1553 struct mbuf *m;
1553 const u_char *cp; 1554 const u_char *cp;
1554 u_char *dp; 1555 u_char *dp;
1555 int cnt; 1556 int cnt;
1556 1557
1557 KASSERT(inp_locked(inp)); 1558 KASSERT(inp_locked(inp));
1558 1559
1559 /* Turn off any old options. */ 1560 /* Turn off any old options. */
1560 if (inp->inp_options) { 1561 if (inp->inp_options) {
1561 m_free(inp->inp_options); 1562 m_free(inp->inp_options);
1562 } 1563 }
1563 inp->inp_options = NULL; 1564 inp->inp_options = NULL;
1564 if ((cnt = sopt->sopt_size) == 0) { 1565 if ((cnt = sopt->sopt_size) == 0) {
1565 /* Only turning off any previous options. */ 1566 /* Only turning off any previous options. */
1566 return 0; 1567 return 0;
1567 } 1568 }
1568 cp = sopt->sopt_data; 1569 cp = sopt->sopt_data;
1569 1570
1570 if (cnt % 4) { 1571 if (cnt % 4) {
1571 /* Must be 4-byte aligned, because there's no padding. */ 1572 /* Must be 4-byte aligned, because there's no padding. */
1572 return EINVAL; 1573 return EINVAL;
1573 } 1574 }
1574 1575
1575 m = m_get(M_DONTWAIT, MT_SOOPTS); 1576 m = m_get(M_DONTWAIT, MT_SOOPTS);
1576 if (m == NULL) 1577 if (m == NULL)
1577 return ENOBUFS; 1578 return ENOBUFS;
1578 1579
1579 dp = mtod(m, u_char *); 1580 dp = mtod(m, u_char *);
1580 memset(dp, 0, sizeof(struct in_addr)); 1581 memset(dp, 0, sizeof(struct in_addr));
1581 dp += sizeof(struct in_addr); 1582 dp += sizeof(struct in_addr);
1582 m->m_len = sizeof(struct in_addr); 1583 m->m_len = sizeof(struct in_addr);
1583 1584
1584 /* 1585 /*
1585 * IP option list according to RFC791. Each option is of the form 1586 * IP option list according to RFC791. Each option is of the form
1586 * 1587 *
1587 * [optval] [olen] [(olen - 2) data bytes] 1588 * [optval] [olen] [(olen - 2) data bytes]
1588 * 1589 *
1589 * We validate the list and copy options to an mbuf for prepending 1590 * We validate the list and copy options to an mbuf for prepending
1590 * to data packets. The IP first-hop destination address will be 1591 * to data packets. The IP first-hop destination address will be
1591 * stored before actual options and is zero if unset. 1592 * stored before actual options and is zero if unset.
1592 */ 1593 */
1593 while (cnt > 0) { 1594 while (cnt > 0) {
1594 uint8_t optval, olen, offset; 1595 uint8_t optval, olen, offset;
1595 1596
1596 optval = cp[IPOPT_OPTVAL]; 1597 optval = cp[IPOPT_OPTVAL];
1597 1598
1598 if (optval == IPOPT_EOL || optval == IPOPT_NOP) { 1599 if (optval == IPOPT_EOL || optval == IPOPT_NOP) {
1599 olen = 1; 1600 olen = 1;
1600 } else { 1601 } else {
1601 if (cnt < IPOPT_OLEN + 1) 1602 if (cnt < IPOPT_OLEN + 1)
1602 goto bad; 1603 goto bad;
1603 1604
1604 olen = cp[IPOPT_OLEN]; 1605 olen = cp[IPOPT_OLEN];
1605 if (olen < IPOPT_OLEN + 1 || olen > cnt) 1606 if (olen < IPOPT_OLEN + 1 || olen > cnt)
1606 goto bad; 1607 goto bad;
1607 } 1608 }
1608 1609
1609 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) { 1610 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) {
1610 /* 1611 /*
1611 * user process specifies route as: 1612 * user process specifies route as:
1612 * ->A->B->C->D 1613 * ->A->B->C->D
1613 * D must be our final destination (but we can't 1614 * D must be our final destination (but we can't
1614 * check that since we may not have connected yet). 1615 * check that since we may not have connected yet).
1615 * A is first hop destination, which doesn't appear in 1616 * A is first hop destination, which doesn't appear in
1616 * actual IP option, but is stored before the options. 1617 * actual IP option, but is stored before the options.
1617 */ 1618 */
1618 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr)) 1619 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr))
1619 goto bad; 1620 goto bad;
1620 1621
1621 offset = cp[IPOPT_OFFSET]; 1622 offset = cp[IPOPT_OFFSET];
1622 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1, 1623 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1,
1623 sizeof(struct in_addr)); 1624 sizeof(struct in_addr));
1624 1625
1625 cp += sizeof(struct in_addr); 1626 cp += sizeof(struct in_addr);
1626 cnt -= sizeof(struct in_addr); 1627 cnt -= sizeof(struct in_addr);
1627 olen -= sizeof(struct in_addr); 1628 olen -= sizeof(struct in_addr);
1628 1629
1629 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1630 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
1630 goto bad; 1631 goto bad;
1631 1632
1632 memcpy(dp, cp, olen); 1633 memcpy(dp, cp, olen);
1633 dp[IPOPT_OPTVAL] = optval; 1634 dp[IPOPT_OPTVAL] = optval;
1634 dp[IPOPT_OLEN] = olen; 1635 dp[IPOPT_OLEN] = olen;
1635 dp[IPOPT_OFFSET] = offset; 1636 dp[IPOPT_OFFSET] = offset;
1636 break; 1637 break;
1637 } else { 1638 } else {
1638 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1639 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
1639 goto bad; 1640 goto bad;
1640 1641
1641 memcpy(dp, cp, olen); 1642 memcpy(dp, cp, olen);
1642 break; 1643 break;
1643 } 1644 }
1644 1645
1645 dp += olen; 1646 dp += olen;
1646 m->m_len += olen; 1647 m->m_len += olen;
1647 1648
1648 if (optval == IPOPT_EOL) 1649 if (optval == IPOPT_EOL)
1649 break; 1650 break;
1650 1651
1651 cp += olen; 1652 cp += olen;
1652 cnt -= olen; 1653 cnt -= olen;
1653 } 1654 }
1654 1655
1655 inp->inp_options = m; 1656 inp->inp_options = m;
1656 return 0; 1657 return 0;
1657 1658
1658bad: 1659bad:
1659 (void)m_free(m); 1660 (void)m_free(m);
1660 return EINVAL; 1661 return EINVAL;
1661} 1662}
1662 1663
1663/* 1664/*
1664 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1665 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1665 * Must be called in a pserialize critical section. 1666 * Must be called in a pserialize critical section.
1666 */ 1667 */
1667static struct ifnet * 1668static struct ifnet *
1668ip_multicast_if(struct in_addr *a, int *ifindexp) 1669ip_multicast_if(struct in_addr *a, int *ifindexp)
1669{ 1670{
1670 int ifindex; 1671 int ifindex;
1671 struct ifnet *ifp = NULL; 1672 struct ifnet *ifp = NULL;
1672 struct in_ifaddr *ia; 1673 struct in_ifaddr *ia;
1673 1674
1674 if (ifindexp) 1675 if (ifindexp)
1675 *ifindexp = 0; 1676 *ifindexp = 0;
1676 if (ntohl(a->s_addr) >> 24 == 0) { 1677 if (ntohl(a->s_addr) >> 24 == 0) {
1677 ifindex = ntohl(a->s_addr) & 0xffffff; 1678 ifindex = ntohl(a->s_addr) & 0xffffff;
1678 ifp = if_byindex(ifindex); 1679 ifp = if_byindex(ifindex);
1679 if (!ifp) 1680 if (!ifp)
1680 return NULL; 1681 return NULL;
1681 if (ifindexp) 1682 if (ifindexp)
1682 *ifindexp = ifindex; 1683 *ifindexp = ifindex;
1683 } else { 1684 } else {
1684 IN_ADDRHASH_READER_FOREACH(ia, a->s_addr) { 1685 IN_ADDRHASH_READER_FOREACH(ia, a->s_addr) {
1685 if (in_hosteq(ia->ia_addr.sin_addr, *a) && 1686 if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
1686 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) { 1687 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
1687 ifp = ia->ia_ifp; 1688 ifp = ia->ia_ifp;
1688 if (if_is_deactivated(ifp)) 1689 if (if_is_deactivated(ifp))
1689 ifp = NULL; 1690 ifp = NULL;
1690 break; 1691 break;
1691 } 1692 }
1692 } 1693 }
1693 } 1694 }
1694 return ifp; 1695 return ifp;
1695} 1696}
1696 1697
1697static int 1698static int
1698ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval) 1699ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval)
1699{ 1700{
1700 u_int tval; 1701 u_int tval;
1701 u_char cval; 1702 u_char cval;
1702 int error; 1703 int error;
1703 1704
1704 if (sopt == NULL) 1705 if (sopt == NULL)
1705 return EINVAL; 1706 return EINVAL;
1706 1707
1707 switch (sopt->sopt_size) { 1708 switch (sopt->sopt_size) {
1708 case sizeof(u_char): 1709 case sizeof(u_char):
1709 error = sockopt_get(sopt, &cval, sizeof(u_char)); 1710 error = sockopt_get(sopt, &cval, sizeof(u_char));
1710 tval = cval; 1711 tval = cval;
1711 break; 1712 break;
1712 1713
1713 case sizeof(u_int): 1714 case sizeof(u_int):
1714 error = sockopt_get(sopt, &tval, sizeof(u_int)); 1715 error = sockopt_get(sopt, &tval, sizeof(u_int));
1715 break; 1716 break;
1716 1717
1717 default: 1718 default:
1718 error = EINVAL; 1719 error = EINVAL;
1719 } 1720 }
1720 1721
1721 if (error) 1722 if (error)
1722 return error; 1723 return error;
1723 1724
1724 if (tval > maxval) 1725 if (tval > maxval)
1725 return EINVAL; 1726 return EINVAL;
1726 1727
1727 *val = tval; 1728 *val = tval;
1728 return 0; 1729 return 0;
1729} 1730}
1730 1731
1731static int 1732static int
1732ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp, 1733ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
1733 struct psref *psref, struct in_addr *ia, bool add) 1734 struct psref *psref, struct in_addr *ia, bool add)
1734{ 1735{
1735 int error; 1736 int error;
1736 struct ip_mreq mreq; 1737 struct ip_mreq mreq;
1737 1738
1738 error = sockopt_get(sopt, &mreq, sizeof(mreq)); 1739 error = sockopt_get(sopt, &mreq, sizeof(mreq));
1739 if (error) 1740 if (error)
1740 return error; 1741 return error;
1741 1742
1742 if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr)) 1743 if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr))
1743 return EINVAL; 1744 return EINVAL;
1744 1745
1745 memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia)); 1746 memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia));
1746 1747
1747 if (in_nullhost(mreq.imr_interface)) { 1748 if (in_nullhost(mreq.imr_interface)) {
1748 union { 1749 union {
1749 struct sockaddr dst; 1750 struct sockaddr dst;
1750 struct sockaddr_in dst4; 1751 struct sockaddr_in dst4;
1751 } u; 1752 } u;
1752 struct route ro; 1753 struct route ro;
1753 1754
1754 if (!add) { 1755 if (!add) {
1755 *ifp = NULL; 1756 *ifp = NULL;
1756 return 0; 1757 return 0;
1757 } 1758 }
1758 /* 1759 /*
1759 * If no interface address was provided, use the interface of 1760 * If no interface address was provided, use the interface of
1760 * the route to the given multicast address. 1761 * the route to the given multicast address.
1761 */ 1762 */
1762 struct rtentry *rt; 1763 struct rtentry *rt;
1763 memset(&ro, 0, sizeof(ro)); 1764 memset(&ro, 0, sizeof(ro));
1764 1765
1765 sockaddr_in_init(&u.dst4, ia, 0); 1766 sockaddr_in_init(&u.dst4, ia, 0);
1766 error = rtcache_setdst(&ro, &u.dst); 1767 error = rtcache_setdst(&ro, &u.dst);
1767 if (error != 0) 1768 if (error != 0)
1768 return error; 1769 return error;
1769 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL; 1770 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL;
1770 if (*ifp != NULL) { 1771 if (*ifp != NULL) {
1771 if (if_is_deactivated(*ifp)) 1772 if (if_is_deactivated(*ifp))
1772 *ifp = NULL; 1773 *ifp = NULL;
1773 else 1774 else
1774 if_acquire(*ifp, psref); 1775 if_acquire(*ifp, psref);
1775 } 1776 }
1776 rtcache_unref(rt, &ro); 1777 rtcache_unref(rt, &ro);
1777 rtcache_free(&ro); 1778 rtcache_free(&ro);
1778 } else { 1779 } else {
1779 int s = pserialize_read_enter(); 1780 int s = pserialize_read_enter();
1780 *ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1781 *ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1781 if (!add && *ifp == NULL) { 1782 if (!add && *ifp == NULL) {
1782 pserialize_read_exit(s); 1783 pserialize_read_exit(s);
1783 return EADDRNOTAVAIL; 1784 return EADDRNOTAVAIL;
1784 } 1785 }
1785 if (*ifp != NULL) { 1786 if (*ifp != NULL) {
1786 if (if_is_deactivated(*ifp)) 1787 if (if_is_deactivated(*ifp))
1787 *ifp = NULL; 1788 *ifp = NULL;
1788 else 1789 else
1789 if_acquire(*ifp, psref); 1790 if_acquire(*ifp, psref);
1790 } 1791 }
1791 pserialize_read_exit(s); 1792 pserialize_read_exit(s);
1792 } 1793 }
1793 return 0; 1794 return 0;
1794} 1795}
1795 1796
1796/* 1797/*
1797 * Add a multicast group membership. 1798 * Add a multicast group membership.
1798 * Group must be a valid IP multicast address. 1799 * Group must be a valid IP multicast address.
1799 */ 1800 */
1800static int 1801static int
1801ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt) 1802ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt)
1802{ 1803{