Mon Mar 6 07:31:15 2017 UTC ()
Make sure icmp_redirect_timeout_q and ip_mtudisc_timeout_q are always initialized at boot (icmp_redirect_timeout_q was previously created only when icmp_redirtimeout was non-zero)

Fix PR kern/52029


(ozaki-r)
diff -r1.159 -r1.160 src/sys/netinet/ip_icmp.c
diff -r1.351 -r1.352 src/sys/netinet/ip_input.c

cvs diff -r1.159 -r1.160 src/sys/netinet/ip_icmp.c (switch to unified diff)

--- src/sys/netinet/ip_icmp.c 2017/02/17 04:32:10 1.159
+++ src/sys/netinet/ip_icmp.c 2017/03/06 07:31:15 1.160
@@ -1,1361 +1,1356 @@ @@ -1,1361 +1,1356 @@
1/* $NetBSD: ip_icmp.c,v 1.159 2017/02/17 04:32:10 ozaki-r Exp $ */ 1/* $NetBSD: ip_icmp.c,v 1.160 2017/03/06 07:31:15 ozaki-r Exp $ */
2 2
3/* 3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors 15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software 16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission. 17 * without specific prior written permission.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE. 29 * SUCH DAMAGE.
30 */ 30 */
31 31
32/*- 32/*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved. 34 * All rights reserved.
35 * 35 *
36 * This code is derived from software contributed to The NetBSD Foundation 36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under 37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 * 39 *
40 * This code is derived from software contributed to The NetBSD Foundation 40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc. 41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 * 42 *
43 * Redistribution and use in source and binary forms, with or without 43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions 44 * modification, are permitted provided that the following conditions
45 * are met: 45 * are met:
46 * 1. Redistributions of source code must retain the above copyright 46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer. 47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright 48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the 49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution. 50 * documentation and/or other materials provided with the distribution.
51 * 51 *
52 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 52 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
53 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 53 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE. 62 * POSSIBILITY OF SUCH DAMAGE.
63 */ 63 */
64 64
65/* 65/*
66 * Copyright (c) 1982, 1986, 1988, 1993 66 * Copyright (c) 1982, 1986, 1988, 1993
67 * The Regents of the University of California. All rights reserved. 67 * The Regents of the University of California. All rights reserved.
68 * 68 *
69 * Redistribution and use in source and binary forms, with or without 69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions 70 * modification, are permitted provided that the following conditions
71 * are met: 71 * are met:
72 * 1. Redistributions of source code must retain the above copyright 72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer. 73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright 74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the 75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution. 76 * documentation and/or other materials provided with the distribution.
77 * 3. Neither the name of the University nor the names of its contributors 77 * 3. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software 78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission. 79 * without specific prior written permission.
80 * 80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE. 91 * SUCH DAMAGE.
92 * 92 *
93 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 93 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
94 */ 94 */
95 95
96#include <sys/cdefs.h> 96#include <sys/cdefs.h>
97__KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.159 2017/02/17 04:32:10 ozaki-r Exp $"); 97__KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.160 2017/03/06 07:31:15 ozaki-r Exp $");
98 98
99#ifdef _KERNEL_OPT 99#ifdef _KERNEL_OPT
100#include "opt_ipsec.h" 100#include "opt_ipsec.h"
101#endif 101#endif
102 102
103#include <sys/param.h> 103#include <sys/param.h>
104#include <sys/systm.h> 104#include <sys/systm.h>
105#include <sys/mbuf.h> 105#include <sys/mbuf.h>
106#include <sys/protosw.h> 106#include <sys/protosw.h>
107#include <sys/socket.h> 107#include <sys/socket.h>
108#include <sys/socketvar.h> /* For softnet_lock */ 108#include <sys/socketvar.h> /* For softnet_lock */
109#include <sys/kmem.h> 109#include <sys/kmem.h>
110#include <sys/time.h> 110#include <sys/time.h>
111#include <sys/kernel.h> 111#include <sys/kernel.h>
112#include <sys/syslog.h> 112#include <sys/syslog.h>
113#include <sys/sysctl.h> 113#include <sys/sysctl.h>
114 114
115#include <net/if.h> 115#include <net/if.h>
116#include <net/route.h> 116#include <net/route.h>
117 117
118#include <netinet/in.h> 118#include <netinet/in.h>
119#include <netinet/in_systm.h> 119#include <netinet/in_systm.h>
120#include <netinet/in_var.h> 120#include <netinet/in_var.h>
121#include <netinet/ip.h> 121#include <netinet/ip.h>
122#include <netinet/ip_icmp.h> 122#include <netinet/ip_icmp.h>
123#include <netinet/ip_var.h> 123#include <netinet/ip_var.h>
124#include <netinet/in_pcb.h> 124#include <netinet/in_pcb.h>
125#include <netinet/in_proto.h> 125#include <netinet/in_proto.h>
126#include <netinet/icmp_var.h> 126#include <netinet/icmp_var.h>
127#include <netinet/icmp_private.h> 127#include <netinet/icmp_private.h>
128#include <netinet/wqinput.h> 128#include <netinet/wqinput.h>
129 129
130#ifdef IPSEC 130#ifdef IPSEC
131#include <netipsec/ipsec.h> 131#include <netipsec/ipsec.h>
132#include <netipsec/key.h> 132#include <netipsec/key.h>
133#endif /* IPSEC*/ 133#endif /* IPSEC*/
134 134
135/* 135/*
136 * ICMP routines: error generation, receive packet processing, and 136 * ICMP routines: error generation, receive packet processing, and
137 * routines to turnaround packets back to the originator, and 137 * routines to turnaround packets back to the originator, and
138 * host table maintenance routines. 138 * host table maintenance routines.
139 */ 139 */
140 140
141int icmpmaskrepl = 0; 141int icmpmaskrepl = 0;
142int icmpbmcastecho = 0; 142int icmpbmcastecho = 0;
143#ifdef ICMPPRINTFS 143#ifdef ICMPPRINTFS
144int icmpprintfs = 0; 144int icmpprintfs = 0;
145#endif 145#endif
146int icmpreturndatabytes = 8; 146int icmpreturndatabytes = 8;
147 147
148percpu_t *icmpstat_percpu; 148percpu_t *icmpstat_percpu;
149 149
150/* 150/*
151 * List of callbacks to notify when Path MTU changes are made. 151 * List of callbacks to notify when Path MTU changes are made.
152 */ 152 */
153struct icmp_mtudisc_callback { 153struct icmp_mtudisc_callback {
154 LIST_ENTRY(icmp_mtudisc_callback) mc_list; 154 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
155 void (*mc_func)(struct in_addr); 155 void (*mc_func)(struct in_addr);
156}; 156};
157 157
158LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks = 158LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
159 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks); 159 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
160 160
161#if 0 161#if 0
162static u_int ip_next_mtu(u_int, int); 162static u_int ip_next_mtu(u_int, int);
163#else 163#else
164/*static*/ u_int ip_next_mtu(u_int, int); 164/*static*/ u_int ip_next_mtu(u_int, int);
165#endif 165#endif
166 166
167extern int icmperrppslim; 167extern int icmperrppslim;
168static int icmperrpps_count = 0; 168static int icmperrpps_count = 0;
169static struct timeval icmperrppslim_last; 169static struct timeval icmperrppslim_last;
170static int icmp_rediraccept = 1; 170static int icmp_rediraccept = 1;
171static int icmp_redirtimeout = 600; 171static int icmp_redirtimeout = 600;
172static struct rttimer_queue *icmp_redirect_timeout_q = NULL; 172static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
173 173
174/* Protect mtudisc and redirect stuffs */ 174/* Protect mtudisc and redirect stuffs */
175static kmutex_t icmp_mtx __cacheline_aligned; 175static kmutex_t icmp_mtx __cacheline_aligned;
176 176
177static void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *); 177static void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
178static void icmp_redirect_timeout(struct rtentry *, struct rttimer *); 178static void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
179 179
180static void sysctl_netinet_icmp_setup(struct sysctllog **); 180static void sysctl_netinet_icmp_setup(struct sysctllog **);
181 181
182/* workqueue-based pr_input */ 182/* workqueue-based pr_input */
183static struct wqinput *icmp_wqinput; 183static struct wqinput *icmp_wqinput;
184static void _icmp_input(struct mbuf *, int, int); 184static void _icmp_input(struct mbuf *, int, int);
185 185
186void 186void
187icmp_init(void) 187icmp_init(void)
188{ 188{
189 189
190 sysctl_netinet_icmp_setup(NULL); 190 sysctl_netinet_icmp_setup(NULL);
191 191
192 mutex_init(&icmp_mtx, MUTEX_DEFAULT, IPL_NONE); 192 mutex_init(&icmp_mtx, MUTEX_DEFAULT, IPL_NONE);
193 /* 193 /*
194 * This is only useful if the user initializes redirtimeout to 194 * This is only useful if the user initializes redirtimeout to
195 * something other than zero. 195 * something other than zero.
196 */ 196 */
197 mutex_enter(&icmp_mtx); 197 mutex_enter(&icmp_mtx);
198 if (icmp_redirtimeout != 0) { 198 icmp_redirect_timeout_q = rt_timer_queue_create(icmp_redirtimeout);
199 icmp_redirect_timeout_q = 
200 rt_timer_queue_create(icmp_redirtimeout); 
201 } 
202 mutex_exit(&icmp_mtx); 199 mutex_exit(&icmp_mtx);
203 200
204 icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS); 201 icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS);
205 icmp_wqinput = wqinput_create("icmp", _icmp_input); 202 icmp_wqinput = wqinput_create("icmp", _icmp_input);
206} 203}
207 204
208void 205void
209icmp_mtudisc_lock(void) 206icmp_mtudisc_lock(void)
210{ 207{
211 208
212 mutex_enter(&icmp_mtx); 209 mutex_enter(&icmp_mtx);
213} 210}
214 211
215void 212void
216icmp_mtudisc_unlock(void) 213icmp_mtudisc_unlock(void)
217{ 214{
218 215
219 mutex_exit(&icmp_mtx); 216 mutex_exit(&icmp_mtx);
220} 217}
221 218
222/* 219/*
223 * Register a Path MTU Discovery callback. 220 * Register a Path MTU Discovery callback.
224 */ 221 */
225void 222void
226icmp_mtudisc_callback_register(void (*func)(struct in_addr)) 223icmp_mtudisc_callback_register(void (*func)(struct in_addr))
227{ 224{
228 struct icmp_mtudisc_callback *mc, *new; 225 struct icmp_mtudisc_callback *mc, *new;
229 226
230 new = kmem_alloc(sizeof(*mc), KM_SLEEP); 227 new = kmem_alloc(sizeof(*mc), KM_SLEEP);
231 228
232 mutex_enter(&icmp_mtx); 229 mutex_enter(&icmp_mtx);
233 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL; 230 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
234 mc = LIST_NEXT(mc, mc_list)) { 231 mc = LIST_NEXT(mc, mc_list)) {
235 if (mc->mc_func == func) { 232 if (mc->mc_func == func) {
236 mutex_exit(&icmp_mtx); 233 mutex_exit(&icmp_mtx);
237 kmem_free(new, sizeof(*mc)); 234 kmem_free(new, sizeof(*mc));
238 return; 235 return;
239 } 236 }
240 } 237 }
241 238
242 new->mc_func = func; 239 new->mc_func = func;
243 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, new, mc_list); 240 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, new, mc_list);
244 mutex_exit(&icmp_mtx); 241 mutex_exit(&icmp_mtx);
245} 242}
246 243
247/* 244/*
248 * Generate an error packet of type error 245 * Generate an error packet of type error
249 * in response to bad packet ip. 246 * in response to bad packet ip.
250 */ 247 */
251void 248void
252icmp_error(struct mbuf *n, int type, int code, n_long dest, 249icmp_error(struct mbuf *n, int type, int code, n_long dest,
253 int destmtu) 250 int destmtu)
254{ 251{
255 struct ip *oip = mtod(n, struct ip *), *nip; 252 struct ip *oip = mtod(n, struct ip *), *nip;
256 unsigned oiplen = oip->ip_hl << 2; 253 unsigned oiplen = oip->ip_hl << 2;
257 struct icmp *icp; 254 struct icmp *icp;
258 struct mbuf *m; 255 struct mbuf *m;
259 struct m_tag *mtag; 256 struct m_tag *mtag;
260 unsigned icmplen, mblen; 257 unsigned icmplen, mblen;
261 258
262#ifdef ICMPPRINTFS 259#ifdef ICMPPRINTFS
263 if (icmpprintfs) 260 if (icmpprintfs)
264 printf("icmp_error(%p, type:%d, code:%d)\n", oip, type, code); 261 printf("icmp_error(%p, type:%d, code:%d)\n", oip, type, code);
265#endif 262#endif
266 if (type != ICMP_REDIRECT) 263 if (type != ICMP_REDIRECT)
267 ICMP_STATINC(ICMP_STAT_ERROR); 264 ICMP_STATINC(ICMP_STAT_ERROR);
268 /* 265 /*
269 * Don't send error if the original packet was encrypted. 266 * Don't send error if the original packet was encrypted.
270 * Don't send error if not the first fragment of message. 267 * Don't send error if not the first fragment of message.
271 * Don't error if the old packet protocol was ICMP 268 * Don't error if the old packet protocol was ICMP
272 * error message, only known informational types. 269 * error message, only known informational types.
273 */ 270 */
274 if (n->m_flags & M_DECRYPTED) 271 if (n->m_flags & M_DECRYPTED)
275 goto freeit; 272 goto freeit;
276 if (oip->ip_off &~ htons(IP_MF|IP_DF)) 273 if (oip->ip_off &~ htons(IP_MF|IP_DF))
277 goto freeit; 274 goto freeit;
278 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 275 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
279 n->m_len >= oiplen + ICMP_MINLEN && 276 n->m_len >= oiplen + ICMP_MINLEN &&
280 !ICMP_INFOTYPE(((struct icmp *)((char *)oip + oiplen))->icmp_type)) { 277 !ICMP_INFOTYPE(((struct icmp *)((char *)oip + oiplen))->icmp_type)) {
281 ICMP_STATINC(ICMP_STAT_OLDICMP); 278 ICMP_STATINC(ICMP_STAT_OLDICMP);
282 goto freeit; 279 goto freeit;
283 } 280 }
284 /* Don't send error in response to a multicast or broadcast packet */ 281 /* Don't send error in response to a multicast or broadcast packet */
285 if (n->m_flags & (M_BCAST|M_MCAST)) 282 if (n->m_flags & (M_BCAST|M_MCAST))
286 goto freeit; 283 goto freeit;
287 284
288 /* 285 /*
289 * First, do a rate limitation check. 286 * First, do a rate limitation check.
290 */ 287 */
291 if (icmp_ratelimit(&oip->ip_src, type, code)) { 288 if (icmp_ratelimit(&oip->ip_src, type, code)) {
292 /* XXX stat */ 289 /* XXX stat */
293 goto freeit; 290 goto freeit;
294 } 291 }
295 292
296 /* 293 /*
297 * Now, formulate icmp message 294 * Now, formulate icmp message
298 */ 295 */
299 icmplen = oiplen + min(icmpreturndatabytes, 296 icmplen = oiplen + min(icmpreturndatabytes,
300 ntohs(oip->ip_len) - oiplen); 297 ntohs(oip->ip_len) - oiplen);
301 /* 298 /*
302 * Defend against mbuf chains shorter than oip->ip_len - oiplen: 299 * Defend against mbuf chains shorter than oip->ip_len - oiplen:
303 */ 300 */
304 mblen = 0; 301 mblen = 0;
305 for (m = n; m && (mblen < icmplen); m = m->m_next) 302 for (m = n; m && (mblen < icmplen); m = m->m_next)
306 mblen += m->m_len; 303 mblen += m->m_len;
307 icmplen = min(mblen, icmplen); 304 icmplen = min(mblen, icmplen);
308 305
309 /* 306 /*
310 * As we are not required to return everything we have, 307 * As we are not required to return everything we have,
311 * we return whatever we can return at ease. 308 * we return whatever we can return at ease.
312 * 309 *
313 * Note that ICMP datagrams longer than 576 octets are out of spec 310 * Note that ICMP datagrams longer than 576 octets are out of spec
314 * according to RFC1812; the limit on icmpreturndatabytes below in 311 * according to RFC1812; the limit on icmpreturndatabytes below in
315 * icmp_sysctl will keep things below that limit. 312 * icmp_sysctl will keep things below that limit.
316 */ 313 */
317 314
318 KASSERT(ICMP_MINLEN <= MCLBYTES); 315 KASSERT(ICMP_MINLEN <= MCLBYTES);
319 316
320 if (icmplen + ICMP_MINLEN > MCLBYTES) 317 if (icmplen + ICMP_MINLEN > MCLBYTES)
321 icmplen = MCLBYTES - ICMP_MINLEN; 318 icmplen = MCLBYTES - ICMP_MINLEN;
322 319
323 m = m_gethdr(M_DONTWAIT, MT_HEADER); 320 m = m_gethdr(M_DONTWAIT, MT_HEADER);
324 if (m && (icmplen + ICMP_MINLEN > MHLEN)) { 321 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
325 MCLGET(m, M_DONTWAIT); 322 MCLGET(m, M_DONTWAIT);
326 if ((m->m_flags & M_EXT) == 0) { 323 if ((m->m_flags & M_EXT) == 0) {
327 m_freem(m); 324 m_freem(m);
328 m = NULL; 325 m = NULL;
329 } 326 }
330 } 327 }
331 if (m == NULL) 328 if (m == NULL)
332 goto freeit; 329 goto freeit;
333 MCLAIM(m, n->m_owner); 330 MCLAIM(m, n->m_owner);
334 m->m_len = icmplen + ICMP_MINLEN; 331 m->m_len = icmplen + ICMP_MINLEN;
335 if ((m->m_flags & M_EXT) == 0) 332 if ((m->m_flags & M_EXT) == 0)
336 MH_ALIGN(m, m->m_len); 333 MH_ALIGN(m, m->m_len);
337 else { 334 else {
338 m->m_data += sizeof(struct ip); 335 m->m_data += sizeof(struct ip);
339 m->m_len -= sizeof(struct ip); 336 m->m_len -= sizeof(struct ip);
340 } 337 }
341 icp = mtod(m, struct icmp *); 338 icp = mtod(m, struct icmp *);
342 if ((u_int)type > ICMP_MAXTYPE) 339 if ((u_int)type > ICMP_MAXTYPE)
343 panic("icmp_error"); 340 panic("icmp_error");
344 ICMP_STATINC(ICMP_STAT_OUTHIST + type); 341 ICMP_STATINC(ICMP_STAT_OUTHIST + type);
345 icp->icmp_type = type; 342 icp->icmp_type = type;
346 if (type == ICMP_REDIRECT) 343 if (type == ICMP_REDIRECT)
347 icp->icmp_gwaddr.s_addr = dest; 344 icp->icmp_gwaddr.s_addr = dest;
348 else { 345 else {
349 icp->icmp_void = 0; 346 icp->icmp_void = 0;
350 /* 347 /*
351 * The following assignments assume an overlay with the 348 * The following assignments assume an overlay with the
352 * zeroed icmp_void field. 349 * zeroed icmp_void field.
353 */ 350 */
354 if (type == ICMP_PARAMPROB) { 351 if (type == ICMP_PARAMPROB) {
355 icp->icmp_pptr = code; 352 icp->icmp_pptr = code;
356 code = 0; 353 code = 0;
357 } else if (type == ICMP_UNREACH && 354 } else if (type == ICMP_UNREACH &&
358 code == ICMP_UNREACH_NEEDFRAG && destmtu) 355 code == ICMP_UNREACH_NEEDFRAG && destmtu)
359 icp->icmp_nextmtu = htons(destmtu); 356 icp->icmp_nextmtu = htons(destmtu);
360 } 357 }
361 358
362 icp->icmp_code = code; 359 icp->icmp_code = code;
363 m_copydata(n, 0, icmplen, (void *)&icp->icmp_ip); 360 m_copydata(n, 0, icmplen, (void *)&icp->icmp_ip);
364 361
365 /* 362 /*
366 * Now, copy old ip header (without options) 363 * Now, copy old ip header (without options)
367 * in front of icmp message. 364 * in front of icmp message.
368 */ 365 */
369 if ((m->m_flags & M_EXT) == 0 && 366 if ((m->m_flags & M_EXT) == 0 &&
370 m->m_data - sizeof(struct ip) < m->m_pktdat) 367 m->m_data - sizeof(struct ip) < m->m_pktdat)
371 panic("icmp len"); 368 panic("icmp len");
372 m->m_data -= sizeof(struct ip); 369 m->m_data -= sizeof(struct ip);
373 m->m_len += sizeof(struct ip); 370 m->m_len += sizeof(struct ip);
374 m->m_pkthdr.len = m->m_len; 371 m->m_pkthdr.len = m->m_len;
375 m_copy_rcvif(m, n); 372 m_copy_rcvif(m, n);
376 nip = mtod(m, struct ip *); 373 nip = mtod(m, struct ip *);
377 /* ip_v set in ip_output */ 374 /* ip_v set in ip_output */
378 nip->ip_hl = sizeof(struct ip) >> 2; 375 nip->ip_hl = sizeof(struct ip) >> 2;
379 nip->ip_tos = 0; 376 nip->ip_tos = 0;
380 nip->ip_len = htons(m->m_len); 377 nip->ip_len = htons(m->m_len);
381 /* ip_id set in ip_output */ 378 /* ip_id set in ip_output */
382 nip->ip_off = htons(0); 379 nip->ip_off = htons(0);
383 /* ip_ttl set in icmp_reflect */ 380 /* ip_ttl set in icmp_reflect */
384 nip->ip_p = IPPROTO_ICMP; 381 nip->ip_p = IPPROTO_ICMP;
385 nip->ip_src = oip->ip_src; 382 nip->ip_src = oip->ip_src;
386 nip->ip_dst = oip->ip_dst; 383 nip->ip_dst = oip->ip_dst;
387 /* move PF m_tag to new packet, if it exists */ 384 /* move PF m_tag to new packet, if it exists */
388 mtag = m_tag_find(n, PACKET_TAG_PF, NULL); 385 mtag = m_tag_find(n, PACKET_TAG_PF, NULL);
389 if (mtag != NULL) { 386 if (mtag != NULL) {
390 m_tag_unlink(n, mtag); 387 m_tag_unlink(n, mtag);
391 m_tag_prepend(m, mtag); 388 m_tag_prepend(m, mtag);
392 } 389 }
393 icmp_reflect(m); 390 icmp_reflect(m);
394 391
395freeit: 392freeit:
396 m_freem(n); 393 m_freem(n);
397} 394}
398 395
399struct sockaddr_in icmpsrc = { 396struct sockaddr_in icmpsrc = {
400 .sin_len = sizeof (struct sockaddr_in), 397 .sin_len = sizeof (struct sockaddr_in),
401 .sin_family = AF_INET, 398 .sin_family = AF_INET,
402}; 399};
403static struct sockaddr_in icmpdst = { 400static struct sockaddr_in icmpdst = {
404 .sin_len = sizeof (struct sockaddr_in), 401 .sin_len = sizeof (struct sockaddr_in),
405 .sin_family = AF_INET, 402 .sin_family = AF_INET,
406}; 403};
407static struct sockaddr_in icmpgw = { 404static struct sockaddr_in icmpgw = {
408 .sin_len = sizeof (struct sockaddr_in), 405 .sin_len = sizeof (struct sockaddr_in),
409 .sin_family = AF_INET, 406 .sin_family = AF_INET,
410}; 407};
411struct sockaddr_in icmpmask = {  408struct sockaddr_in icmpmask = {
412 .sin_len = 8, 409 .sin_len = 8,
413 .sin_family = 0, 410 .sin_family = 0,
414}; 411};
415 412
416/* 413/*
417 * Process a received ICMP message. 414 * Process a received ICMP message.
418 */ 415 */
419static void 416static void
420_icmp_input(struct mbuf *m, int hlen, int proto) 417_icmp_input(struct mbuf *m, int hlen, int proto)
421{ 418{
422 struct icmp *icp; 419 struct icmp *icp;
423 struct ip *ip = mtod(m, struct ip *); 420 struct ip *ip = mtod(m, struct ip *);
424 int icmplen; 421 int icmplen;
425 int i; 422 int i;
426 struct in_ifaddr *ia; 423 struct in_ifaddr *ia;
427 void *(*ctlfunc)(int, const struct sockaddr *, void *); 424 void *(*ctlfunc)(int, const struct sockaddr *, void *);
428 int code; 425 int code;
429 struct rtentry *rt; 426 struct rtentry *rt;
430 427
431 /* 428 /*
432 * Locate icmp structure in mbuf, and check 429 * Locate icmp structure in mbuf, and check
433 * that not corrupted and of at least minimum length. 430 * that not corrupted and of at least minimum length.
434 */ 431 */
435 icmplen = ntohs(ip->ip_len) - hlen; 432 icmplen = ntohs(ip->ip_len) - hlen;
436#ifdef ICMPPRINTFS 433#ifdef ICMPPRINTFS
437 if (icmpprintfs) { 434 if (icmpprintfs) {
438 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN]; 435 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
439 printf("icmp_input from `%s' to `%s', len %d\n", 436 printf("icmp_input from `%s' to `%s', len %d\n",
440 IN_PRINT(sbuf, &ip->ip_src), IN_PRINT(dbuf, &ip->ip_dst), 437 IN_PRINT(sbuf, &ip->ip_src), IN_PRINT(dbuf, &ip->ip_dst),
441 icmplen); 438 icmplen);
442 } 439 }
443#endif 440#endif
444 if (icmplen < ICMP_MINLEN) { 441 if (icmplen < ICMP_MINLEN) {
445 ICMP_STATINC(ICMP_STAT_TOOSHORT); 442 ICMP_STATINC(ICMP_STAT_TOOSHORT);
446 goto freeit; 443 goto freeit;
447 } 444 }
448 i = hlen + min(icmplen, ICMP_ADVLENMIN); 445 i = hlen + min(icmplen, ICMP_ADVLENMIN);
449 if ((m->m_len < i || M_READONLY(m)) && (m = m_pullup(m, i)) == NULL) { 446 if ((m->m_len < i || M_READONLY(m)) && (m = m_pullup(m, i)) == NULL) {
450 ICMP_STATINC(ICMP_STAT_TOOSHORT); 447 ICMP_STATINC(ICMP_STAT_TOOSHORT);
451 return; 448 return;
452 } 449 }
453 ip = mtod(m, struct ip *); 450 ip = mtod(m, struct ip *);
454 m->m_len -= hlen; 451 m->m_len -= hlen;
455 m->m_data += hlen; 452 m->m_data += hlen;
456 icp = mtod(m, struct icmp *); 453 icp = mtod(m, struct icmp *);
457 /* Don't need to assert alignment, here. */ 454 /* Don't need to assert alignment, here. */
458 if (in_cksum(m, icmplen)) { 455 if (in_cksum(m, icmplen)) {
459 ICMP_STATINC(ICMP_STAT_CHECKSUM); 456 ICMP_STATINC(ICMP_STAT_CHECKSUM);
460 goto freeit; 457 goto freeit;
461 } 458 }
462 m->m_len += hlen; 459 m->m_len += hlen;
463 m->m_data -= hlen; 460 m->m_data -= hlen;
464 461
465#ifdef ICMPPRINTFS 462#ifdef ICMPPRINTFS
466 /* 463 /*
467 * Message type specific processing. 464 * Message type specific processing.
468 */ 465 */
469 if (icmpprintfs) 466 if (icmpprintfs)
470 printf("icmp_input(type:%d, code:%d)\n", icp->icmp_type, 467 printf("icmp_input(type:%d, code:%d)\n", icp->icmp_type,
471 icp->icmp_code); 468 icp->icmp_code);
472#endif 469#endif
473 if (icp->icmp_type > ICMP_MAXTYPE) 470 if (icp->icmp_type > ICMP_MAXTYPE)
474 goto raw; 471 goto raw;
475 ICMP_STATINC(ICMP_STAT_INHIST + icp->icmp_type); 472 ICMP_STATINC(ICMP_STAT_INHIST + icp->icmp_type);
476 code = icp->icmp_code; 473 code = icp->icmp_code;
477 switch (icp->icmp_type) { 474 switch (icp->icmp_type) {
478 475
479 case ICMP_UNREACH: 476 case ICMP_UNREACH:
480 switch (code) { 477 switch (code) {
481 case ICMP_UNREACH_PROTOCOL: 478 case ICMP_UNREACH_PROTOCOL:
482 code = PRC_UNREACH_PROTOCOL; 479 code = PRC_UNREACH_PROTOCOL;
483 break; 480 break;
484 481
485 case ICMP_UNREACH_PORT: 482 case ICMP_UNREACH_PORT:
486 code = PRC_UNREACH_PORT; 483 code = PRC_UNREACH_PORT;
487 break; 484 break;
488 485
489 case ICMP_UNREACH_SRCFAIL: 486 case ICMP_UNREACH_SRCFAIL:
490 code = PRC_UNREACH_SRCFAIL; 487 code = PRC_UNREACH_SRCFAIL;
491 break; 488 break;
492 489
493 case ICMP_UNREACH_NEEDFRAG: 490 case ICMP_UNREACH_NEEDFRAG:
494 code = PRC_MSGSIZE; 491 code = PRC_MSGSIZE;
495 break; 492 break;
496 493
497 case ICMP_UNREACH_NET: 494 case ICMP_UNREACH_NET:
498 case ICMP_UNREACH_NET_UNKNOWN: 495 case ICMP_UNREACH_NET_UNKNOWN:
499 case ICMP_UNREACH_NET_PROHIB: 496 case ICMP_UNREACH_NET_PROHIB:
500 case ICMP_UNREACH_TOSNET: 497 case ICMP_UNREACH_TOSNET:
501 code = PRC_UNREACH_NET; 498 code = PRC_UNREACH_NET;
502 break; 499 break;
503 500
504 case ICMP_UNREACH_HOST: 501 case ICMP_UNREACH_HOST:
505 case ICMP_UNREACH_HOST_UNKNOWN: 502 case ICMP_UNREACH_HOST_UNKNOWN:
506 case ICMP_UNREACH_ISOLATED: 503 case ICMP_UNREACH_ISOLATED:
507 case ICMP_UNREACH_HOST_PROHIB: 504 case ICMP_UNREACH_HOST_PROHIB:
508 case ICMP_UNREACH_TOSHOST: 505 case ICMP_UNREACH_TOSHOST:
509 case ICMP_UNREACH_ADMIN_PROHIBIT: 506 case ICMP_UNREACH_ADMIN_PROHIBIT:
510 case ICMP_UNREACH_HOST_PREC: 507 case ICMP_UNREACH_HOST_PREC:
511 case ICMP_UNREACH_PREC_CUTOFF: 508 case ICMP_UNREACH_PREC_CUTOFF:
512 code = PRC_UNREACH_HOST; 509 code = PRC_UNREACH_HOST;
513 break; 510 break;
514 511
515 default: 512 default:
516 goto badcode; 513 goto badcode;
517 } 514 }
518 goto deliver; 515 goto deliver;
519 516
520 case ICMP_TIMXCEED: 517 case ICMP_TIMXCEED:
521 if (code > 1) 518 if (code > 1)
522 goto badcode; 519 goto badcode;
523 code += PRC_TIMXCEED_INTRANS; 520 code += PRC_TIMXCEED_INTRANS;
524 goto deliver; 521 goto deliver;
525 522
526 case ICMP_PARAMPROB: 523 case ICMP_PARAMPROB:
527 if (code > 1) 524 if (code > 1)
528 goto badcode; 525 goto badcode;
529 code = PRC_PARAMPROB; 526 code = PRC_PARAMPROB;
530 goto deliver; 527 goto deliver;
531 528
532 case ICMP_SOURCEQUENCH: 529 case ICMP_SOURCEQUENCH:
533 if (code) 530 if (code)
534 goto badcode; 531 goto badcode;
535 code = PRC_QUENCH; 532 code = PRC_QUENCH;
536 goto deliver; 533 goto deliver;
537 534
538 deliver: 535 deliver:
539 /* 536 /*
540 * Problem with datagram; advise higher level routines. 537 * Problem with datagram; advise higher level routines.
541 */ 538 */
542 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 539 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
543 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 540 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
544 ICMP_STATINC(ICMP_STAT_BADLEN); 541 ICMP_STATINC(ICMP_STAT_BADLEN);
545 goto freeit; 542 goto freeit;
546 } 543 }
547 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) 544 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
548 goto badcode; 545 goto badcode;
549#ifdef ICMPPRINTFS 546#ifdef ICMPPRINTFS
550 if (icmpprintfs) 547 if (icmpprintfs)
551 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 548 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
552#endif 549#endif
553 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 550 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
554 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 551 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
555 if (ctlfunc) 552 if (ctlfunc)
556 (void) (*ctlfunc)(code, sintosa(&icmpsrc), 553 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
557 &icp->icmp_ip); 554 &icp->icmp_ip);
558 break; 555 break;
559 556
560 badcode: 557 badcode:
561 ICMP_STATINC(ICMP_STAT_BADCODE); 558 ICMP_STATINC(ICMP_STAT_BADCODE);
562 break; 559 break;
563 560
564 case ICMP_ECHO: 561 case ICMP_ECHO:
565 if (!icmpbmcastecho && 562 if (!icmpbmcastecho &&
566 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 563 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
567 ICMP_STATINC(ICMP_STAT_BMCASTECHO); 564 ICMP_STATINC(ICMP_STAT_BMCASTECHO);
568 break; 565 break;
569 } 566 }
570 icp->icmp_type = ICMP_ECHOREPLY; 567 icp->icmp_type = ICMP_ECHOREPLY;
571 goto reflect; 568 goto reflect;
572 569
573 case ICMP_TSTAMP: 570 case ICMP_TSTAMP:
574 if (icmplen < ICMP_TSLEN) { 571 if (icmplen < ICMP_TSLEN) {
575 ICMP_STATINC(ICMP_STAT_BADLEN); 572 ICMP_STATINC(ICMP_STAT_BADLEN);
576 break; 573 break;
577 } 574 }
578 if (!icmpbmcastecho && 575 if (!icmpbmcastecho &&
579 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 576 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
580 ICMP_STATINC(ICMP_STAT_BMCASTTSTAMP); 577 ICMP_STATINC(ICMP_STAT_BMCASTTSTAMP);
581 break; 578 break;
582 } 579 }
583 icp->icmp_type = ICMP_TSTAMPREPLY; 580 icp->icmp_type = ICMP_TSTAMPREPLY;
584 icp->icmp_rtime = iptime(); 581 icp->icmp_rtime = iptime();
585 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 582 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
586 goto reflect; 583 goto reflect;
587 584
588 case ICMP_MASKREQ: { 585 case ICMP_MASKREQ: {
589 struct ifnet *rcvif; 586 struct ifnet *rcvif;
590 int s, ss; 587 int s, ss;
591 struct ifaddr *ifa = NULL; 588 struct ifaddr *ifa = NULL;
592 589
593 if (icmpmaskrepl == 0) 590 if (icmpmaskrepl == 0)
594 break; 591 break;
595 /* 592 /*
596 * We are not able to respond with all ones broadcast 593 * We are not able to respond with all ones broadcast
597 * unless we receive it over a point-to-point interface. 594 * unless we receive it over a point-to-point interface.
598 */ 595 */
599 if (icmplen < ICMP_MASKLEN) { 596 if (icmplen < ICMP_MASKLEN) {
600 ICMP_STATINC(ICMP_STAT_BADLEN); 597 ICMP_STATINC(ICMP_STAT_BADLEN);
601 break; 598 break;
602 } 599 }
603 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 600 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
604 in_nullhost(ip->ip_dst)) 601 in_nullhost(ip->ip_dst))
605 icmpdst.sin_addr = ip->ip_src; 602 icmpdst.sin_addr = ip->ip_src;
606 else 603 else
607 icmpdst.sin_addr = ip->ip_dst; 604 icmpdst.sin_addr = ip->ip_dst;
608 ss = pserialize_read_enter(); 605 ss = pserialize_read_enter();
609 rcvif = m_get_rcvif(m, &s); 606 rcvif = m_get_rcvif(m, &s);
610 if (__predict_true(rcvif != NULL)) 607 if (__predict_true(rcvif != NULL))
611 ifa = ifaof_ifpforaddr(sintosa(&icmpdst), rcvif); 608 ifa = ifaof_ifpforaddr(sintosa(&icmpdst), rcvif);
612 m_put_rcvif(rcvif, &s); 609 m_put_rcvif(rcvif, &s);
613 if (ifa == NULL) { 610 if (ifa == NULL) {
614 pserialize_read_exit(ss); 611 pserialize_read_exit(ss);
615 break; 612 break;
616 } 613 }
617 ia = ifatoia(ifa); 614 ia = ifatoia(ifa);
618 icp->icmp_type = ICMP_MASKREPLY; 615 icp->icmp_type = ICMP_MASKREPLY;
619 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 616 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
620 if (in_nullhost(ip->ip_src)) { 617 if (in_nullhost(ip->ip_src)) {
621 if (ia->ia_ifp->if_flags & IFF_BROADCAST) 618 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
622 ip->ip_src = ia->ia_broadaddr.sin_addr; 619 ip->ip_src = ia->ia_broadaddr.sin_addr;
623 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) 620 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
624 ip->ip_src = ia->ia_dstaddr.sin_addr; 621 ip->ip_src = ia->ia_dstaddr.sin_addr;
625 } 622 }
626 pserialize_read_exit(ss); 623 pserialize_read_exit(ss);
627reflect: 624reflect:
628 { 625 {
629 uint64_t *icps = percpu_getref(icmpstat_percpu); 626 uint64_t *icps = percpu_getref(icmpstat_percpu);
630 icps[ICMP_STAT_REFLECT]++; 627 icps[ICMP_STAT_REFLECT]++;
631 icps[ICMP_STAT_OUTHIST + icp->icmp_type]++; 628 icps[ICMP_STAT_OUTHIST + icp->icmp_type]++;
632 percpu_putref(icmpstat_percpu); 629 percpu_putref(icmpstat_percpu);
633 } 630 }
634 icmp_reflect(m); 631 icmp_reflect(m);
635 return; 632 return;
636 } 633 }
637 634
638 case ICMP_REDIRECT: 635 case ICMP_REDIRECT:
639 if (code > 3) 636 if (code > 3)
640 goto badcode; 637 goto badcode;
641 if (icmp_rediraccept == 0) 638 if (icmp_rediraccept == 0)
642 goto freeit; 639 goto freeit;
643 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 640 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
644 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 641 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
645 ICMP_STATINC(ICMP_STAT_BADLEN); 642 ICMP_STATINC(ICMP_STAT_BADLEN);
646 break; 643 break;
647 } 644 }
648 /* 645 /*
649 * Short circuit routing redirects to force 646 * Short circuit routing redirects to force
650 * immediate change in the kernel's routing 647 * immediate change in the kernel's routing
651 * tables. The message is also handed to anyone 648 * tables. The message is also handed to anyone
652 * listening on a raw socket (e.g. the routing 649 * listening on a raw socket (e.g. the routing
653 * daemon for use in updating its tables). 650 * daemon for use in updating its tables).
654 */ 651 */
655 icmpgw.sin_addr = ip->ip_src; 652 icmpgw.sin_addr = ip->ip_src;
656 icmpdst.sin_addr = icp->icmp_gwaddr; 653 icmpdst.sin_addr = icp->icmp_gwaddr;
657#ifdef ICMPPRINTFS 654#ifdef ICMPPRINTFS
658 if (icmpprintfs) { 655 if (icmpprintfs) {
659 char gbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN]; 656 char gbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
660 printf("redirect dst `%s' to `%s'\n", 657 printf("redirect dst `%s' to `%s'\n",
661 IN_PRINT(dbuf, &icp->icmp_ip.ip_dst), 658 IN_PRINT(dbuf, &icp->icmp_ip.ip_dst),
662 IN_PRINT(gbuf, &icp->icmp_gwaddr)); 659 IN_PRINT(gbuf, &icp->icmp_gwaddr));
663 } 660 }
664#endif 661#endif
665 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 662 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
666 rt = NULL; 663 rt = NULL;
667 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst), 664 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
668 NULL, RTF_GATEWAY | RTF_HOST, sintosa(&icmpgw), &rt); 665 NULL, RTF_GATEWAY | RTF_HOST, sintosa(&icmpgw), &rt);
669 mutex_enter(&icmp_mtx); 666 mutex_enter(&icmp_mtx);
670 if (rt != NULL && icmp_redirtimeout != 0) { 667 if (rt != NULL && icmp_redirtimeout != 0) {
671 i = rt_timer_add(rt, icmp_redirect_timeout, 668 i = rt_timer_add(rt, icmp_redirect_timeout,
672 icmp_redirect_timeout_q); 669 icmp_redirect_timeout_q);
673 if (i) { 670 if (i) {
674 char buf[INET_ADDRSTRLEN]; 671 char buf[INET_ADDRSTRLEN];
675 log(LOG_ERR, "ICMP: redirect failed to " 672 log(LOG_ERR, "ICMP: redirect failed to "
676 "register timeout for route to %s, " 673 "register timeout for route to %s, "
677 "code %d\n", 674 "code %d\n",
678 IN_PRINT(buf, &icp->icmp_ip.ip_dst), i); 675 IN_PRINT(buf, &icp->icmp_ip.ip_dst), i);
679 } 676 }
680 } 677 }
681 mutex_exit(&icmp_mtx); 678 mutex_exit(&icmp_mtx);
682 if (rt != NULL) 679 if (rt != NULL)
683 rt_unref(rt); 680 rt_unref(rt);
684 681
685 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc)); 682 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
686#if defined(IPSEC) 683#if defined(IPSEC)
687 if (ipsec_used) 684 if (ipsec_used)
688 key_sa_routechange((struct sockaddr *)&icmpsrc); 685 key_sa_routechange((struct sockaddr *)&icmpsrc);
689#endif 686#endif
690 break; 687 break;
691 688
692 /* 689 /*
693 * No kernel processing for the following; 690 * No kernel processing for the following;
694 * just fall through to send to raw listener. 691 * just fall through to send to raw listener.
695 */ 692 */
696 case ICMP_ECHOREPLY: 693 case ICMP_ECHOREPLY:
697 case ICMP_ROUTERADVERT: 694 case ICMP_ROUTERADVERT:
698 case ICMP_ROUTERSOLICIT: 695 case ICMP_ROUTERSOLICIT:
699 case ICMP_TSTAMPREPLY: 696 case ICMP_TSTAMPREPLY:
700 case ICMP_IREQREPLY: 697 case ICMP_IREQREPLY:
701 case ICMP_MASKREPLY: 698 case ICMP_MASKREPLY:
702 default: 699 default:
703 break; 700 break;
704 } 701 }
705 702
706raw: 703raw:
707 rip_input(m, hlen, proto); 704 rip_input(m, hlen, proto);
708 return; 705 return;
709 706
710freeit: 707freeit:
711 m_freem(m); 708 m_freem(m);
712 return; 709 return;
713} 710}
714 711
715void 712void
716icmp_input(struct mbuf *m, ...) 713icmp_input(struct mbuf *m, ...)
717{ 714{
718 int hlen, proto; 715 int hlen, proto;
719 va_list ap; 716 va_list ap;
720 717
721 va_start(ap, m); 718 va_start(ap, m);
722 hlen = va_arg(ap, int); 719 hlen = va_arg(ap, int);
723 proto = va_arg(ap, int); 720 proto = va_arg(ap, int);
724 va_end(ap); 721 va_end(ap);
725 722
726 wqinput_input(icmp_wqinput, m, hlen, proto); 723 wqinput_input(icmp_wqinput, m, hlen, proto);
727} 724}
728 725
729/* 726/*
730 * Reflect the ip packet back to the source 727 * Reflect the ip packet back to the source
731 */ 728 */
732void 729void
733icmp_reflect(struct mbuf *m) 730icmp_reflect(struct mbuf *m)
734{ 731{
735 struct ip *ip = mtod(m, struct ip *); 732 struct ip *ip = mtod(m, struct ip *);
736 struct in_ifaddr *ia; 733 struct in_ifaddr *ia;
737 struct ifaddr *ifa; 734 struct ifaddr *ifa;
738 struct sockaddr_in *sin; 735 struct sockaddr_in *sin;
739 struct in_addr t; 736 struct in_addr t;
740 struct mbuf *opts = NULL; 737 struct mbuf *opts = NULL;
741 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 738 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
742 struct ifnet *rcvif; 739 struct ifnet *rcvif;
743 struct psref psref, psref_ia; 740 struct psref psref, psref_ia;
744 int s; 741 int s;
745 int bound; 742 int bound;
746 743
747 bound = curlwp_bind(); 744 bound = curlwp_bind();
748 745
749 if (!in_canforward(ip->ip_src) && 746 if (!in_canforward(ip->ip_src) &&
750 ((ip->ip_src.s_addr & IN_CLASSA_NET) != 747 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
751 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 748 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
752 m_freem(m); /* Bad return address */ 749 m_freem(m); /* Bad return address */
753 goto done; /* ip_output() will check for broadcast */ 750 goto done; /* ip_output() will check for broadcast */
754 } 751 }
755 t = ip->ip_dst; 752 t = ip->ip_dst;
756 ip->ip_dst = ip->ip_src; 753 ip->ip_dst = ip->ip_src;
757 /* 754 /*
758 * If the incoming packet was addressed directly to us, use 755 * If the incoming packet was addressed directly to us, use
759 * dst as the src for the reply. Otherwise (broadcast or 756 * dst as the src for the reply. Otherwise (broadcast or
760 * anonymous), use an address which corresponds to the 757 * anonymous), use an address which corresponds to the
761 * incoming interface, with a preference for the address which 758 * incoming interface, with a preference for the address which
762 * corresponds to the route to the destination of the ICMP. 759 * corresponds to the route to the destination of the ICMP.
763 */ 760 */
764 761
765 /* Look for packet addressed to us */ 762 /* Look for packet addressed to us */
766 ia = in_get_ia_psref(t, &psref_ia); 763 ia = in_get_ia_psref(t, &psref_ia);
767 if (ia && (ia->ia4_flags & IN_IFF_NOTREADY)) { 764 if (ia && (ia->ia4_flags & IN_IFF_NOTREADY)) {
768 ia4_release(ia, &psref_ia); 765 ia4_release(ia, &psref_ia);
769 ia = NULL; 766 ia = NULL;
770 } 767 }
771 768
772 rcvif = m_get_rcvif_psref(m, &psref); 769 rcvif = m_get_rcvif_psref(m, &psref);
773 770
774 /* look for packet sent to broadcast address */ 771 /* look for packet sent to broadcast address */
775 if (ia == NULL && rcvif && 772 if (ia == NULL && rcvif &&
776 (rcvif->if_flags & IFF_BROADCAST)) { 773 (rcvif->if_flags & IFF_BROADCAST)) {
777 s = pserialize_read_enter(); 774 s = pserialize_read_enter();
778 IFADDR_READER_FOREACH(ifa, rcvif) { 775 IFADDR_READER_FOREACH(ifa, rcvif) {
779 if (ifa->ifa_addr->sa_family != AF_INET) 776 if (ifa->ifa_addr->sa_family != AF_INET)
780 continue; 777 continue;
781 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) { 778 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
782 ia = ifatoia(ifa); 779 ia = ifatoia(ifa);
783 if ((ia->ia4_flags & IN_IFF_NOTREADY) == 0) 780 if ((ia->ia4_flags & IN_IFF_NOTREADY) == 0)
784 break; 781 break;
785 ia = NULL; 782 ia = NULL;
786 } 783 }
787 } 784 }
788 if (ia != NULL) 785 if (ia != NULL)
789 ia4_acquire(ia, &psref_ia); 786 ia4_acquire(ia, &psref_ia);
790 pserialize_read_exit(s); 787 pserialize_read_exit(s);
791 } 788 }
792 789
793 sin = ia ? &ia->ia_addr : NULL; 790 sin = ia ? &ia->ia_addr : NULL;
794 791
795 icmpdst.sin_addr = t; 792 icmpdst.sin_addr = t;
796 793
797 /* 794 /*
798 * if the packet is addressed somewhere else, compute the 795 * if the packet is addressed somewhere else, compute the
799 * source address for packets routed back to the source, and 796 * source address for packets routed back to the source, and
800 * use that, if it's an address on the interface which 797 * use that, if it's an address on the interface which
801 * received the packet 798 * received the packet
802 */ 799 */
803 if (sin == NULL && rcvif) { 800 if (sin == NULL && rcvif) {
804 struct sockaddr_in sin_dst; 801 struct sockaddr_in sin_dst;
805 struct route icmproute; 802 struct route icmproute;
806 int errornum; 803 int errornum;
807 804
808 sockaddr_in_init(&sin_dst, &ip->ip_dst, 0); 805 sockaddr_in_init(&sin_dst, &ip->ip_dst, 0);
809 memset(&icmproute, 0, sizeof(icmproute)); 806 memset(&icmproute, 0, sizeof(icmproute));
810 errornum = 0; 807 errornum = 0;
811 ia = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum, 808 ia = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum,
812 &psref_ia); 809 &psref_ia);
813 /* errornum is never used */ 810 /* errornum is never used */
814 rtcache_free(&icmproute); 811 rtcache_free(&icmproute);
815 /* check to make sure sin is a source address on rcvif */ 812 /* check to make sure sin is a source address on rcvif */
816 if (ia != NULL) { 813 if (ia != NULL) {
817 sin = &ia->ia_addr; 814 sin = &ia->ia_addr;
818 t = sin->sin_addr; 815 t = sin->sin_addr;
819 sin = NULL; 816 sin = NULL;
820 ia4_release(ia, &psref_ia); 817 ia4_release(ia, &psref_ia);
821 ia = in_get_ia_on_iface_psref(t, rcvif, &psref_ia); 818 ia = in_get_ia_on_iface_psref(t, rcvif, &psref_ia);
822 if (ia != NULL) 819 if (ia != NULL)
823 sin = &ia->ia_addr; 820 sin = &ia->ia_addr;
824 } 821 }
825 } 822 }
826 823
827 /* 824 /*
828 * if it was not addressed to us, but the route doesn't go out 825 * if it was not addressed to us, but the route doesn't go out
829 * the source interface, pick an address on the source 826 * the source interface, pick an address on the source
830 * interface. This can happen when routing is asymmetric, or 827 * interface. This can happen when routing is asymmetric, or
831 * when the incoming packet was encapsulated 828 * when the incoming packet was encapsulated
832 */ 829 */
833 if (sin == NULL && rcvif) { 830 if (sin == NULL && rcvif) {
834 KASSERT(ia == NULL); 831 KASSERT(ia == NULL);
835 s = pserialize_read_enter(); 832 s = pserialize_read_enter();
836 IFADDR_READER_FOREACH(ifa, rcvif) { 833 IFADDR_READER_FOREACH(ifa, rcvif) {
837 if (ifa->ifa_addr->sa_family != AF_INET) 834 if (ifa->ifa_addr->sa_family != AF_INET)
838 continue; 835 continue;
839 sin = &(ifatoia(ifa)->ia_addr); 836 sin = &(ifatoia(ifa)->ia_addr);
840 ia = ifatoia(ifa); 837 ia = ifatoia(ifa);
841 ia4_acquire(ia, &psref_ia); 838 ia4_acquire(ia, &psref_ia);
842 break; 839 break;
843 } 840 }
844 pserialize_read_exit(s); 841 pserialize_read_exit(s);
845 } 842 }
846 843
847 m_put_rcvif_psref(rcvif, &psref); 844 m_put_rcvif_psref(rcvif, &psref);
848 845
849 /* 846 /*
850 * The following happens if the packet was not addressed to us, 847 * The following happens if the packet was not addressed to us,
851 * and was received on an interface with no IP address: 848 * and was received on an interface with no IP address:
852 * We find the first AF_INET address on the first non-loopback 849 * We find the first AF_INET address on the first non-loopback
853 * interface. 850 * interface.
854 */ 851 */
855 if (sin == NULL) { 852 if (sin == NULL) {
856 KASSERT(ia == NULL); 853 KASSERT(ia == NULL);
857 s = pserialize_read_enter(); 854 s = pserialize_read_enter();
858 IN_ADDRLIST_READER_FOREACH(ia) { 855 IN_ADDRLIST_READER_FOREACH(ia) {
859 if (ia->ia_ifp->if_flags & IFF_LOOPBACK) 856 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
860 continue; 857 continue;
861 sin = &ia->ia_addr; 858 sin = &ia->ia_addr;
862 ia4_acquire(ia, &psref_ia); 859 ia4_acquire(ia, &psref_ia);
863 break; 860 break;
864 } 861 }
865 pserialize_read_exit(s); 862 pserialize_read_exit(s);
866 } 863 }
867 864
868 /* 865 /*
869 * If we still didn't find an address, punt. We could have an 866 * If we still didn't find an address, punt. We could have an
870 * interface up (and receiving packets) with no address. 867 * interface up (and receiving packets) with no address.
871 */ 868 */
872 if (sin == NULL) { 869 if (sin == NULL) {
873 KASSERT(ia == NULL); 870 KASSERT(ia == NULL);
874 m_freem(m); 871 m_freem(m);
875 goto done; 872 goto done;
876 } 873 }
877 874
878 ip->ip_src = sin->sin_addr; 875 ip->ip_src = sin->sin_addr;
879 ip->ip_ttl = MAXTTL; 876 ip->ip_ttl = MAXTTL;
880 877
881 if (ia != NULL) 878 if (ia != NULL)
882 ia4_release(ia, &psref_ia); 879 ia4_release(ia, &psref_ia);
883 880
884 if (optlen > 0) { 881 if (optlen > 0) {
885 u_char *cp; 882 u_char *cp;
886 int opt, cnt; 883 int opt, cnt;
887 u_int len; 884 u_int len;
888 885
889 /* 886 /*
890 * Retrieve any source routing from the incoming packet; 887 * Retrieve any source routing from the incoming packet;
891 * add on any record-route or timestamp options. 888 * add on any record-route or timestamp options.
892 */ 889 */
893 cp = (u_char *) (ip + 1); 890 cp = (u_char *) (ip + 1);
894 if ((opts = ip_srcroute()) == NULL && 891 if ((opts = ip_srcroute()) == NULL &&
895 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 892 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
896 MCLAIM(opts, m->m_owner); 893 MCLAIM(opts, m->m_owner);
897 opts->m_len = sizeof(struct in_addr); 894 opts->m_len = sizeof(struct in_addr);
898 *mtod(opts, struct in_addr *) = zeroin_addr; 895 *mtod(opts, struct in_addr *) = zeroin_addr;
899 } 896 }
900 if (opts) { 897 if (opts) {
901#ifdef ICMPPRINTFS 898#ifdef ICMPPRINTFS
902 if (icmpprintfs) 899 if (icmpprintfs)
903 printf("icmp_reflect optlen %d rt %d => ", 900 printf("icmp_reflect optlen %d rt %d => ",
904 optlen, opts->m_len); 901 optlen, opts->m_len);
905#endif 902#endif
906 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 903 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
907 opt = cp[IPOPT_OPTVAL]; 904 opt = cp[IPOPT_OPTVAL];
908 if (opt == IPOPT_EOL) 905 if (opt == IPOPT_EOL)
909 break; 906 break;
910 if (opt == IPOPT_NOP) 907 if (opt == IPOPT_NOP)
911 len = 1; 908 len = 1;
912 else { 909 else {
913 if (cnt < IPOPT_OLEN + sizeof(*cp)) 910 if (cnt < IPOPT_OLEN + sizeof(*cp))
914 break; 911 break;
915 len = cp[IPOPT_OLEN]; 912 len = cp[IPOPT_OLEN];
916 if (len < IPOPT_OLEN + sizeof(*cp) || 913 if (len < IPOPT_OLEN + sizeof(*cp) ||
917 len > cnt) 914 len > cnt)
918 break; 915 break;
919 } 916 }
920 /* 917 /*
921 * Should check for overflow, but it "can't happen" 918 * Should check for overflow, but it "can't happen"
922 */ 919 */
923 if (opt == IPOPT_RR || opt == IPOPT_TS || 920 if (opt == IPOPT_RR || opt == IPOPT_TS ||
924 opt == IPOPT_SECURITY) { 921 opt == IPOPT_SECURITY) {
925 memmove(mtod(opts, char *) + opts->m_len, 922 memmove(mtod(opts, char *) + opts->m_len,
926 cp, len); 923 cp, len);
927 opts->m_len += len; 924 opts->m_len += len;
928 } 925 }
929 } 926 }
930 /* Terminate & pad, if necessary */ 927 /* Terminate & pad, if necessary */
931 if ((cnt = opts->m_len % 4) != 0) { 928 if ((cnt = opts->m_len % 4) != 0) {
932 for (; cnt < 4; cnt++) { 929 for (; cnt < 4; cnt++) {
933 *(mtod(opts, char *) + opts->m_len) = 930 *(mtod(opts, char *) + opts->m_len) =
934 IPOPT_EOL; 931 IPOPT_EOL;
935 opts->m_len++; 932 opts->m_len++;
936 } 933 }
937 } 934 }
938#ifdef ICMPPRINTFS 935#ifdef ICMPPRINTFS
939 if (icmpprintfs) 936 if (icmpprintfs)
940 printf("%d\n", opts->m_len); 937 printf("%d\n", opts->m_len);
941#endif 938#endif
942 } 939 }
943 /* 940 /*
944 * Now strip out original options by copying rest of first 941 * Now strip out original options by copying rest of first
945 * mbuf's data back, and adjust the IP length. 942 * mbuf's data back, and adjust the IP length.
946 */ 943 */
947 ip->ip_len = htons(ntohs(ip->ip_len) - optlen); 944 ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
948 ip->ip_hl = sizeof(struct ip) >> 2; 945 ip->ip_hl = sizeof(struct ip) >> 2;
949 m->m_len -= optlen; 946 m->m_len -= optlen;
950 if (m->m_flags & M_PKTHDR) 947 if (m->m_flags & M_PKTHDR)
951 m->m_pkthdr.len -= optlen; 948 m->m_pkthdr.len -= optlen;
952 optlen += sizeof(struct ip); 949 optlen += sizeof(struct ip);
953 memmove(ip + 1, (char *)ip + optlen, 950 memmove(ip + 1, (char *)ip + optlen,
954 (unsigned)(m->m_len - sizeof(struct ip))); 951 (unsigned)(m->m_len - sizeof(struct ip)));
955 } 952 }
956 m_tag_delete_nonpersistent(m); 953 m_tag_delete_nonpersistent(m);
957 m->m_flags &= ~(M_BCAST|M_MCAST); 954 m->m_flags &= ~(M_BCAST|M_MCAST);
958 955
959 /* 956 /*
960 * Clear any in-bound checksum flags for this packet. 957 * Clear any in-bound checksum flags for this packet.
961 */ 958 */
962 if (m->m_flags & M_PKTHDR) 959 if (m->m_flags & M_PKTHDR)
963 m->m_pkthdr.csum_flags = 0; 960 m->m_pkthdr.csum_flags = 0;
964 961
965 icmp_send(m, opts); 962 icmp_send(m, opts);
966done: 963done:
967 curlwp_bindx(bound); 964 curlwp_bindx(bound);
968 if (opts) 965 if (opts)
969 (void)m_free(opts); 966 (void)m_free(opts);
970} 967}
971 968
972/* 969/*
973 * Send an icmp packet back to the ip level, 970 * Send an icmp packet back to the ip level,
974 * after supplying a checksum. 971 * after supplying a checksum.
975 */ 972 */
976void 973void
977icmp_send(struct mbuf *m, struct mbuf *opts) 974icmp_send(struct mbuf *m, struct mbuf *opts)
978{ 975{
979 struct ip *ip = mtod(m, struct ip *); 976 struct ip *ip = mtod(m, struct ip *);
980 int hlen; 977 int hlen;
981 struct icmp *icp; 978 struct icmp *icp;
982 979
983 hlen = ip->ip_hl << 2; 980 hlen = ip->ip_hl << 2;
984 m->m_data += hlen; 981 m->m_data += hlen;
985 m->m_len -= hlen; 982 m->m_len -= hlen;
986 icp = mtod(m, struct icmp *); 983 icp = mtod(m, struct icmp *);
987 icp->icmp_cksum = 0; 984 icp->icmp_cksum = 0;
988 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen); 985 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
989 m->m_data -= hlen; 986 m->m_data -= hlen;
990 m->m_len += hlen; 987 m->m_len += hlen;
991#ifdef ICMPPRINTFS 988#ifdef ICMPPRINTFS
992 if (icmpprintfs) { 989 if (icmpprintfs) {
993 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN]; 990 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
994 printf("icmp_send to destination `%s' from `%s'\n", 991 printf("icmp_send to destination `%s' from `%s'\n",
995 IN_PRINT(dbuf, &ip->ip_dst), IN_PRINT(sbuf, &ip->ip_src)); 992 IN_PRINT(dbuf, &ip->ip_dst), IN_PRINT(sbuf, &ip->ip_src));
996 } 993 }
997#endif 994#endif
998 (void)ip_output(m, opts, NULL, 0, NULL, NULL); 995 (void)ip_output(m, opts, NULL, 0, NULL, NULL);
999} 996}
1000 997
1001n_time 998n_time
1002iptime(void) 999iptime(void)
1003{ 1000{
1004 struct timeval atv; 1001 struct timeval atv;
1005 u_long t; 1002 u_long t;
1006 1003
1007 microtime(&atv); 1004 microtime(&atv);
1008 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 1005 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
1009 return (htonl(t)); 1006 return (htonl(t));
1010} 1007}
1011 1008
1012/* 1009/*
1013 * sysctl helper routine for net.inet.icmp.returndatabytes. ensures 1010 * sysctl helper routine for net.inet.icmp.returndatabytes. ensures
1014 * that the new value is in the correct range. 1011 * that the new value is in the correct range.
1015 */ 1012 */
1016static int 1013static int
1017sysctl_net_inet_icmp_returndatabytes(SYSCTLFN_ARGS) 1014sysctl_net_inet_icmp_returndatabytes(SYSCTLFN_ARGS)
1018{ 1015{
1019 int error, t; 1016 int error, t;
1020 struct sysctlnode node; 1017 struct sysctlnode node;
1021 1018
1022 node = *rnode; 1019 node = *rnode;
1023 node.sysctl_data = &t; 1020 node.sysctl_data = &t;
1024 t = icmpreturndatabytes; 1021 t = icmpreturndatabytes;
1025 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1022 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1026 if (error || newp == NULL) 1023 if (error || newp == NULL)
1027 return (error); 1024 return (error);
1028 1025
1029 if (t < 8 || t > 512) 1026 if (t < 8 || t > 512)
1030 return (EINVAL); 1027 return (EINVAL);
1031 icmpreturndatabytes = t; 1028 icmpreturndatabytes = t;
1032 1029
1033 return (0); 1030 return (0);
1034} 1031}
1035 1032
1036/* 1033/*
1037 * sysctl helper routine for net.inet.icmp.redirtimeout. ensures that 1034 * sysctl helper routine for net.inet.icmp.redirtimeout. ensures that
1038 * the given value is not less than zero and then resets the timeout 1035 * the given value is not less than zero and then resets the timeout
1039 * queue. 1036 * queue.
1040 */ 1037 */
1041static int 1038static int
1042sysctl_net_inet_icmp_redirtimeout(SYSCTLFN_ARGS) 1039sysctl_net_inet_icmp_redirtimeout(SYSCTLFN_ARGS)
1043{ 1040{
1044 int error, tmp; 1041 int error, tmp;
1045 struct sysctlnode node; 1042 struct sysctlnode node;
1046 1043
1047 mutex_enter(&icmp_mtx); 1044 mutex_enter(&icmp_mtx);
1048 1045
1049 node = *rnode; 1046 node = *rnode;
1050 node.sysctl_data = &tmp; 1047 node.sysctl_data = &tmp;
1051 tmp = icmp_redirtimeout; 1048 tmp = icmp_redirtimeout;
1052 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1049 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1053 if (error || newp == NULL) 1050 if (error || newp == NULL)
1054 goto out; 1051 goto out;
1055 if (tmp < 0) { 1052 if (tmp < 0) {
1056 error = EINVAL; 1053 error = EINVAL;
1057 goto out; 1054 goto out;
1058 } 1055 }
1059 icmp_redirtimeout = tmp; 1056 icmp_redirtimeout = tmp;
1060 1057
1061 /* 1058 /*
1062 * was it a *defined* side-effect that anyone even *reading* 1059 * was it a *defined* side-effect that anyone even *reading*
1063 * this value causes these things to happen? 1060 * this value causes these things to happen?
1064 */ 1061 */
1065 if (icmp_redirect_timeout_q != NULL) { 1062 if (icmp_redirect_timeout_q != NULL) {
1066 if (icmp_redirtimeout == 0) { 1063 if (icmp_redirtimeout == 0) {
1067 rt_timer_queue_destroy(icmp_redirect_timeout_q); 1064 rt_timer_queue_destroy(icmp_redirect_timeout_q);
1068 icmp_redirect_timeout_q = NULL; 1065 icmp_redirect_timeout_q = NULL;
1069 } else { 1066 } else {
1070 rt_timer_queue_change(icmp_redirect_timeout_q, 1067 rt_timer_queue_change(icmp_redirect_timeout_q,
1071 icmp_redirtimeout); 1068 icmp_redirtimeout);
1072 } 1069 }
1073 } else if (icmp_redirtimeout > 0) { 1070 } else if (icmp_redirtimeout > 0) {
1074 icmp_redirect_timeout_q = 1071 icmp_redirect_timeout_q =
1075 rt_timer_queue_create(icmp_redirtimeout); 1072 rt_timer_queue_create(icmp_redirtimeout);
1076 } 1073 }
1077 error = 0; 1074 error = 0;
1078out: 1075out:
1079 mutex_exit(&icmp_mtx); 1076 mutex_exit(&icmp_mtx);
1080 return error; 1077 return error;
1081} 1078}
1082 1079
1083static int 1080static int
1084sysctl_net_inet_icmp_stats(SYSCTLFN_ARGS) 1081sysctl_net_inet_icmp_stats(SYSCTLFN_ARGS)
1085{ 1082{
1086 1083
1087 return (NETSTAT_SYSCTL(icmpstat_percpu, ICMP_NSTATS)); 1084 return (NETSTAT_SYSCTL(icmpstat_percpu, ICMP_NSTATS));
1088} 1085}
1089 1086
1090static void 1087static void
1091sysctl_netinet_icmp_setup(struct sysctllog **clog) 1088sysctl_netinet_icmp_setup(struct sysctllog **clog)
1092{ 1089{
1093 1090
1094 sysctl_createv(clog, 0, NULL, NULL, 1091 sysctl_createv(clog, 0, NULL, NULL,
1095 CTLFLAG_PERMANENT, 1092 CTLFLAG_PERMANENT,
1096 CTLTYPE_NODE, "inet", NULL, 1093 CTLTYPE_NODE, "inet", NULL,
1097 NULL, 0, NULL, 0, 1094 NULL, 0, NULL, 0,
1098 CTL_NET, PF_INET, CTL_EOL); 1095 CTL_NET, PF_INET, CTL_EOL);
1099 sysctl_createv(clog, 0, NULL, NULL, 1096 sysctl_createv(clog, 0, NULL, NULL,
1100 CTLFLAG_PERMANENT, 1097 CTLFLAG_PERMANENT,
1101 CTLTYPE_NODE, "icmp", 1098 CTLTYPE_NODE, "icmp",
1102 SYSCTL_DESCR("ICMPv4 related settings"), 1099 SYSCTL_DESCR("ICMPv4 related settings"),
1103 NULL, 0, NULL, 0, 1100 NULL, 0, NULL, 0,
1104 CTL_NET, PF_INET, IPPROTO_ICMP, CTL_EOL); 1101 CTL_NET, PF_INET, IPPROTO_ICMP, CTL_EOL);
1105 1102
1106 sysctl_createv(clog, 0, NULL, NULL, 1103 sysctl_createv(clog, 0, NULL, NULL,
1107 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1104 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1108 CTLTYPE_INT, "maskrepl", 1105 CTLTYPE_INT, "maskrepl",
1109 SYSCTL_DESCR("Respond to ICMP_MASKREQ messages"), 1106 SYSCTL_DESCR("Respond to ICMP_MASKREQ messages"),
1110 NULL, 0, &icmpmaskrepl, 0, 1107 NULL, 0, &icmpmaskrepl, 0,
1111 CTL_NET, PF_INET, IPPROTO_ICMP, 1108 CTL_NET, PF_INET, IPPROTO_ICMP,
1112 ICMPCTL_MASKREPL, CTL_EOL); 1109 ICMPCTL_MASKREPL, CTL_EOL);
1113 sysctl_createv(clog, 0, NULL, NULL, 1110 sysctl_createv(clog, 0, NULL, NULL,
1114 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1111 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1115 CTLTYPE_INT, "returndatabytes", 1112 CTLTYPE_INT, "returndatabytes",
1116 SYSCTL_DESCR("Number of bytes to return in an ICMP " 1113 SYSCTL_DESCR("Number of bytes to return in an ICMP "
1117 "error message"), 1114 "error message"),
1118 sysctl_net_inet_icmp_returndatabytes, 0, 1115 sysctl_net_inet_icmp_returndatabytes, 0,
1119 &icmpreturndatabytes, 0, 1116 &icmpreturndatabytes, 0,
1120 CTL_NET, PF_INET, IPPROTO_ICMP, 1117 CTL_NET, PF_INET, IPPROTO_ICMP,
1121 ICMPCTL_RETURNDATABYTES, CTL_EOL); 1118 ICMPCTL_RETURNDATABYTES, CTL_EOL);
1122 sysctl_createv(clog, 0, NULL, NULL, 1119 sysctl_createv(clog, 0, NULL, NULL,
1123 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1120 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1124 CTLTYPE_INT, "errppslimit", 1121 CTLTYPE_INT, "errppslimit",
1125 SYSCTL_DESCR("Maximum number of outgoing ICMP error " 1122 SYSCTL_DESCR("Maximum number of outgoing ICMP error "
1126 "messages per second"), 1123 "messages per second"),
1127 NULL, 0, &icmperrppslim, 0, 1124 NULL, 0, &icmperrppslim, 0,
1128 CTL_NET, PF_INET, IPPROTO_ICMP, 1125 CTL_NET, PF_INET, IPPROTO_ICMP,
1129 ICMPCTL_ERRPPSLIMIT, CTL_EOL); 1126 ICMPCTL_ERRPPSLIMIT, CTL_EOL);
1130 sysctl_createv(clog, 0, NULL, NULL, 1127 sysctl_createv(clog, 0, NULL, NULL,
1131 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1128 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1132 CTLTYPE_INT, "rediraccept", 1129 CTLTYPE_INT, "rediraccept",
1133 SYSCTL_DESCR("Accept ICMP_REDIRECT messages"), 1130 SYSCTL_DESCR("Accept ICMP_REDIRECT messages"),
1134 NULL, 0, &icmp_rediraccept, 0, 1131 NULL, 0, &icmp_rediraccept, 0,
1135 CTL_NET, PF_INET, IPPROTO_ICMP, 1132 CTL_NET, PF_INET, IPPROTO_ICMP,
1136 ICMPCTL_REDIRACCEPT, CTL_EOL); 1133 ICMPCTL_REDIRACCEPT, CTL_EOL);
1137 sysctl_createv(clog, 0, NULL, NULL, 1134 sysctl_createv(clog, 0, NULL, NULL,
1138 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1135 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1139 CTLTYPE_INT, "redirtimeout", 1136 CTLTYPE_INT, "redirtimeout",
1140 SYSCTL_DESCR("Lifetime of ICMP_REDIRECT generated " 1137 SYSCTL_DESCR("Lifetime of ICMP_REDIRECT generated "
1141 "routes"), 1138 "routes"),
1142 sysctl_net_inet_icmp_redirtimeout, 0, 1139 sysctl_net_inet_icmp_redirtimeout, 0,
1143 &icmp_redirtimeout, 0, 1140 &icmp_redirtimeout, 0,
1144 CTL_NET, PF_INET, IPPROTO_ICMP, 1141 CTL_NET, PF_INET, IPPROTO_ICMP,
1145 ICMPCTL_REDIRTIMEOUT, CTL_EOL); 1142 ICMPCTL_REDIRTIMEOUT, CTL_EOL);
1146 sysctl_createv(clog, 0, NULL, NULL, 1143 sysctl_createv(clog, 0, NULL, NULL,
1147 CTLFLAG_PERMANENT, 1144 CTLFLAG_PERMANENT,
1148 CTLTYPE_STRUCT, "stats", 1145 CTLTYPE_STRUCT, "stats",
1149 SYSCTL_DESCR("ICMP statistics"),  1146 SYSCTL_DESCR("ICMP statistics"),
1150 sysctl_net_inet_icmp_stats, 0, NULL, 0, 1147 sysctl_net_inet_icmp_stats, 0, NULL, 0,
1151 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_STATS, 1148 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_STATS,
1152 CTL_EOL); 1149 CTL_EOL);
1153 sysctl_createv(clog, 0, NULL, NULL, 1150 sysctl_createv(clog, 0, NULL, NULL,
1154 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1151 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1155 CTLTYPE_INT, "bmcastecho", 1152 CTLTYPE_INT, "bmcastecho",
1156 SYSCTL_DESCR("Respond to ICMP_ECHO or ICMP_TIMESTAMP " 1153 SYSCTL_DESCR("Respond to ICMP_ECHO or ICMP_TIMESTAMP "
1157 "message to the broadcast or multicast"), 1154 "message to the broadcast or multicast"),
1158 NULL, 0, &icmpbmcastecho, 0, 1155 NULL, 0, &icmpbmcastecho, 0,
1159 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_BMCASTECHO, 1156 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_BMCASTECHO,
1160 CTL_EOL); 1157 CTL_EOL);
1161} 1158}
1162 1159
1163void 1160void
1164icmp_statinc(u_int stat) 1161icmp_statinc(u_int stat)
1165{ 1162{
1166 1163
1167 KASSERT(stat < ICMP_NSTATS); 1164 KASSERT(stat < ICMP_NSTATS);
1168 ICMP_STATINC(stat); 1165 ICMP_STATINC(stat);
1169} 1166}
1170 1167
1171/* Table of common MTUs: */ 1168/* Table of common MTUs: */
1172 1169
1173static const u_int mtu_table[] = { 1170static const u_int mtu_table[] = {
1174 65535, 65280, 32000, 17914, 9180, 8166, 1171 65535, 65280, 32000, 17914, 9180, 8166,
1175 4352, 2002, 1492, 1006, 508, 296, 68, 0 1172 4352, 2002, 1492, 1006, 508, 296, 68, 0
1176}; 1173};
1177 1174
1178void 1175void
1179icmp_mtudisc(struct icmp *icp, struct in_addr faddr) 1176icmp_mtudisc(struct icmp *icp, struct in_addr faddr)
1180{ 1177{
1181 struct icmp_mtudisc_callback *mc; 1178 struct icmp_mtudisc_callback *mc;
1182 struct sockaddr *dst = sintosa(&icmpsrc); 1179 struct sockaddr *dst = sintosa(&icmpsrc);
1183 struct rtentry *rt; 1180 struct rtentry *rt;
1184 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */ 1181 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
1185 int error; 1182 int error;
1186 1183
1187 rt = rtalloc1(dst, 1); 1184 rt = rtalloc1(dst, 1);
1188 if (rt == NULL) 1185 if (rt == NULL)
1189 return; 1186 return;
1190 1187
1191 /* If we didn't get a host route, allocate one */ 1188 /* If we didn't get a host route, allocate one */
1192 1189
1193 if ((rt->rt_flags & RTF_HOST) == 0) { 1190 if ((rt->rt_flags & RTF_HOST) == 0) {
1194 struct rtentry *nrt; 1191 struct rtentry *nrt;
1195 1192
1196 error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL, 1193 error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
1197 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt); 1194 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
1198 if (error) { 1195 if (error) {
1199 rt_unref(rt); 1196 rt_unref(rt);
1200 return; 1197 return;
1201 } 1198 }
1202 nrt->rt_rmx = rt->rt_rmx; 1199 nrt->rt_rmx = rt->rt_rmx;
1203 rt_unref(rt); 1200 rt_unref(rt);
1204 rt = nrt; 1201 rt = nrt;
1205 } 1202 }
1206 1203
1207 mutex_enter(&icmp_mtx); 1204 mutex_enter(&icmp_mtx);
1208 if (ip_mtudisc_timeout_q == NULL) 
1209 ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); 
1210 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q); 1205 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
1211 mutex_exit(&icmp_mtx); 1206 mutex_exit(&icmp_mtx);
1212 if (error) { 1207 if (error) {
1213 rt_unref(rt); 1208 rt_unref(rt);
1214 return; 1209 return;
1215 } 1210 }
1216 1211
1217 if (mtu == 0) { 1212 if (mtu == 0) {
1218 int i = 0; 1213 int i = 0;
1219 1214
1220 mtu = ntohs(icp->icmp_ip.ip_len); 1215 mtu = ntohs(icp->icmp_ip.ip_len);
1221 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ 1216 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
1222 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0) 1217 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
1223 mtu -= (icp->icmp_ip.ip_hl << 2); 1218 mtu -= (icp->icmp_ip.ip_hl << 2);
1224 1219
1225 /* If we still can't guess a value, try the route */ 1220 /* If we still can't guess a value, try the route */
1226 1221
1227 if (mtu == 0) { 1222 if (mtu == 0) {
1228 mtu = rt->rt_rmx.rmx_mtu; 1223 mtu = rt->rt_rmx.rmx_mtu;
1229 1224
1230 /* If no route mtu, default to the interface mtu */ 1225 /* If no route mtu, default to the interface mtu */
1231 1226
1232 if (mtu == 0) 1227 if (mtu == 0)
1233 mtu = rt->rt_ifp->if_mtu; 1228 mtu = rt->rt_ifp->if_mtu;
1234 } 1229 }
1235 1230
1236 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++) 1231 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1237 if (mtu > mtu_table[i]) { 1232 if (mtu > mtu_table[i]) {
1238 mtu = mtu_table[i]; 1233 mtu = mtu_table[i];
1239 break; 1234 break;
1240 } 1235 }
1241 } 1236 }
1242 1237
1243 /* 1238 /*
1244 * XXX: RTV_MTU is overloaded, since the admin can set it 1239 * XXX: RTV_MTU is overloaded, since the admin can set it
1245 * to turn off PMTU for a route, and the kernel can 1240 * to turn off PMTU for a route, and the kernel can
1246 * set it to indicate a serious problem with PMTU 1241 * set it to indicate a serious problem with PMTU
1247 * on a route. We should be using a separate flag 1242 * on a route. We should be using a separate flag
1248 * for the kernel to indicate this. 1243 * for the kernel to indicate this.
1249 */ 1244 */
1250 1245
1251 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { 1246 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1252 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu) 1247 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1253 rt->rt_rmx.rmx_locks |= RTV_MTU; 1248 rt->rt_rmx.rmx_locks |= RTV_MTU;
1254 else if (rt->rt_rmx.rmx_mtu > mtu || 1249 else if (rt->rt_rmx.rmx_mtu > mtu ||
1255 rt->rt_rmx.rmx_mtu == 0) { 1250 rt->rt_rmx.rmx_mtu == 0) {
1256 ICMP_STATINC(ICMP_STAT_PMTUCHG); 1251 ICMP_STATINC(ICMP_STAT_PMTUCHG);
1257 rt->rt_rmx.rmx_mtu = mtu; 1252 rt->rt_rmx.rmx_mtu = mtu;
1258 } 1253 }
1259 } 1254 }
1260 1255
1261 if (rt != NULL) 1256 if (rt != NULL)
1262 rt_unref(rt); 1257 rt_unref(rt);
1263 1258
1264 /* 1259 /*
1265 * Notify protocols that the MTU for this destination 1260 * Notify protocols that the MTU for this destination
1266 * has changed. 1261 * has changed.
1267 */ 1262 */
1268 mutex_enter(&icmp_mtx); 1263 mutex_enter(&icmp_mtx);
1269 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL; 1264 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1270 mc = LIST_NEXT(mc, mc_list)) 1265 mc = LIST_NEXT(mc, mc_list))
1271 (*mc->mc_func)(faddr); 1266 (*mc->mc_func)(faddr);
1272 mutex_exit(&icmp_mtx); 1267 mutex_exit(&icmp_mtx);
1273} 1268}
1274 1269
1275/* 1270/*
1276 * Return the next larger or smaller MTU plateau (table from RFC 1191) 1271 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1277 * given current value MTU. If DIR is less than zero, a larger plateau 1272 * given current value MTU. If DIR is less than zero, a larger plateau
1278 * is returned; otherwise, a smaller value is returned. 1273 * is returned; otherwise, a smaller value is returned.
1279 */ 1274 */
1280u_int 1275u_int
1281ip_next_mtu(u_int mtu, int dir) /* XXX */ 1276ip_next_mtu(u_int mtu, int dir) /* XXX */
1282{ 1277{
1283 int i; 1278 int i;
1284 1279
1285 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) { 1280 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1286 if (mtu >= mtu_table[i]) 1281 if (mtu >= mtu_table[i])
1287 break; 1282 break;
1288 } 1283 }
1289 1284
1290 if (dir < 0) { 1285 if (dir < 0) {
1291 if (i == 0) { 1286 if (i == 0) {
1292 return 0; 1287 return 0;
1293 } else { 1288 } else {
1294 return mtu_table[i - 1]; 1289 return mtu_table[i - 1];
1295 } 1290 }
1296 } else { 1291 } else {
1297 if (mtu_table[i] == 0) { 1292 if (mtu_table[i] == 0) {
1298 return 0; 1293 return 0;
1299 } else if (mtu > mtu_table[i]) { 1294 } else if (mtu > mtu_table[i]) {
1300 return mtu_table[i]; 1295 return mtu_table[i];
1301 } else { 1296 } else {
1302 return mtu_table[i + 1]; 1297 return mtu_table[i + 1];
1303 } 1298 }
1304 } 1299 }
1305} 1300}
1306 1301
1307static void 1302static void
1308icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) 1303icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1309{ 1304{
1310 1305
1311 KASSERT(rt != NULL); 1306 KASSERT(rt != NULL);
1312 rt_assert_referenced(rt); 1307 rt_assert_referenced(rt);
1313 1308
1314 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == 1309 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1315 (RTF_DYNAMIC | RTF_HOST)) { 1310 (RTF_DYNAMIC | RTF_HOST)) {
1316 rtrequest(RTM_DELETE, rt_getkey(rt), 1311 rtrequest(RTM_DELETE, rt_getkey(rt),
1317 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); 1312 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1318 } else { 1313 } else {
1319 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { 1314 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1320 rt->rt_rmx.rmx_mtu = 0; 1315 rt->rt_rmx.rmx_mtu = 0;
1321 } 1316 }
1322 } 1317 }
1323} 1318}
1324 1319
1325static void 1320static void
1326icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) 1321icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1327{ 1322{
1328 1323
1329 KASSERT(rt != NULL); 1324 KASSERT(rt != NULL);
1330 rt_assert_referenced(rt); 1325 rt_assert_referenced(rt);
1331 1326
1332 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == 1327 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1333 (RTF_DYNAMIC | RTF_HOST)) { 1328 (RTF_DYNAMIC | RTF_HOST)) {
1334 rtrequest(RTM_DELETE, rt_getkey(rt), 1329 rtrequest(RTM_DELETE, rt_getkey(rt),
1335 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); 1330 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1336 } 1331 }
1337} 1332}
1338 1333
1339/* 1334/*
1340 * Perform rate limit check. 1335 * Perform rate limit check.
1341 * Returns 0 if it is okay to send the icmp packet. 1336 * Returns 0 if it is okay to send the icmp packet.
1342 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate 1337 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1343 * limitation. 1338 * limitation.
1344 * 1339 *
1345 * XXX per-destination/type check necessary? 1340 * XXX per-destination/type check necessary?
1346 */ 1341 */
1347int 1342int
1348icmp_ratelimit(const struct in_addr *dst, const int type, 1343icmp_ratelimit(const struct in_addr *dst, const int type,
1349 const int code) 1344 const int code)
1350{ 1345{
1351 1346
1352 /* PPS limit */ 1347 /* PPS limit */
1353 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1348 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1354 icmperrppslim)) { 1349 icmperrppslim)) {
1355 /* The packet is subject to rate limit */ 1350 /* The packet is subject to rate limit */
1356 return 1; 1351 return 1;
1357 } 1352 }
1358 1353
1359 /* okay to send */ 1354 /* okay to send */
1360 return 0; 1355 return 0;
1361} 1356}

cvs diff -r1.351 -r1.352 src/sys/netinet/ip_input.c (switch to unified diff)

--- src/sys/netinet/ip_input.c 2017/02/17 18:09:25 1.351
+++ src/sys/netinet/ip_input.c 2017/03/06 07:31:15 1.352
@@ -1,1354 +1,1356 @@ @@ -1,1354 +1,1356 @@
1/* $NetBSD: ip_input.c,v 1.351 2017/02/17 18:09:25 ozaki-r Exp $ */ 1/* $NetBSD: ip_input.c,v 1.352 2017/03/06 07:31:15 ozaki-r Exp $ */
2 2
3/* 3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors 15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software 16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission. 17 * without specific prior written permission.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE. 29 * SUCH DAMAGE.
30 */ 30 */
31 31
32/*- 32/*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved. 34 * All rights reserved.
35 * 35 *
36 * This code is derived from software contributed to The NetBSD Foundation 36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under 37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 * 39 *
40 * Redistribution and use in source and binary forms, with or without 40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions 41 * modification, are permitted provided that the following conditions
42 * are met: 42 * are met:
43 * 1. Redistributions of source code must retain the above copyright 43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer. 44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright 45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the 46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution. 47 * documentation and/or other materials provided with the distribution.
48 * 48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE. 59 * POSSIBILITY OF SUCH DAMAGE.
60 */ 60 */
61 61
62/* 62/*
63 * Copyright (c) 1982, 1986, 1988, 1993 63 * Copyright (c) 1982, 1986, 1988, 1993
64 * The Regents of the University of California. All rights reserved. 64 * The Regents of the University of California. All rights reserved.
65 * 65 *
66 * Redistribution and use in source and binary forms, with or without 66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions 67 * modification, are permitted provided that the following conditions
68 * are met: 68 * are met:
69 * 1. Redistributions of source code must retain the above copyright 69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer. 70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright 71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the 72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution. 73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors 74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software 75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission. 76 * without specific prior written permission.
77 * 77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE. 88 * SUCH DAMAGE.
89 * 89 *
90 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 90 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
91 */ 91 */
92 92
93#include <sys/cdefs.h> 93#include <sys/cdefs.h>
94__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.351 2017/02/17 18:09:25 ozaki-r Exp $"); 94__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.352 2017/03/06 07:31:15 ozaki-r Exp $");
95 95
96#ifdef _KERNEL_OPT 96#ifdef _KERNEL_OPT
97#include "opt_inet.h" 97#include "opt_inet.h"
98#include "opt_compat_netbsd.h" 98#include "opt_compat_netbsd.h"
99#include "opt_gateway.h" 99#include "opt_gateway.h"
100#include "opt_ipsec.h" 100#include "opt_ipsec.h"
101#include "opt_mrouting.h" 101#include "opt_mrouting.h"
102#include "opt_mbuftrace.h" 102#include "opt_mbuftrace.h"
103#include "opt_inet_csum.h" 103#include "opt_inet_csum.h"
104#include "opt_net_mpsafe.h" 104#include "opt_net_mpsafe.h"
105#endif 105#endif
106 106
107#include "arp.h" 107#include "arp.h"
108 108
109#include <sys/param.h> 109#include <sys/param.h>
110#include <sys/systm.h> 110#include <sys/systm.h>
111#include <sys/cpu.h> 111#include <sys/cpu.h>
112#include <sys/mbuf.h> 112#include <sys/mbuf.h>
113#include <sys/domain.h> 113#include <sys/domain.h>
114#include <sys/protosw.h> 114#include <sys/protosw.h>
115#include <sys/socket.h> 115#include <sys/socket.h>
116#include <sys/socketvar.h> 116#include <sys/socketvar.h>
117#include <sys/errno.h> 117#include <sys/errno.h>
118#include <sys/time.h> 118#include <sys/time.h>
119#include <sys/kernel.h> 119#include <sys/kernel.h>
120#include <sys/pool.h> 120#include <sys/pool.h>
121#include <sys/sysctl.h> 121#include <sys/sysctl.h>
122#include <sys/kauth.h> 122#include <sys/kauth.h>
123 123
124#include <net/if.h> 124#include <net/if.h>
125#include <net/if_dl.h> 125#include <net/if_dl.h>
126#include <net/route.h> 126#include <net/route.h>
127#include <net/pktqueue.h> 127#include <net/pktqueue.h>
128#include <net/pfil.h> 128#include <net/pfil.h>
129 129
130#include <netinet/in.h> 130#include <netinet/in.h>
131#include <netinet/in_systm.h> 131#include <netinet/in_systm.h>
132#include <netinet/ip.h> 132#include <netinet/ip.h>
133#include <netinet/in_pcb.h> 133#include <netinet/in_pcb.h>
134#include <netinet/in_proto.h> 134#include <netinet/in_proto.h>
135#include <netinet/in_var.h> 135#include <netinet/in_var.h>
136#include <netinet/ip_var.h> 136#include <netinet/ip_var.h>
137#include <netinet/ip_private.h> 137#include <netinet/ip_private.h>
138#include <netinet/ip_icmp.h> 138#include <netinet/ip_icmp.h>
139/* just for gif_ttl */ 139/* just for gif_ttl */
140#include <netinet/in_gif.h> 140#include <netinet/in_gif.h>
141#include "gif.h" 141#include "gif.h"
142#include <net/if_gre.h> 142#include <net/if_gre.h>
143#include "gre.h" 143#include "gre.h"
144 144
145#ifdef MROUTING 145#ifdef MROUTING
146#include <netinet/ip_mroute.h> 146#include <netinet/ip_mroute.h>
147#endif 147#endif
148#include <netinet/portalgo.h> 148#include <netinet/portalgo.h>
149 149
150#ifdef IPSEC 150#ifdef IPSEC
151#include <netipsec/ipsec.h> 151#include <netipsec/ipsec.h>
152#endif 152#endif
153 153
154#ifndef IPFORWARDING 154#ifndef IPFORWARDING
155#ifdef GATEWAY 155#ifdef GATEWAY
156#define IPFORWARDING 1 /* forward IP packets not for us */ 156#define IPFORWARDING 1 /* forward IP packets not for us */
157#else /* GATEWAY */ 157#else /* GATEWAY */
158#define IPFORWARDING 0 /* don't forward IP packets not for us */ 158#define IPFORWARDING 0 /* don't forward IP packets not for us */
159#endif /* GATEWAY */ 159#endif /* GATEWAY */
160#endif /* IPFORWARDING */ 160#endif /* IPFORWARDING */
161#ifndef IPSENDREDIRECTS 161#ifndef IPSENDREDIRECTS
162#define IPSENDREDIRECTS 1 162#define IPSENDREDIRECTS 1
163#endif 163#endif
164#ifndef IPFORWSRCRT 164#ifndef IPFORWSRCRT
165#define IPFORWSRCRT 1 /* forward source-routed packets */ 165#define IPFORWSRCRT 1 /* forward source-routed packets */
166#endif 166#endif
167#ifndef IPALLOWSRCRT 167#ifndef IPALLOWSRCRT
168#define IPALLOWSRCRT 1 /* allow source-routed packets */ 168#define IPALLOWSRCRT 1 /* allow source-routed packets */
169#endif 169#endif
170#ifndef IPMTUDISC 170#ifndef IPMTUDISC
171#define IPMTUDISC 1 171#define IPMTUDISC 1
172#endif 172#endif
173#ifndef IPMTUDISCTIMEOUT 173#ifndef IPMTUDISCTIMEOUT
174#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ 174#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */
175#endif 175#endif
176 176
177#ifdef COMPAT_50 177#ifdef COMPAT_50
178#include <compat/sys/time.h> 178#include <compat/sys/time.h>
179#include <compat/sys/socket.h> 179#include <compat/sys/socket.h>
180#endif 180#endif
181 181
182/* 182/*
183 * Note: DIRECTED_BROADCAST is handled this way so that previous 183 * Note: DIRECTED_BROADCAST is handled this way so that previous
184 * configuration using this option will Just Work. 184 * configuration using this option will Just Work.
185 */ 185 */
186#ifndef IPDIRECTEDBCAST 186#ifndef IPDIRECTEDBCAST
187#ifdef DIRECTED_BROADCAST 187#ifdef DIRECTED_BROADCAST
188#define IPDIRECTEDBCAST 1 188#define IPDIRECTEDBCAST 1
189#else 189#else
190#define IPDIRECTEDBCAST 0 190#define IPDIRECTEDBCAST 0
191#endif /* DIRECTED_BROADCAST */ 191#endif /* DIRECTED_BROADCAST */
192#endif /* IPDIRECTEDBCAST */ 192#endif /* IPDIRECTEDBCAST */
193int ipforwarding = IPFORWARDING; 193int ipforwarding = IPFORWARDING;
194int ipsendredirects = IPSENDREDIRECTS; 194int ipsendredirects = IPSENDREDIRECTS;
195int ip_defttl = IPDEFTTL; 195int ip_defttl = IPDEFTTL;
196int ip_forwsrcrt = IPFORWSRCRT; 196int ip_forwsrcrt = IPFORWSRCRT;
197int ip_directedbcast = IPDIRECTEDBCAST; 197int ip_directedbcast = IPDIRECTEDBCAST;
198int ip_allowsrcrt = IPALLOWSRCRT; 198int ip_allowsrcrt = IPALLOWSRCRT;
199int ip_mtudisc = IPMTUDISC; 199int ip_mtudisc = IPMTUDISC;
200int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; 200int ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
201#ifdef DIAGNOSTIC 201#ifdef DIAGNOSTIC
202int ipprintfs = 0; 202int ipprintfs = 0;
203#endif 203#endif
204 204
205int ip_do_randomid = 0; 205int ip_do_randomid = 0;
206 206
207/* 207/*
208 * XXX - Setting ip_checkinterface mostly implements the receive side of 208 * XXX - Setting ip_checkinterface mostly implements the receive side of
209 * the Strong ES model described in RFC 1122, but since the routing table 209 * the Strong ES model described in RFC 1122, but since the routing table
210 * and transmit implementation do not implement the Strong ES model, 210 * and transmit implementation do not implement the Strong ES model,
211 * setting this to 1 results in an odd hybrid. 211 * setting this to 1 results in an odd hybrid.
212 * 212 *
213 * XXX - ip_checkinterface currently must be disabled if you use ipnat 213 * XXX - ip_checkinterface currently must be disabled if you use ipnat
214 * to translate the destination address to another local interface. 214 * to translate the destination address to another local interface.
215 * 215 *
216 * XXX - ip_checkinterface must be disabled if you add IP aliases 216 * XXX - ip_checkinterface must be disabled if you add IP aliases
217 * to the loopback interface instead of the interface where the 217 * to the loopback interface instead of the interface where the
218 * packets for those addresses are received. 218 * packets for those addresses are received.
219 */ 219 */
220static int ip_checkinterface __read_mostly = 0; 220static int ip_checkinterface __read_mostly = 0;
221 221
222struct rttimer_queue *ip_mtudisc_timeout_q = NULL; 222struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
223 223
224pktqueue_t * ip_pktq __read_mostly; 224pktqueue_t * ip_pktq __read_mostly;
225pfil_head_t * inet_pfil_hook __read_mostly; 225pfil_head_t * inet_pfil_hook __read_mostly;
226ipid_state_t * ip_ids __read_mostly; 226ipid_state_t * ip_ids __read_mostly;
227percpu_t * ipstat_percpu __read_mostly; 227percpu_t * ipstat_percpu __read_mostly;
228 228
229static percpu_t *ipforward_rt_percpu __cacheline_aligned; 229static percpu_t *ipforward_rt_percpu __cacheline_aligned;
230 230
231uint16_t ip_id; 231uint16_t ip_id;
232 232
#ifdef INET_CSUM_COUNTERS
#include <sys/device.h>

/*
 * Event counters bumped by ip_input() according to how the IPv4 header
 * checksum was dealt with: flagged bad by hardware, flagged good by
 * hardware, or computed in software.
 */
struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "hwcsum bad");
struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "hwcsum ok");
struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "swcsum");

/* Increment the given counter; `ev' is a pointer to a struct evcnt. */
#define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++

EVCNT_ATTACH_STATIC(ip_hwcsum_bad);
EVCNT_ATTACH_STATIC(ip_hwcsum_ok);
EVCNT_ATTACH_STATIC(ip_swcsum);

#else

/* Counters compiled out: the increment expands to nothing. */
#define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */

#endif /* INET_CSUM_COUNTERS */
254 254
255/* 255/*
256 * We need to save the IP options in case a protocol wants to respond 256 * We need to save the IP options in case a protocol wants to respond
257 * to an incoming packet over the same route if the packet got here 257 * to an incoming packet over the same route if the packet got here
258 * using IP source routing. This allows connection establishment and 258 * using IP source routing. This allows connection establishment and
259 * maintenance when the remote end is on a network that is not known 259 * maintenance when the remote end is on a network that is not known
260 * to us. 260 * to us.
261 */ 261 */
262 262
263static int ip_nhops = 0; 263static int ip_nhops = 0;
264 264
265static struct ip_srcrt { 265static struct ip_srcrt {
266 struct in_addr dst; /* final destination */ 266 struct in_addr dst; /* final destination */
267 char nop; /* one NOP to align */ 267 char nop; /* one NOP to align */
268 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ 268 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
269 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; 269 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
270} ip_srcrt; 270} ip_srcrt;
271 271
272static int ip_drainwanted; 272static int ip_drainwanted;
273 273
274struct sockaddr_in ipaddr = { 274struct sockaddr_in ipaddr = {
275 .sin_len = sizeof(ipaddr), 275 .sin_len = sizeof(ipaddr),
276 .sin_family = AF_INET, 276 .sin_family = AF_INET,
277}; 277};
278 278
279static void save_rte(u_char *, struct in_addr); 279static void save_rte(u_char *, struct in_addr);
280 280
281#ifdef MBUFTRACE 281#ifdef MBUFTRACE
282struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx"); 282struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx");
283struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx"); 283struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx");
284#endif 284#endif
285 285
286static void ipintr(void *); 286static void ipintr(void *);
287static void ip_input(struct mbuf *); 287static void ip_input(struct mbuf *);
288static void ip_forward(struct mbuf *, int, struct ifnet *); 288static void ip_forward(struct mbuf *, int, struct ifnet *);
289static bool ip_dooptions(struct mbuf *); 289static bool ip_dooptions(struct mbuf *);
290static struct in_ifaddr *ip_rtaddr(struct in_addr, struct psref *); 290static struct in_ifaddr *ip_rtaddr(struct in_addr, struct psref *);
291static void sysctl_net_inet_ip_setup(struct sysctllog **); 291static void sysctl_net_inet_ip_setup(struct sysctllog **);
292 292
293static struct in_ifaddr *ip_match_our_address(struct ifnet *, struct ip *, 293static struct in_ifaddr *ip_match_our_address(struct ifnet *, struct ip *,
294 int *); 294 int *);
295static struct in_ifaddr *ip_match_our_address_broadcast(struct ifnet *, 295static struct in_ifaddr *ip_match_our_address_broadcast(struct ifnet *,
296 struct ip *); 296 struct ip *);
297 297
298#ifdef NET_MPSAFE 298#ifdef NET_MPSAFE
299#define SOFTNET_LOCK() mutex_enter(softnet_lock) 299#define SOFTNET_LOCK() mutex_enter(softnet_lock)
300#define SOFTNET_UNLOCK() mutex_exit(softnet_lock) 300#define SOFTNET_UNLOCK() mutex_exit(softnet_lock)
301#else 301#else
302#define SOFTNET_LOCK() KASSERT(mutex_owned(softnet_lock)) 302#define SOFTNET_LOCK() KASSERT(mutex_owned(softnet_lock))
303#define SOFTNET_UNLOCK() KASSERT(mutex_owned(softnet_lock)) 303#define SOFTNET_UNLOCK() KASSERT(mutex_owned(softnet_lock))
304#endif 304#endif
305 305
306/* 306/*
307 * IP initialization: fill in IP protocol switch table. 307 * IP initialization: fill in IP protocol switch table.
308 * All protocols not implemented in kernel go to raw IP protocol handler. 308 * All protocols not implemented in kernel go to raw IP protocol handler.
309 */ 309 */
310void 310void
311ip_init(void) 311ip_init(void)
312{ 312{
313 const struct protosw *pr; 313 const struct protosw *pr;
314 314
315 in_init(); 315 in_init();
316 sysctl_net_inet_ip_setup(NULL); 316 sysctl_net_inet_ip_setup(NULL);
317 317
318 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 318 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
319 KASSERT(pr != NULL); 319 KASSERT(pr != NULL);
320 320
321 ip_pktq = pktq_create(IFQ_MAXLEN, ipintr, NULL); 321 ip_pktq = pktq_create(IFQ_MAXLEN, ipintr, NULL);
322 KASSERT(ip_pktq != NULL); 322 KASSERT(ip_pktq != NULL);
323 323
324 for (u_int i = 0; i < IPPROTO_MAX; i++) { 324 for (u_int i = 0; i < IPPROTO_MAX; i++) {
325 ip_protox[i] = pr - inetsw; 325 ip_protox[i] = pr - inetsw;
326 } 326 }
327 for (pr = inetdomain.dom_protosw; 327 for (pr = inetdomain.dom_protosw;
328 pr < inetdomain.dom_protoswNPROTOSW; pr++) 328 pr < inetdomain.dom_protoswNPROTOSW; pr++)
329 if (pr->pr_domain->dom_family == PF_INET && 329 if (pr->pr_domain->dom_family == PF_INET &&
330 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 330 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
331 ip_protox[pr->pr_protocol] = pr - inetsw; 331 ip_protox[pr->pr_protocol] = pr - inetsw;
332 332
333 ip_reass_init(); 333 ip_reass_init();
334 334
335 ip_ids = ip_id_init(); 335 ip_ids = ip_id_init();
336 ip_id = time_uptime & 0xfffff; 336 ip_id = time_uptime & 0xfffff;
337 337
338#ifdef GATEWAY 338#ifdef GATEWAY
339 ipflow_init(); 339 ipflow_init();
340#endif 340#endif
341 341
342 /* Register our Packet Filter hook. */ 342 /* Register our Packet Filter hook. */
343 inet_pfil_hook = pfil_head_create(PFIL_TYPE_AF, (void *)AF_INET); 343 inet_pfil_hook = pfil_head_create(PFIL_TYPE_AF, (void *)AF_INET);
344 KASSERT(inet_pfil_hook != NULL); 344 KASSERT(inet_pfil_hook != NULL);
345 345
346#ifdef MBUFTRACE 346#ifdef MBUFTRACE
347 MOWNER_ATTACH(&ip_tx_mowner); 347 MOWNER_ATTACH(&ip_tx_mowner);
348 MOWNER_ATTACH(&ip_rx_mowner); 348 MOWNER_ATTACH(&ip_rx_mowner);
349#endif /* MBUFTRACE */ 349#endif /* MBUFTRACE */
350 350
351 ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS); 351 ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS);
352 352
353 ipforward_rt_percpu = percpu_alloc(sizeof(struct route)); 353 ipforward_rt_percpu = percpu_alloc(sizeof(struct route));
354 if (ipforward_rt_percpu == NULL) 354 if (ipforward_rt_percpu == NULL)
355 panic("failed to allocate ipforward_rt_percpu"); 355 panic("failed to allocate ipforward_rt_percpu");
 356
 357 ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
356} 358}
357 359
358static struct in_ifaddr * 360static struct in_ifaddr *
359ip_match_our_address(struct ifnet *ifp, struct ip *ip, int *downmatch) 361ip_match_our_address(struct ifnet *ifp, struct ip *ip, int *downmatch)
360{ 362{
361 struct in_ifaddr *ia = NULL; 363 struct in_ifaddr *ia = NULL;
362 int checkif; 364 int checkif;
363 365
364 /* 366 /*
365 * Enable a consistency check between the destination address 367 * Enable a consistency check between the destination address
366 * and the arrival interface for a unicast packet (the RFC 1122 368 * and the arrival interface for a unicast packet (the RFC 1122
367 * strong ES model) if IP forwarding is disabled and the packet 369 * strong ES model) if IP forwarding is disabled and the packet
368 * is not locally generated. 370 * is not locally generated.
369 * 371 *
370 * XXX - Checking also should be disabled if the destination 372 * XXX - Checking also should be disabled if the destination
371 * address is ipnat'ed to a different interface. 373 * address is ipnat'ed to a different interface.
372 * 374 *
373 * XXX - Checking is incompatible with IP aliases added 375 * XXX - Checking is incompatible with IP aliases added
374 * to the loopback interface instead of the interface where 376 * to the loopback interface instead of the interface where
375 * the packets are received. 377 * the packets are received.
376 * 378 *
377 * XXX - We need to add a per ifaddr flag for this so that 379 * XXX - We need to add a per ifaddr flag for this so that
378 * we get finer grain control. 380 * we get finer grain control.
379 */ 381 */
380 checkif = ip_checkinterface && (ipforwarding == 0) && 382 checkif = ip_checkinterface && (ipforwarding == 0) &&
381 (ifp->if_flags & IFF_LOOPBACK) == 0; 383 (ifp->if_flags & IFF_LOOPBACK) == 0;
382 384
383 IN_ADDRHASH_READER_FOREACH(ia, ip->ip_dst.s_addr) { 385 IN_ADDRHASH_READER_FOREACH(ia, ip->ip_dst.s_addr) {
384 if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) { 386 if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
385 if (ia->ia4_flags & IN_IFF_NOTREADY) 387 if (ia->ia4_flags & IN_IFF_NOTREADY)
386 continue; 388 continue;
387 if (checkif && ia->ia_ifp != ifp) 389 if (checkif && ia->ia_ifp != ifp)
388 continue; 390 continue;
389 if ((ia->ia_ifp->if_flags & IFF_UP) != 0 && 391 if ((ia->ia_ifp->if_flags & IFF_UP) != 0 &&
390 (ia->ia4_flags & IN_IFF_DETACHED) == 0) 392 (ia->ia4_flags & IN_IFF_DETACHED) == 0)
391 break; 393 break;
392 else 394 else
393 (*downmatch)++; 395 (*downmatch)++;
394 } 396 }
395 } 397 }
396 398
397 return ia; 399 return ia;
398} 400}
399 401
400static struct in_ifaddr * 402static struct in_ifaddr *
401ip_match_our_address_broadcast(struct ifnet *ifp, struct ip *ip) 403ip_match_our_address_broadcast(struct ifnet *ifp, struct ip *ip)
402{ 404{
403 struct in_ifaddr *ia = NULL; 405 struct in_ifaddr *ia = NULL;
404 struct ifaddr *ifa; 406 struct ifaddr *ifa;
405 407
406 IFADDR_READER_FOREACH(ifa, ifp) { 408 IFADDR_READER_FOREACH(ifa, ifp) {
407 if (ifa->ifa_addr->sa_family != AF_INET) 409 if (ifa->ifa_addr->sa_family != AF_INET)
408 continue; 410 continue;
409 ia = ifatoia(ifa); 411 ia = ifatoia(ifa);
410 if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED)) 412 if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED))
411 continue; 413 continue;
412 if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || 414 if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
413 in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || 415 in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
414 /* 416 /*
415 * Look for all-0's host part (old broadcast addr), 417 * Look for all-0's host part (old broadcast addr),
416 * either for subnet or net. 418 * either for subnet or net.
417 */ 419 */
418 ip->ip_dst.s_addr == ia->ia_subnet || 420 ip->ip_dst.s_addr == ia->ia_subnet ||
419 ip->ip_dst.s_addr == ia->ia_net) 421 ip->ip_dst.s_addr == ia->ia_net)
420 goto matched; 422 goto matched;
421 /* 423 /*
422 * An interface with IP address zero accepts 424 * An interface with IP address zero accepts
423 * all packets that arrive on that interface. 425 * all packets that arrive on that interface.
424 */ 426 */
425 if (in_nullhost(ia->ia_addr.sin_addr)) 427 if (in_nullhost(ia->ia_addr.sin_addr))
426 goto matched; 428 goto matched;
427 } 429 }
428 ia = NULL; 430 ia = NULL;
429 431
430matched: 432matched:
431 return ia; 433 return ia;
432} 434}
433 435
434/* 436/*
435 * IP software interrupt routine. 437 * IP software interrupt routine.
436 */ 438 */
437static void 439static void
438ipintr(void *arg __unused) 440ipintr(void *arg __unused)
439{ 441{
440 struct mbuf *m; 442 struct mbuf *m;
441 443
442 KASSERT(cpu_softintr_p()); 444 KASSERT(cpu_softintr_p());
443 445
444#ifndef NET_MPSAFE 446#ifndef NET_MPSAFE
445 mutex_enter(softnet_lock); 447 mutex_enter(softnet_lock);
446#endif 448#endif
447 while ((m = pktq_dequeue(ip_pktq)) != NULL) { 449 while ((m = pktq_dequeue(ip_pktq)) != NULL) {
448 ip_input(m); 450 ip_input(m);
449 } 451 }
450#ifndef NET_MPSAFE 452#ifndef NET_MPSAFE
451 mutex_exit(softnet_lock); 453 mutex_exit(softnet_lock);
452#endif 454#endif
453} 455}
454 456
455/* 457/*
456 * IP input routine. Checksum and byte swap header. If fragmented 458 * IP input routine. Checksum and byte swap header. If fragmented
457 * try to reassemble. Process options. Pass to next level. 459 * try to reassemble. Process options. Pass to next level.
458 */ 460 */
459static void 461static void
460ip_input(struct mbuf *m) 462ip_input(struct mbuf *m)
461{ 463{
462 struct ip *ip = NULL; 464 struct ip *ip = NULL;
463 struct in_ifaddr *ia = NULL; 465 struct in_ifaddr *ia = NULL;
464 int hlen = 0, len; 466 int hlen = 0, len;
465 int downmatch; 467 int downmatch;
466 int srcrt = 0; 468 int srcrt = 0;
467 ifnet_t *ifp; 469 ifnet_t *ifp;
468 struct psref psref; 470 struct psref psref;
469 int s; 471 int s;
470 472
471 KASSERTMSG(cpu_softintr_p(), "ip_input: not in the software " 473 KASSERTMSG(cpu_softintr_p(), "ip_input: not in the software "
472 "interrupt handler; synchronization assumptions violated"); 474 "interrupt handler; synchronization assumptions violated");
473 475
474 MCLAIM(m, &ip_rx_mowner); 476 MCLAIM(m, &ip_rx_mowner);
475 KASSERT((m->m_flags & M_PKTHDR) != 0); 477 KASSERT((m->m_flags & M_PKTHDR) != 0);
476 478
477 ifp = m_get_rcvif_psref(m, &psref); 479 ifp = m_get_rcvif_psref(m, &psref);
478 if (__predict_false(ifp == NULL)) 480 if (__predict_false(ifp == NULL))
479 goto out; 481 goto out;
480 482
481 /* 483 /*
482 * If no IP addresses have been set yet but the interfaces 484 * If no IP addresses have been set yet but the interfaces
483 * are receiving, can't do anything with incoming packets yet. 485 * are receiving, can't do anything with incoming packets yet.
484 * Note: we pre-check without locks held. 486 * Note: we pre-check without locks held.
485 */ 487 */
486 if (IN_ADDRLIST_READER_EMPTY()) 488 if (IN_ADDRLIST_READER_EMPTY())
487 goto out; 489 goto out;
488 IP_STATINC(IP_STAT_TOTAL); 490 IP_STATINC(IP_STAT_TOTAL);
489 491
490 /* 492 /*
491 * If the IP header is not aligned, slurp it up into a new 493 * If the IP header is not aligned, slurp it up into a new
492 * mbuf with space for link headers, in the event we forward 494 * mbuf with space for link headers, in the event we forward
493 * it. Otherwise, if it is aligned, make sure the entire 495 * it. Otherwise, if it is aligned, make sure the entire
494 * base IP header is in the first mbuf of the chain. 496 * base IP header is in the first mbuf of the chain.
495 */ 497 */
496 if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) { 498 if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
497 if ((m = m_copyup(m, sizeof(struct ip), 499 if ((m = m_copyup(m, sizeof(struct ip),
498 (max_linkhdr + 3) & ~3)) == NULL) { 500 (max_linkhdr + 3) & ~3)) == NULL) {
499 /* XXXJRT new stat, please */ 501 /* XXXJRT new stat, please */
500 IP_STATINC(IP_STAT_TOOSMALL); 502 IP_STATINC(IP_STAT_TOOSMALL);
501 goto out; 503 goto out;
502 } 504 }
503 } else if (__predict_false(m->m_len < sizeof (struct ip))) { 505 } else if (__predict_false(m->m_len < sizeof (struct ip))) {
504 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { 506 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
505 IP_STATINC(IP_STAT_TOOSMALL); 507 IP_STATINC(IP_STAT_TOOSMALL);
506 goto out; 508 goto out;
507 } 509 }
508 } 510 }
509 ip = mtod(m, struct ip *); 511 ip = mtod(m, struct ip *);
510 if (ip->ip_v != IPVERSION) { 512 if (ip->ip_v != IPVERSION) {
511 IP_STATINC(IP_STAT_BADVERS); 513 IP_STATINC(IP_STAT_BADVERS);
512 goto out; 514 goto out;
513 } 515 }
514 hlen = ip->ip_hl << 2; 516 hlen = ip->ip_hl << 2;
515 if (hlen < sizeof(struct ip)) { /* minimum header length */ 517 if (hlen < sizeof(struct ip)) { /* minimum header length */
516 IP_STATINC(IP_STAT_BADHLEN); 518 IP_STATINC(IP_STAT_BADHLEN);
517 goto out; 519 goto out;
518 } 520 }
519 if (hlen > m->m_len) { 521 if (hlen > m->m_len) {
520 if ((m = m_pullup(m, hlen)) == NULL) { 522 if ((m = m_pullup(m, hlen)) == NULL) {
521 IP_STATINC(IP_STAT_BADHLEN); 523 IP_STATINC(IP_STAT_BADHLEN);
522 goto out; 524 goto out;
523 } 525 }
524 ip = mtod(m, struct ip *); 526 ip = mtod(m, struct ip *);
525 } 527 }
526 528
527 /* 529 /*
528 * RFC1122: packets with a multicast source address are 530 * RFC1122: packets with a multicast source address are
529 * not allowed. 531 * not allowed.
530 */ 532 */
531 if (IN_MULTICAST(ip->ip_src.s_addr)) { 533 if (IN_MULTICAST(ip->ip_src.s_addr)) {
532 IP_STATINC(IP_STAT_BADADDR); 534 IP_STATINC(IP_STAT_BADADDR);
533 goto out; 535 goto out;
534 } 536 }
535 537
536 /* 127/8 must not appear on wire - RFC1122 */ 538 /* 127/8 must not appear on wire - RFC1122 */
537 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 539 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
538 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 540 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
539 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 541 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
540 IP_STATINC(IP_STAT_BADADDR); 542 IP_STATINC(IP_STAT_BADADDR);
541 goto out; 543 goto out;
542 } 544 }
543 } 545 }
544 546
545 switch (m->m_pkthdr.csum_flags & 547 switch (m->m_pkthdr.csum_flags &
546 ((ifp->if_csum_flags_rx & M_CSUM_IPv4) | 548 ((ifp->if_csum_flags_rx & M_CSUM_IPv4) |
547 M_CSUM_IPv4_BAD)) { 549 M_CSUM_IPv4_BAD)) {
548 case M_CSUM_IPv4|M_CSUM_IPv4_BAD: 550 case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
549 INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); 551 INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
550 IP_STATINC(IP_STAT_BADSUM); 552 IP_STATINC(IP_STAT_BADSUM);
551 goto out; 553 goto out;
552 554
553 case M_CSUM_IPv4: 555 case M_CSUM_IPv4:
554 /* Checksum was okay. */ 556 /* Checksum was okay. */
555 INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); 557 INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
556 break; 558 break;
557 559
558 default: 560 default:
559 /* 561 /*
560 * Must compute it ourselves. Maybe skip checksum on 562 * Must compute it ourselves. Maybe skip checksum on
561 * loopback interfaces. 563 * loopback interfaces.
562 */ 564 */
563 if (__predict_true(!(ifp->if_flags & IFF_LOOPBACK) || 565 if (__predict_true(!(ifp->if_flags & IFF_LOOPBACK) ||
564 ip_do_loopback_cksum)) { 566 ip_do_loopback_cksum)) {
565 INET_CSUM_COUNTER_INCR(&ip_swcsum); 567 INET_CSUM_COUNTER_INCR(&ip_swcsum);
566 if (in_cksum(m, hlen) != 0) { 568 if (in_cksum(m, hlen) != 0) {
567 IP_STATINC(IP_STAT_BADSUM); 569 IP_STATINC(IP_STAT_BADSUM);
568 goto out; 570 goto out;
569 } 571 }
570 } 572 }
571 break; 573 break;
572 } 574 }
573 575
574 /* Retrieve the packet length. */ 576 /* Retrieve the packet length. */
575 len = ntohs(ip->ip_len); 577 len = ntohs(ip->ip_len);
576 578
577 /* 579 /*
578 * Check for additional length bogosity 580 * Check for additional length bogosity
579 */ 581 */
580 if (len < hlen) { 582 if (len < hlen) {
581 IP_STATINC(IP_STAT_BADLEN); 583 IP_STATINC(IP_STAT_BADLEN);
582 goto out; 584 goto out;
583 } 585 }
584 586
585 /* 587 /*
586 * Check that the amount of data in the buffers 588 * Check that the amount of data in the buffers
587 * is as at least much as the IP header would have us expect. 589 * is as at least much as the IP header would have us expect.
588 * Trim mbufs if longer than we expect. 590 * Trim mbufs if longer than we expect.
589 * Drop packet if shorter than we expect. 591 * Drop packet if shorter than we expect.
590 */ 592 */
591 if (m->m_pkthdr.len < len) { 593 if (m->m_pkthdr.len < len) {
592 IP_STATINC(IP_STAT_TOOSHORT); 594 IP_STATINC(IP_STAT_TOOSHORT);
593 goto out; 595 goto out;
594 } 596 }
595 if (m->m_pkthdr.len > len) { 597 if (m->m_pkthdr.len > len) {
596 if (m->m_len == m->m_pkthdr.len) { 598 if (m->m_len == m->m_pkthdr.len) {
597 m->m_len = len; 599 m->m_len = len;
598 m->m_pkthdr.len = len; 600 m->m_pkthdr.len = len;
599 } else 601 } else
600 m_adj(m, len - m->m_pkthdr.len); 602 m_adj(m, len - m->m_pkthdr.len);
601 } 603 }
602 604
603 /* 605 /*
604 * Assume that we can create a fast-forward IP flow entry 606 * Assume that we can create a fast-forward IP flow entry
605 * based on this packet. 607 * based on this packet.
606 */ 608 */
607 m->m_flags |= M_CANFASTFWD; 609 m->m_flags |= M_CANFASTFWD;
608 610
609 /* 611 /*
610 * Run through list of hooks for input packets. If there are any 612 * Run through list of hooks for input packets. If there are any
611 * filters which require that additional packets in the flow are 613 * filters which require that additional packets in the flow are
612 * not fast-forwarded, they must clear the M_CANFASTFWD flag. 614 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
613 * Note that filters must _never_ set this flag, as another filter 615 * Note that filters must _never_ set this flag, as another filter
614 * in the list may have previously cleared it. 616 * in the list may have previously cleared it.
615 */ 617 */
616#if defined(IPSEC) 618#if defined(IPSEC)
617 if (!ipsec_used || !ipsec_indone(m)) 619 if (!ipsec_used || !ipsec_indone(m))
618#else 620#else
619 if (1) 621 if (1)
620#endif 622#endif
621 { 623 {
622 struct in_addr odst = ip->ip_dst; 624 struct in_addr odst = ip->ip_dst;
623 bool freed; 625 bool freed;
624 626
625 freed = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0; 627 freed = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0;
626 if (freed || m == NULL) { 628 if (freed || m == NULL) {
627 m = NULL; 629 m = NULL;
628 goto out; 630 goto out;
629 } 631 }
630 ip = mtod(m, struct ip *); 632 ip = mtod(m, struct ip *);
631 hlen = ip->ip_hl << 2; 633 hlen = ip->ip_hl << 2;
632 634
633 /* 635 /*
634 * XXX The setting of "srcrt" here is to prevent ip_forward() 636 * XXX The setting of "srcrt" here is to prevent ip_forward()
635 * from generating ICMP redirects for packets that have 637 * from generating ICMP redirects for packets that have
636 * been redirected by a hook back out on to the same LAN that 638 * been redirected by a hook back out on to the same LAN that
637 * they came from and is not an indication that the packet 639 * they came from and is not an indication that the packet
638 * is being inffluenced by source routing options. This 640 * is being inffluenced by source routing options. This
639 * allows things like 641 * allows things like
640 * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp" 642 * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp"
641 * where tlp0 is both on the 1.1.1.0/24 network and is the 643 * where tlp0 is both on the 1.1.1.0/24 network and is the
642 * default route for hosts on 1.1.1.0/24. Of course this 644 * default route for hosts on 1.1.1.0/24. Of course this
643 * also requires a "map tlp0 ..." to complete the story. 645 * also requires a "map tlp0 ..." to complete the story.
644 * One might argue whether or not this kind of network config. 646 * One might argue whether or not this kind of network config.
645 * should be supported in this manner... 647 * should be supported in this manner...
646 */ 648 */
647 srcrt = (odst.s_addr != ip->ip_dst.s_addr); 649 srcrt = (odst.s_addr != ip->ip_dst.s_addr);
648 } 650 }
649 651
650#ifdef ALTQ 652#ifdef ALTQ
651 /* XXX Temporary until ALTQ is changed to use a pfil hook */ 653 /* XXX Temporary until ALTQ is changed to use a pfil hook */
652 if (altq_input) { 654 if (altq_input) {
653 SOFTNET_LOCK(); 655 SOFTNET_LOCK();
654 if ((*altq_input)(m, AF_INET) == 0) { 656 if ((*altq_input)(m, AF_INET) == 0) {
655 /* Packet dropped by traffic conditioner. */ 657 /* Packet dropped by traffic conditioner. */
656 SOFTNET_UNLOCK(); 658 SOFTNET_UNLOCK();
657 m = NULL; 659 m = NULL;
658 goto out; 660 goto out;
659 } 661 }
660 SOFTNET_UNLOCK(); 662 SOFTNET_UNLOCK();
661 } 663 }
662#endif 664#endif
663 665
664 /* 666 /*
665 * Process options and, if not destined for us, 667 * Process options and, if not destined for us,
666 * ship it on. ip_dooptions returns 1 when an 668 * ship it on. ip_dooptions returns 1 when an
667 * error was detected (causing an icmp message 669 * error was detected (causing an icmp message
668 * to be sent and the original packet to be freed). 670 * to be sent and the original packet to be freed).
669 */ 671 */
670 ip_nhops = 0; /* for source routed packets */ 672 ip_nhops = 0; /* for source routed packets */
671 if (hlen > sizeof (struct ip) && ip_dooptions(m)) { 673 if (hlen > sizeof (struct ip) && ip_dooptions(m)) {
672 m = NULL; 674 m = NULL;
673 goto out; 675 goto out;
674 } 676 }
675 677
676 /* 678 /*
677 * Check our list of addresses, to see if the packet is for us. 679 * Check our list of addresses, to see if the packet is for us.
678 * 680 *
679 * Traditional 4.4BSD did not consult IFF_UP at all. 681 * Traditional 4.4BSD did not consult IFF_UP at all.
680 * The behavior here is to treat addresses on !IFF_UP interface 682 * The behavior here is to treat addresses on !IFF_UP interface
681 * or IN_IFF_NOTREADY addresses as not mine. 683 * or IN_IFF_NOTREADY addresses as not mine.
682 */ 684 */
683 downmatch = 0; 685 downmatch = 0;
684 s = pserialize_read_enter(); 686 s = pserialize_read_enter();
685 ia = ip_match_our_address(ifp, ip, &downmatch); 687 ia = ip_match_our_address(ifp, ip, &downmatch);
686 if (ia != NULL) { 688 if (ia != NULL) {
687 pserialize_read_exit(s); 689 pserialize_read_exit(s);
688 goto ours; 690 goto ours;
689 } 691 }
690 692
691 if (ifp->if_flags & IFF_BROADCAST) { 693 if (ifp->if_flags & IFF_BROADCAST) {
692 ia = ip_match_our_address_broadcast(ifp, ip); 694 ia = ip_match_our_address_broadcast(ifp, ip);
693 if (ia != NULL) { 695 if (ia != NULL) {
694 pserialize_read_exit(s); 696 pserialize_read_exit(s);
695 goto ours; 697 goto ours;
696 } 698 }
697 } 699 }
698 pserialize_read_exit(s); 700 pserialize_read_exit(s);
699 701
700 if (IN_MULTICAST(ip->ip_dst.s_addr)) { 702 if (IN_MULTICAST(ip->ip_dst.s_addr)) {
701#ifdef MROUTING 703#ifdef MROUTING
702 extern struct socket *ip_mrouter; 704 extern struct socket *ip_mrouter;
703 705
704 if (ip_mrouter) { 706 if (ip_mrouter) {
705 /* 707 /*
706 * If we are acting as a multicast router, all 708 * If we are acting as a multicast router, all
707 * incoming multicast packets are passed to the 709 * incoming multicast packets are passed to the
708 * kernel-level multicast forwarding function. 710 * kernel-level multicast forwarding function.
709 * The packet is returned (relatively) intact; if 711 * The packet is returned (relatively) intact; if
710 * ip_mforward() returns a non-zero value, the packet 712 * ip_mforward() returns a non-zero value, the packet
711 * must be discarded, else it may be accepted below. 713 * must be discarded, else it may be accepted below.
712 * 714 *
713 * (The IP ident field is put in the same byte order 715 * (The IP ident field is put in the same byte order
714 * as expected when ip_mforward() is called from 716 * as expected when ip_mforward() is called from
715 * ip_output().) 717 * ip_output().)
716 */ 718 */
717 SOFTNET_LOCK(); 719 SOFTNET_LOCK();
718 if (ip_mforward(m, ifp) != 0) { 720 if (ip_mforward(m, ifp) != 0) {
719 SOFTNET_UNLOCK(); 721 SOFTNET_UNLOCK();
720 IP_STATINC(IP_STAT_CANTFORWARD); 722 IP_STATINC(IP_STAT_CANTFORWARD);
721 goto out; 723 goto out;
722 } 724 }
723 SOFTNET_UNLOCK(); 725 SOFTNET_UNLOCK();
724 726
725 /* 727 /*
726 * The process-level routing demon needs to receive 728 * The process-level routing demon needs to receive
727 * all multicast IGMP packets, whether or not this 729 * all multicast IGMP packets, whether or not this
728 * host belongs to their destination groups. 730 * host belongs to their destination groups.
729 */ 731 */
730 if (ip->ip_p == IPPROTO_IGMP) { 732 if (ip->ip_p == IPPROTO_IGMP) {
731 goto ours; 733 goto ours;
732 } 734 }
733 IP_STATINC(IP_STAT_CANTFORWARD); 735 IP_STATINC(IP_STAT_CANTFORWARD);
734 } 736 }
735#endif 737#endif
736 /* 738 /*
737 * See if we belong to the destination multicast group on the 739 * See if we belong to the destination multicast group on the
738 * arrival interface. 740 * arrival interface.
739 */ 741 */
740 if (!in_multi_group(ip->ip_dst, ifp, 0)) { 742 if (!in_multi_group(ip->ip_dst, ifp, 0)) {
741 IP_STATINC(IP_STAT_CANTFORWARD); 743 IP_STATINC(IP_STAT_CANTFORWARD);
742 goto out; 744 goto out;
743 } 745 }
744 goto ours; 746 goto ours;
745 } 747 }
746 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 748 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
747 in_nullhost(ip->ip_dst)) 749 in_nullhost(ip->ip_dst))
748 goto ours; 750 goto ours;
749 751
750 /* 752 /*
751 * Not for us; forward if possible and desirable. 753 * Not for us; forward if possible and desirable.
752 */ 754 */
753 if (ipforwarding == 0) { 755 if (ipforwarding == 0) {
754 m_put_rcvif_psref(ifp, &psref); 756 m_put_rcvif_psref(ifp, &psref);
755 IP_STATINC(IP_STAT_CANTFORWARD); 757 IP_STATINC(IP_STAT_CANTFORWARD);
756 m_freem(m); 758 m_freem(m);
757 } else { 759 } else {
758 /* 760 /*
759 * If ip_dst matched any of my address on !IFF_UP interface, 761 * If ip_dst matched any of my address on !IFF_UP interface,
760 * and there's no IFF_UP interface that matches ip_dst, 762 * and there's no IFF_UP interface that matches ip_dst,
761 * send icmp unreach. Forwarding it will result in in-kernel 763 * send icmp unreach. Forwarding it will result in in-kernel
762 * forwarding loop till TTL goes to 0. 764 * forwarding loop till TTL goes to 0.
763 */ 765 */
764 if (downmatch) { 766 if (downmatch) {
765 m_put_rcvif_psref(ifp, &psref); 767 m_put_rcvif_psref(ifp, &psref);
766 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 768 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
767 IP_STATINC(IP_STAT_CANTFORWARD); 769 IP_STATINC(IP_STAT_CANTFORWARD);
768 return; 770 return;
769 } 771 }
770#ifdef IPSEC 772#ifdef IPSEC
771 /* Perform IPsec, if any. */ 773 /* Perform IPsec, if any. */
772 if (ipsec_used) { 774 if (ipsec_used) {
773 SOFTNET_LOCK(); 775 SOFTNET_LOCK();
774 if (ipsec4_input(m, IP_FORWARDING | 776 if (ipsec4_input(m, IP_FORWARDING |
775 (ip_directedbcast ? IP_ALLOWBROADCAST : 0)) != 0) { 777 (ip_directedbcast ? IP_ALLOWBROADCAST : 0)) != 0) {
776 SOFTNET_UNLOCK(); 778 SOFTNET_UNLOCK();
777 goto out; 779 goto out;
778 } 780 }
779 SOFTNET_UNLOCK(); 781 SOFTNET_UNLOCK();
780 } 782 }
781#endif 783#endif
782 ip_forward(m, srcrt, ifp); 784 ip_forward(m, srcrt, ifp);
783 m_put_rcvif_psref(ifp, &psref); 785 m_put_rcvif_psref(ifp, &psref);
784 } 786 }
785 return; 787 return;
786 788
787ours: 789ours:
788 m_put_rcvif_psref(ifp, &psref); 790 m_put_rcvif_psref(ifp, &psref);
789 ifp = NULL; 791 ifp = NULL;
790 792
791 /* 793 /*
792 * If offset or IP_MF are set, must reassemble. 794 * If offset or IP_MF are set, must reassemble.
793 */ 795 */
794 if (ip->ip_off & ~htons(IP_DF|IP_RF)) { 796 if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
795 /* 797 /*
796 * Pass to IP reassembly mechanism. 798 * Pass to IP reassembly mechanism.
797 */ 799 */
798 if (ip_reass_packet(&m, ip) != 0) { 800 if (ip_reass_packet(&m, ip) != 0) {
799 /* Failed; invalid fragment(s) or packet. */ 801 /* Failed; invalid fragment(s) or packet. */
800 goto out; 802 goto out;
801 } 803 }
802 if (m == NULL) { 804 if (m == NULL) {
803 /* More fragments should come; silently return. */ 805 /* More fragments should come; silently return. */
804 goto out; 806 goto out;
805 } 807 }
806 /* 808 /*
807 * Reassembly is done, we have the final packet. 809 * Reassembly is done, we have the final packet.
808 * Updated cached data in local variable(s). 810 * Updated cached data in local variable(s).
809 */ 811 */
810 ip = mtod(m, struct ip *); 812 ip = mtod(m, struct ip *);
811 hlen = ip->ip_hl << 2; 813 hlen = ip->ip_hl << 2;
812 } 814 }
813 815
814#ifdef IPSEC 816#ifdef IPSEC
815 /* 817 /*
816 * Enforce IPsec policy checking if we are seeing last header. 818 * Enforce IPsec policy checking if we are seeing last header.
817 * Note that we do not visit this with protocols with PCB layer 819 * Note that we do not visit this with protocols with PCB layer
818 * code - like UDP/TCP/raw IP. 820 * code - like UDP/TCP/raw IP.
819 */ 821 */
820 if (ipsec_used && 822 if (ipsec_used &&
821 (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { 823 (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
822 SOFTNET_LOCK(); 824 SOFTNET_LOCK();
823 if (ipsec4_input(m, 0) != 0) { 825 if (ipsec4_input(m, 0) != 0) {
824 SOFTNET_UNLOCK(); 826 SOFTNET_UNLOCK();
825 goto out; 827 goto out;
826 } 828 }
827 SOFTNET_UNLOCK(); 829 SOFTNET_UNLOCK();
828 } 830 }
829#endif 831#endif
830 832
831 /* 833 /*
832 * Switch out to protocol's input routine. 834 * Switch out to protocol's input routine.
833 */ 835 */
834#if IFA_STATS 836#if IFA_STATS
835 if (ia && ip) { 837 if (ia && ip) {
836 struct in_ifaddr *_ia; 838 struct in_ifaddr *_ia;
837 /* 839 /*
838 * Keep a reference from ip_match_our_address with psref 840 * Keep a reference from ip_match_our_address with psref
839 * is expensive, so explore ia here again. 841 * is expensive, so explore ia here again.
840 */ 842 */
841 s = pserialize_read_enter(); 843 s = pserialize_read_enter();
842 _ia = in_get_ia(ip->ip_dst); 844 _ia = in_get_ia(ip->ip_dst);
843 _ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); 845 _ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
844 pserialize_read_exit(s); 846 pserialize_read_exit(s);
845 } 847 }
846#endif 848#endif
847 IP_STATINC(IP_STAT_DELIVERED); 849 IP_STATINC(IP_STAT_DELIVERED);
848 850
849 const int off = hlen, nh = ip->ip_p; 851 const int off = hlen, nh = ip->ip_p;
850 852
851 SOFTNET_LOCK(); 853 SOFTNET_LOCK();
852 (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); 854 (*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
853 SOFTNET_UNLOCK(); 855 SOFTNET_UNLOCK();
854 return; 856 return;
855 857
856out: 858out:
857 m_put_rcvif_psref(ifp, &psref); 859 m_put_rcvif_psref(ifp, &psref);
858 if (m != NULL) 860 if (m != NULL)
859 m_freem(m); 861 m_freem(m);
860} 862}
861 863
/*
 * IP timer processing.
 *
 * Runs the reassembly slow timeout, ip_reass_slowtimo().  Unless the
 * kernel is built with NET_MPSAFE, the call is made with softnet_lock
 * and the kernel lock held; they are dropped in reverse order afterwards.
 */
void
ip_slowtimo(void)
{

#ifndef NET_MPSAFE
	mutex_enter(softnet_lock);
	KERNEL_LOCK(1, NULL);
#endif

	ip_reass_slowtimo();

#ifndef NET_MPSAFE
	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
#endif
}
881 883
/*
 * IP drain processing.
 *
 * Calls ip_reass_drain() with the kernel lock held.
 */
void
ip_drain(void)
{

	KERNEL_LOCK(1, NULL);
	ip_reass_drain();
	KERNEL_UNLOCK_ONE(NULL);
}
893 895
894/* 896/*
895 * ip_dooptions: perform option processing on a datagram, possibly discarding 897 * ip_dooptions: perform option processing on a datagram, possibly discarding
896 * it if bad options are encountered, or forwarding it if source-routed. 898 * it if bad options are encountered, or forwarding it if source-routed.
897 * 899 *
898 * => Returns true if packet has been forwarded/freed. 900 * => Returns true if packet has been forwarded/freed.
899 * => Returns false if the packet should be processed further. 901 * => Returns false if the packet should be processed further.
900 */ 902 */
901static bool 903static bool
902ip_dooptions(struct mbuf *m) 904ip_dooptions(struct mbuf *m)
903{ 905{
904 struct ip *ip = mtod(m, struct ip *); 906 struct ip *ip = mtod(m, struct ip *);
905 u_char *cp, *cp0; 907 u_char *cp, *cp0;
906 struct ip_timestamp *ipt; 908 struct ip_timestamp *ipt;
907 struct in_ifaddr *ia; 909 struct in_ifaddr *ia;
908 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; 910 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
909 struct in_addr dst; 911 struct in_addr dst;
910 n_time ntime; 912 n_time ntime;
911 struct ifaddr *ifa = NULL; 913 struct ifaddr *ifa = NULL;
912 int s; 914 int s;
913 915
914 dst = ip->ip_dst; 916 dst = ip->ip_dst;
915 cp = (u_char *)(ip + 1); 917 cp = (u_char *)(ip + 1);
916 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 918 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
917 for (; cnt > 0; cnt -= optlen, cp += optlen) { 919 for (; cnt > 0; cnt -= optlen, cp += optlen) {
918 opt = cp[IPOPT_OPTVAL]; 920 opt = cp[IPOPT_OPTVAL];
919 if (opt == IPOPT_EOL) 921 if (opt == IPOPT_EOL)
920 break; 922 break;
921 if (opt == IPOPT_NOP) 923 if (opt == IPOPT_NOP)
922 optlen = 1; 924 optlen = 1;
923 else { 925 else {
924 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 926 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
925 code = &cp[IPOPT_OLEN] - (u_char *)ip; 927 code = &cp[IPOPT_OLEN] - (u_char *)ip;
926 goto bad; 928 goto bad;
927 } 929 }
928 optlen = cp[IPOPT_OLEN]; 930 optlen = cp[IPOPT_OLEN];
929 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { 931 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
930 code = &cp[IPOPT_OLEN] - (u_char *)ip; 932 code = &cp[IPOPT_OLEN] - (u_char *)ip;
931 goto bad; 933 goto bad;
932 } 934 }
933 } 935 }
934 switch (opt) { 936 switch (opt) {
935 937
936 default: 938 default:
937 break; 939 break;
938 940
939 /* 941 /*
940 * Source routing with record. 942 * Source routing with record.
941 * Find interface with current destination address. 943 * Find interface with current destination address.
942 * If none on this machine then drop if strictly routed, 944 * If none on this machine then drop if strictly routed,
943 * or do nothing if loosely routed. 945 * or do nothing if loosely routed.
944 * Record interface address and bring up next address 946 * Record interface address and bring up next address
945 * component. If strictly routed make sure next 947 * component. If strictly routed make sure next
946 * address is on directly accessible net. 948 * address is on directly accessible net.
947 */ 949 */
948 case IPOPT_LSRR: 950 case IPOPT_LSRR:
949 case IPOPT_SSRR: { 951 case IPOPT_SSRR: {
950 struct psref psref; 952 struct psref psref;
951 if (ip_allowsrcrt == 0) { 953 if (ip_allowsrcrt == 0) {
952 type = ICMP_UNREACH; 954 type = ICMP_UNREACH;
953 code = ICMP_UNREACH_NET_PROHIB; 955 code = ICMP_UNREACH_NET_PROHIB;
954 goto bad; 956 goto bad;
955 } 957 }
956 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 958 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
957 code = &cp[IPOPT_OLEN] - (u_char *)ip; 959 code = &cp[IPOPT_OLEN] - (u_char *)ip;
958 goto bad; 960 goto bad;
959 } 961 }
960 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 962 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
961 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 963 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
962 goto bad; 964 goto bad;
963 } 965 }
964 ipaddr.sin_addr = ip->ip_dst; 966 ipaddr.sin_addr = ip->ip_dst;
965 967
966 s = pserialize_read_enter(); 968 s = pserialize_read_enter();
967 ifa = ifa_ifwithaddr(sintosa(&ipaddr)); 969 ifa = ifa_ifwithaddr(sintosa(&ipaddr));
968 if (ifa == NULL) { 970 if (ifa == NULL) {
969 pserialize_read_exit(s); 971 pserialize_read_exit(s);
970 if (opt == IPOPT_SSRR) { 972 if (opt == IPOPT_SSRR) {
971 type = ICMP_UNREACH; 973 type = ICMP_UNREACH;
972 code = ICMP_UNREACH_SRCFAIL; 974 code = ICMP_UNREACH_SRCFAIL;
973 goto bad; 975 goto bad;
974 } 976 }
975 /* 977 /*
976 * Loose routing, and not at next destination 978 * Loose routing, and not at next destination
977 * yet; nothing to do except forward. 979 * yet; nothing to do except forward.
978 */ 980 */
979 break; 981 break;
980 } 982 }
981 pserialize_read_exit(s); 983 pserialize_read_exit(s);
982 984
983 off--; /* 0 origin */ 985 off--; /* 0 origin */
984 if ((off + sizeof(struct in_addr)) > optlen) { 986 if ((off + sizeof(struct in_addr)) > optlen) {
985 /* 987 /*
986 * End of source route. Should be for us. 988 * End of source route. Should be for us.
987 */ 989 */
988 save_rte(cp, ip->ip_src); 990 save_rte(cp, ip->ip_src);
989 break; 991 break;
990 } 992 }
991 /* 993 /*
992 * locate outgoing interface 994 * locate outgoing interface
993 */ 995 */
994 memcpy((void *)&ipaddr.sin_addr, (void *)(cp + off), 996 memcpy((void *)&ipaddr.sin_addr, (void *)(cp + off),
995 sizeof(ipaddr.sin_addr)); 997 sizeof(ipaddr.sin_addr));
996 if (opt == IPOPT_SSRR) { 998 if (opt == IPOPT_SSRR) {
997 ifa = ifa_ifwithladdr_psref(sintosa(&ipaddr), 999 ifa = ifa_ifwithladdr_psref(sintosa(&ipaddr),
998 &psref); 1000 &psref);
999 if (ifa != NULL) 1001 if (ifa != NULL)
1000 ia = ifatoia(ifa); 1002 ia = ifatoia(ifa);
1001 else 1003 else
1002 ia = NULL; 1004 ia = NULL;
1003 } else { 1005 } else {
1004 ia = ip_rtaddr(ipaddr.sin_addr, &psref); 1006 ia = ip_rtaddr(ipaddr.sin_addr, &psref);
1005 } 1007 }
1006 if (ia == NULL) { 1008 if (ia == NULL) {
1007 type = ICMP_UNREACH; 1009 type = ICMP_UNREACH;
1008 code = ICMP_UNREACH_SRCFAIL; 1010 code = ICMP_UNREACH_SRCFAIL;
1009 goto bad; 1011 goto bad;
1010 } 1012 }
1011 ip->ip_dst = ipaddr.sin_addr; 1013 ip->ip_dst = ipaddr.sin_addr;
1012 bcopy((void *)&ia->ia_addr.sin_addr, 1014 bcopy((void *)&ia->ia_addr.sin_addr,
1013 (void *)(cp + off), sizeof(struct in_addr)); 1015 (void *)(cp + off), sizeof(struct in_addr));
1014 ia4_release(ia, &psref); 1016 ia4_release(ia, &psref);
1015 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1017 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1016 /* 1018 /*
1017 * Let ip_intr's mcast routing check handle mcast pkts 1019 * Let ip_intr's mcast routing check handle mcast pkts
1018 */ 1020 */
1019 forward = !IN_MULTICAST(ip->ip_dst.s_addr); 1021 forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1020 break; 1022 break;
1021 } 1023 }
1022 1024
1023 case IPOPT_RR: { 1025 case IPOPT_RR: {
1024 struct psref psref; 1026 struct psref psref;
1025 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 1027 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1026 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1028 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1027 goto bad; 1029 goto bad;
1028 } 1030 }
1029 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 1031 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1030 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1032 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1031 goto bad; 1033 goto bad;
1032 } 1034 }
1033 /* 1035 /*
1034 * If no space remains, ignore. 1036 * If no space remains, ignore.
1035 */ 1037 */
1036 off--; /* 0 origin */ 1038 off--; /* 0 origin */
1037 if ((off + sizeof(struct in_addr)) > optlen) 1039 if ((off + sizeof(struct in_addr)) > optlen)
1038 break; 1040 break;
1039 memcpy((void *)&ipaddr.sin_addr, (void *)(&ip->ip_dst), 1041 memcpy((void *)&ipaddr.sin_addr, (void *)(&ip->ip_dst),
1040 sizeof(ipaddr.sin_addr)); 1042 sizeof(ipaddr.sin_addr));
1041 /* 1043 /*
1042 * locate outgoing interface; if we're the destination, 1044 * locate outgoing interface; if we're the destination,
1043 * use the incoming interface (should be same). 1045 * use the incoming interface (should be same).
1044 */ 1046 */
1045 ifa = ifa_ifwithaddr_psref(sintosa(&ipaddr), &psref); 1047 ifa = ifa_ifwithaddr_psref(sintosa(&ipaddr), &psref);
1046 if (ifa == NULL) { 1048 if (ifa == NULL) {
1047 ia = ip_rtaddr(ipaddr.sin_addr, &psref); 1049 ia = ip_rtaddr(ipaddr.sin_addr, &psref);
1048 if (ia == NULL) { 1050 if (ia == NULL) {
1049 type = ICMP_UNREACH; 1051 type = ICMP_UNREACH;
1050 code = ICMP_UNREACH_HOST; 1052 code = ICMP_UNREACH_HOST;
1051 goto bad; 1053 goto bad;
1052 } 1054 }
1053 } else { 1055 } else {
1054 ia = ifatoia(ifa); 1056 ia = ifatoia(ifa);
1055 } 1057 }
1056 bcopy((void *)&ia->ia_addr.sin_addr, 1058 bcopy((void *)&ia->ia_addr.sin_addr,
1057 (void *)(cp + off), sizeof(struct in_addr)); 1059 (void *)(cp + off), sizeof(struct in_addr));
1058 ia4_release(ia, &psref); 1060 ia4_release(ia, &psref);
1059 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1061 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1060 break; 1062 break;
1061 } 1063 }
1062 1064
1063 case IPOPT_TS: 1065 case IPOPT_TS:
1064 code = cp - (u_char *)ip; 1066 code = cp - (u_char *)ip;
1065 ipt = (struct ip_timestamp *)cp; 1067 ipt = (struct ip_timestamp *)cp;
1066 if (ipt->ipt_len < 4 || ipt->ipt_len > 40) { 1068 if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
1067 code = (u_char *)&ipt->ipt_len - (u_char *)ip; 1069 code = (u_char *)&ipt->ipt_len - (u_char *)ip;
1068 goto bad; 1070 goto bad;
1069 } 1071 }
1070 if (ipt->ipt_ptr < 5) { 1072 if (ipt->ipt_ptr < 5) {
1071 code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; 1073 code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
1072 goto bad; 1074 goto bad;
1073 } 1075 }
1074 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) { 1076 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
1075 if (++ipt->ipt_oflw == 0) { 1077 if (++ipt->ipt_oflw == 0) {
1076 code = (u_char *)&ipt->ipt_ptr - 1078 code = (u_char *)&ipt->ipt_ptr -
1077 (u_char *)ip; 1079 (u_char *)ip;
1078 goto bad; 1080 goto bad;
1079 } 1081 }
1080 break; 1082 break;
1081 } 1083 }
1082 cp0 = (cp + ipt->ipt_ptr - 1); 1084 cp0 = (cp + ipt->ipt_ptr - 1);
1083 switch (ipt->ipt_flg) { 1085 switch (ipt->ipt_flg) {
1084 1086
1085 case IPOPT_TS_TSONLY: 1087 case IPOPT_TS_TSONLY:
1086 break; 1088 break;
1087 1089
1088 case IPOPT_TS_TSANDADDR: { 1090 case IPOPT_TS_TSANDADDR: {
1089 struct ifnet *rcvif; 1091 struct ifnet *rcvif;
1090 int _s, _ss; 1092 int _s, _ss;
1091 1093
1092 if (ipt->ipt_ptr - 1 + sizeof(n_time) + 1094 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1093 sizeof(struct in_addr) > ipt->ipt_len) { 1095 sizeof(struct in_addr) > ipt->ipt_len) {
1094 code = (u_char *)&ipt->ipt_ptr - 1096 code = (u_char *)&ipt->ipt_ptr -
1095 (u_char *)ip; 1097 (u_char *)ip;
1096 goto bad; 1098 goto bad;
1097 } 1099 }
1098 ipaddr.sin_addr = dst; 1100 ipaddr.sin_addr = dst;
1099 _ss = pserialize_read_enter(); 1101 _ss = pserialize_read_enter();
1100 rcvif = m_get_rcvif(m, &_s); 1102 rcvif = m_get_rcvif(m, &_s);
1101 if (__predict_true(rcvif != NULL)) { 1103 if (__predict_true(rcvif != NULL)) {
1102 ifa = ifaof_ifpforaddr(sintosa(&ipaddr), 1104 ifa = ifaof_ifpforaddr(sintosa(&ipaddr),
1103 rcvif); 1105 rcvif);
1104 } 1106 }
1105 m_put_rcvif(rcvif, &_s); 1107 m_put_rcvif(rcvif, &_s);
1106 if (ifa == NULL) { 1108 if (ifa == NULL) {
1107 pserialize_read_exit(_ss); 1109 pserialize_read_exit(_ss);
1108 break; 1110 break;
1109 } 1111 }
1110 ia = ifatoia(ifa); 1112 ia = ifatoia(ifa);
1111 bcopy(&ia->ia_addr.sin_addr, 1113 bcopy(&ia->ia_addr.sin_addr,
1112 cp0, sizeof(struct in_addr)); 1114 cp0, sizeof(struct in_addr));
1113 pserialize_read_exit(_ss); 1115 pserialize_read_exit(_ss);
1114 ipt->ipt_ptr += sizeof(struct in_addr); 1116 ipt->ipt_ptr += sizeof(struct in_addr);
1115 break; 1117 break;
1116 } 1118 }
1117 1119
1118 case IPOPT_TS_PRESPEC: 1120 case IPOPT_TS_PRESPEC:
1119 if (ipt->ipt_ptr - 1 + sizeof(n_time) + 1121 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1120 sizeof(struct in_addr) > ipt->ipt_len) { 1122 sizeof(struct in_addr) > ipt->ipt_len) {
1121 code = (u_char *)&ipt->ipt_ptr - 1123 code = (u_char *)&ipt->ipt_ptr -
1122 (u_char *)ip; 1124 (u_char *)ip;
1123 goto bad; 1125 goto bad;
1124 } 1126 }
1125 memcpy(&ipaddr.sin_addr, cp0, 1127 memcpy(&ipaddr.sin_addr, cp0,
1126 sizeof(struct in_addr)); 1128 sizeof(struct in_addr));
1127 s = pserialize_read_enter(); 1129 s = pserialize_read_enter();
1128 ifa = ifa_ifwithaddr(sintosa(&ipaddr)); 1130 ifa = ifa_ifwithaddr(sintosa(&ipaddr));
1129 if (ifa == NULL) { 1131 if (ifa == NULL) {
1130 pserialize_read_exit(s); 1132 pserialize_read_exit(s);
1131 continue; 1133 continue;
1132 } 1134 }
1133 pserialize_read_exit(s); 1135 pserialize_read_exit(s);
1134 ipt->ipt_ptr += sizeof(struct in_addr); 1136 ipt->ipt_ptr += sizeof(struct in_addr);
1135 break; 1137 break;
1136 1138
1137 default: 1139 default:
1138 /* XXX can't take &ipt->ipt_flg */ 1140 /* XXX can't take &ipt->ipt_flg */
1139 code = (u_char *)&ipt->ipt_ptr - 1141 code = (u_char *)&ipt->ipt_ptr -
1140 (u_char *)ip + 1; 1142 (u_char *)ip + 1;
1141 goto bad; 1143 goto bad;
1142 } 1144 }
1143 ntime = iptime(); 1145 ntime = iptime();
1144 cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */ 1146 cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
1145 memmove((char *)cp + ipt->ipt_ptr - 1, cp0, 1147 memmove((char *)cp + ipt->ipt_ptr - 1, cp0,
1146 sizeof(n_time)); 1148 sizeof(n_time));
1147 ipt->ipt_ptr += sizeof(n_time); 1149 ipt->ipt_ptr += sizeof(n_time);
1148 } 1150 }
1149 } 1151 }
1150 if (forward) { 1152 if (forward) {
1151 struct ifnet *rcvif; 1153 struct ifnet *rcvif;
1152 struct psref _psref; 1154 struct psref _psref;
1153 1155
1154 if (ip_forwsrcrt == 0) { 1156 if (ip_forwsrcrt == 0) {
1155 type = ICMP_UNREACH; 1157 type = ICMP_UNREACH;
1156 code = ICMP_UNREACH_SRCFAIL; 1158 code = ICMP_UNREACH_SRCFAIL;
1157 goto bad; 1159 goto bad;
1158 } 1160 }
1159 1161
1160 rcvif = m_get_rcvif_psref(m, &_psref); 1162 rcvif = m_get_rcvif_psref(m, &_psref);
1161 if (__predict_false(rcvif == NULL)) { 1163 if (__predict_false(rcvif == NULL)) {
1162 type = ICMP_UNREACH; 1164 type = ICMP_UNREACH;
1163 code = ICMP_UNREACH_HOST; 1165 code = ICMP_UNREACH_HOST;
1164 goto bad; 1166 goto bad;
1165 } 1167 }
1166 ip_forward(m, 1, rcvif); 1168 ip_forward(m, 1, rcvif);
1167 m_put_rcvif_psref(rcvif, &_psref); 1169 m_put_rcvif_psref(rcvif, &_psref);
1168 return true; 1170 return true;
1169 } 1171 }
1170 return false; 1172 return false;
1171bad: 1173bad:
1172 icmp_error(m, type, code, 0, 0); 1174 icmp_error(m, type, code, 0, 0);
1173 IP_STATINC(IP_STAT_BADOPTIONS); 1175 IP_STATINC(IP_STAT_BADOPTIONS);
1174 return true; 1176 return true;
1175} 1177}
1176 1178
1177/* 1179/*
1178 * ip_rtaddr: given address of next destination (final or next hop), 1180 * ip_rtaddr: given address of next destination (final or next hop),
1179 * return internet address info of interface to be used to get there. 1181 * return internet address info of interface to be used to get there.
1180 */ 1182 */
1181static struct in_ifaddr * 1183static struct in_ifaddr *
1182ip_rtaddr(struct in_addr dst, struct psref *psref) 1184ip_rtaddr(struct in_addr dst, struct psref *psref)
1183{ 1185{
1184 struct rtentry *rt; 1186 struct rtentry *rt;
1185 union { 1187 union {
1186 struct sockaddr dst; 1188 struct sockaddr dst;
1187 struct sockaddr_in dst4; 1189 struct sockaddr_in dst4;
1188 } u; 1190 } u;
1189 struct route *ro; 1191 struct route *ro;
1190 1192
1191 sockaddr_in_init(&u.dst4, &dst, 0); 1193 sockaddr_in_init(&u.dst4, &dst, 0);
1192 1194
1193 ro = percpu_getref(ipforward_rt_percpu); 1195 ro = percpu_getref(ipforward_rt_percpu);
1194 rt = rtcache_lookup(ro, &u.dst); 1196 rt = rtcache_lookup(ro, &u.dst);
1195 if (rt == NULL) { 1197 if (rt == NULL) {
1196 percpu_putref(ipforward_rt_percpu); 1198 percpu_putref(ipforward_rt_percpu);
1197 return NULL; 1199 return NULL;
1198 } 1200 }
1199 1201
1200 ia4_acquire(ifatoia(rt->rt_ifa), psref); 1202 ia4_acquire(ifatoia(rt->rt_ifa), psref);
1201 rtcache_unref(rt, ro); 1203 rtcache_unref(rt, ro);
1202 percpu_putref(ipforward_rt_percpu); 1204 percpu_putref(ipforward_rt_percpu);
1203 1205
1204 return ifatoia(rt->rt_ifa); 1206 return ifatoia(rt->rt_ifa);
1205} 1207}
1206 1208
1207/* 1209/*
1208 * save_rte: save incoming source route for use in replies, to be picked 1210 * save_rte: save incoming source route for use in replies, to be picked
1209 * up later by ip_srcroute if the receiver is interested. 1211 * up later by ip_srcroute if the receiver is interested.
1210 */ 1212 */
1211static void 1213static void
1212save_rte(u_char *option, struct in_addr dst) 1214save_rte(u_char *option, struct in_addr dst)
1213{ 1215{
1214 unsigned olen; 1216 unsigned olen;
1215 1217
1216 olen = option[IPOPT_OLEN]; 1218 olen = option[IPOPT_OLEN];
1217 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) 1219 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1218 return; 1220 return;
1219 memcpy((void *)ip_srcrt.srcopt, (void *)option, olen); 1221 memcpy((void *)ip_srcrt.srcopt, (void *)option, olen);
1220 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); 1222 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1221 ip_srcrt.dst = dst; 1223 ip_srcrt.dst = dst;
1222} 1224}
1223 1225
1224/* 1226/*
1225 * Retrieve incoming source route for use in replies, 1227 * Retrieve incoming source route for use in replies,
1226 * in the same form used by setsockopt. 1228 * in the same form used by setsockopt.
1227 * The first hop is placed before the options, will be removed later. 1229 * The first hop is placed before the options, will be removed later.
1228 */ 1230 */
1229struct mbuf * 1231struct mbuf *
1230ip_srcroute(void) 1232ip_srcroute(void)
1231{ 1233{
1232 struct in_addr *p, *q; 1234 struct in_addr *p, *q;
1233 struct mbuf *m; 1235 struct mbuf *m;
1234 1236
1235 if (ip_nhops == 0) 1237 if (ip_nhops == 0)
1236 return NULL; 1238 return NULL;
1237 m = m_get(M_DONTWAIT, MT_SOOPTS); 1239 m = m_get(M_DONTWAIT, MT_SOOPTS);
1238 if (m == 0) 1240 if (m == 0)
1239 return NULL; 1241 return NULL;
1240 1242
1241 MCLAIM(m, &inetdomain.dom_mowner); 1243 MCLAIM(m, &inetdomain.dom_mowner);
1242#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) 1244#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1243 1245
1244 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ 1246 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1245 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + 1247 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1246 OPTSIZ; 1248 OPTSIZ;
1247 1249
1248 /* 1250 /*
1249 * First save first hop for return route 1251 * First save first hop for return route
1250 */ 1252 */
1251 p = &ip_srcrt.route[ip_nhops - 1]; 1253 p = &ip_srcrt.route[ip_nhops - 1];
1252 *(mtod(m, struct in_addr *)) = *p--; 1254 *(mtod(m, struct in_addr *)) = *p--;
1253 1255
1254 /* 1256 /*
1255 * Copy option fields and padding (nop) to mbuf. 1257 * Copy option fields and padding (nop) to mbuf.
1256 */ 1258 */
1257 ip_srcrt.nop = IPOPT_NOP; 1259 ip_srcrt.nop = IPOPT_NOP;
1258 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; 1260 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1259 memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop, 1261 memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop,
1260 OPTSIZ); 1262 OPTSIZ);
1261 q = (struct in_addr *)(mtod(m, char *) + 1263 q = (struct in_addr *)(mtod(m, char *) +
1262 sizeof(struct in_addr) + OPTSIZ); 1264 sizeof(struct in_addr) + OPTSIZ);
1263#undef OPTSIZ 1265#undef OPTSIZ
1264 /* 1266 /*
1265 * Record return path as an IP source route, 1267 * Record return path as an IP source route,
1266 * reversing the path (pointers are now aligned). 1268 * reversing the path (pointers are now aligned).
1267 */ 1269 */
1268 while (p >= ip_srcrt.route) { 1270 while (p >= ip_srcrt.route) {
1269 *q++ = *p--; 1271 *q++ = *p--;
1270 } 1272 }
1271 /* 1273 /*
1272 * Last hop goes to final destination. 1274 * Last hop goes to final destination.
1273 */ 1275 */
1274 *q = ip_srcrt.dst; 1276 *q = ip_srcrt.dst;
1275 return (m); 1277 return (m);
1276} 1278}
1277 1279
/*
 * Table of errno values indexed by PRC_* command.  Commands without an
 * explicit entry are left at 0 by the designated initializers.
 */
const int inetctlerrmap[PRC_NCMDS] = {
	[PRC_MSGSIZE] = EMSGSIZE,
	[PRC_HOSTDEAD] = EHOSTDOWN,
	[PRC_HOSTUNREACH] = EHOSTUNREACH,
	[PRC_UNREACH_NET] = EHOSTUNREACH,
	[PRC_UNREACH_HOST] = EHOSTUNREACH,
	[PRC_UNREACH_PROTOCOL] = ECONNREFUSED,
	[PRC_UNREACH_PORT] = ECONNREFUSED,
	[PRC_UNREACH_SRCFAIL] = EHOSTUNREACH,
	[PRC_PARAMPROB] = ENOPROTOOPT,
};
1289 1291
1290void 1292void
1291ip_fasttimo(void) 1293ip_fasttimo(void)
1292{ 1294{
1293 if (ip_drainwanted) { 1295 if (ip_drainwanted) {
1294 ip_drain(); 1296 ip_drain();
1295 ip_drainwanted = 0; 1297 ip_drainwanted = 0;
1296 } 1298 }
1297} 1299}
1298 1300
/*
 * Request a drain without performing it: only sets ip_drainwanted,
 * which ip_fasttimo() checks to call ip_drain().
 */
void
ip_drainstub(void)
{
	ip_drainwanted = 1;
}
1304 1306
1305/* 1307/*
1306 * Forward a packet. If some error occurs return the sender 1308 * Forward a packet. If some error occurs return the sender
1307 * an icmp packet. Note we can't always generate a meaningful 1309 * an icmp packet. Note we can't always generate a meaningful
1308 * icmp message because icmp doesn't have a large enough repertoire 1310 * icmp message because icmp doesn't have a large enough repertoire
1309 * of codes and types. 1311 * of codes and types.
1310 * 1312 *
1311 * If not forwarding, just drop the packet. This could be confusing 1313 * If not forwarding, just drop the packet. This could be confusing
1312 * if ipforwarding was zero but some routing protocol was advancing 1314 * if ipforwarding was zero but some routing protocol was advancing
1313 * us as a gateway to somewhere. However, we must let the routing 1315 * us as a gateway to somewhere. However, we must let the routing
1314 * protocol deal with that. 1316 * protocol deal with that.
1315 * 1317 *
1316 * The srcrt parameter indicates whether the packet is being forwarded 1318 * The srcrt parameter indicates whether the packet is being forwarded
1317 * via a source route. 1319 * via a source route.
1318 */ 1320 */
1319static void 1321static void
1320ip_forward(struct mbuf *m, int srcrt, struct ifnet *rcvif) 1322ip_forward(struct mbuf *m, int srcrt, struct ifnet *rcvif)
1321{ 1323{
1322 struct ip *ip = mtod(m, struct ip *); 1324 struct ip *ip = mtod(m, struct ip *);
1323 struct rtentry *rt; 1325 struct rtentry *rt;
1324 int error, type = 0, code = 0, destmtu = 0; 1326 int error, type = 0, code = 0, destmtu = 0;
1325 struct mbuf *mcopy; 1327 struct mbuf *mcopy;
1326 n_long dest; 1328 n_long dest;
1327 union { 1329 union {
1328 struct sockaddr dst; 1330 struct sockaddr dst;
1329 struct sockaddr_in dst4; 1331 struct sockaddr_in dst4;
1330 } u; 1332 } u;
1331 uint64_t *ips; 1333 uint64_t *ips;
1332 struct route *ro; 1334 struct route *ro;
1333 1335
1334 KASSERTMSG(cpu_softintr_p(), "ip_forward: not in the software " 1336 KASSERTMSG(cpu_softintr_p(), "ip_forward: not in the software "
1335 "interrupt handler; synchronization assumptions violated"); 1337 "interrupt handler; synchronization assumptions violated");
1336 1338
1337 /* 1339 /*
1338 * We are now in the output path. 1340 * We are now in the output path.
1339 */ 1341 */
1340 MCLAIM(m, &ip_tx_mowner); 1342 MCLAIM(m, &ip_tx_mowner);
1341 1343
1342 /* 1344 /*
1343 * Clear any in-bound checksum flags for this packet. 1345 * Clear any in-bound checksum flags for this packet.
1344 */ 1346 */
1345 m->m_pkthdr.csum_flags = 0; 1347 m->m_pkthdr.csum_flags = 0;
1346 1348
1347 dest = 0; 1349 dest = 0;
1348 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 1350 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1349 IP_STATINC(IP_STAT_CANTFORWARD); 1351 IP_STATINC(IP_STAT_CANTFORWARD);
1350 m_freem(m); 1352 m_freem(m);
1351 return; 1353 return;
1352 } 1354 }
1353 1355
1354 if (ip->ip_ttl <= IPTTLDEC) { 1356 if (ip->ip_ttl <= IPTTLDEC) {