| @@ -1,1477 +1,1475 @@ | | | @@ -1,1477 +1,1475 @@ |
1 | /* $NetBSD: ip_input.c,v 1.275.4.1.8.1 2011/01/07 03:16:14 matt Exp $ */ | | 1 | /* $NetBSD: ip_input.c,v 1.275.4.1.8.2 2011/01/07 03:17:44 matt Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. | | 4 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
15 | * 3. Neither the name of the project nor the names of its contributors | | 15 | * 3. Neither the name of the project nor the names of its contributors |
16 | * may be used to endorse or promote products derived from this software | | 16 | * may be used to endorse or promote products derived from this software |
17 | * without specific prior written permission. | | 17 | * without specific prior written permission. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND | | 19 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE | | 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | * SUCH DAMAGE. | | 29 | * SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /*- | | 32 | /*- |
33 | * Copyright (c) 1998 The NetBSD Foundation, Inc. | | 33 | * Copyright (c) 1998 The NetBSD Foundation, Inc. |
34 | * All rights reserved. | | 34 | * All rights reserved. |
35 | * | | 35 | * |
36 | * This code is derived from software contributed to The NetBSD Foundation | | 36 | * This code is derived from software contributed to The NetBSD Foundation |
37 | * by Public Access Networks Corporation ("Panix"). It was developed under | | 37 | * by Public Access Networks Corporation ("Panix"). It was developed under |
38 | * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. | | 38 | * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. |
39 | * | | 39 | * |
40 | * Redistribution and use in source and binary forms, with or without | | 40 | * Redistribution and use in source and binary forms, with or without |
41 | * modification, are permitted provided that the following conditions | | 41 | * modification, are permitted provided that the following conditions |
42 | * are met: | | 42 | * are met: |
43 | * 1. Redistributions of source code must retain the above copyright | | 43 | * 1. Redistributions of source code must retain the above copyright |
44 | * notice, this list of conditions and the following disclaimer. | | 44 | * notice, this list of conditions and the following disclaimer. |
45 | * 2. Redistributions in binary form must reproduce the above copyright | | 45 | * 2. Redistributions in binary form must reproduce the above copyright |
46 | * notice, this list of conditions and the following disclaimer in the | | 46 | * notice, this list of conditions and the following disclaimer in the |
47 | * documentation and/or other materials provided with the distribution. | | 47 | * documentation and/or other materials provided with the distribution. |
48 | * | | 48 | * |
49 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 49 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
50 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 50 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
51 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 51 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
52 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 52 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
53 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 53 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
54 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 54 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
55 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 55 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
56 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 56 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
57 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 57 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
58 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 58 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
59 | * POSSIBILITY OF SUCH DAMAGE. | | 59 | * POSSIBILITY OF SUCH DAMAGE. |
60 | */ | | 60 | */ |
61 | | | 61 | |
62 | /* | | 62 | /* |
63 | * Copyright (c) 1982, 1986, 1988, 1993 | | 63 | * Copyright (c) 1982, 1986, 1988, 1993 |
64 | * The Regents of the University of California. All rights reserved. | | 64 | * The Regents of the University of California. All rights reserved. |
65 | * | | 65 | * |
66 | * Redistribution and use in source and binary forms, with or without | | 66 | * Redistribution and use in source and binary forms, with or without |
67 | * modification, are permitted provided that the following conditions | | 67 | * modification, are permitted provided that the following conditions |
68 | * are met: | | 68 | * are met: |
69 | * 1. Redistributions of source code must retain the above copyright | | 69 | * 1. Redistributions of source code must retain the above copyright |
70 | * notice, this list of conditions and the following disclaimer. | | 70 | * notice, this list of conditions and the following disclaimer. |
71 | * 2. Redistributions in binary form must reproduce the above copyright | | 71 | * 2. Redistributions in binary form must reproduce the above copyright |
72 | * notice, this list of conditions and the following disclaimer in the | | 72 | * notice, this list of conditions and the following disclaimer in the |
73 | * documentation and/or other materials provided with the distribution. | | 73 | * documentation and/or other materials provided with the distribution. |
74 | * 3. Neither the name of the University nor the names of its contributors | | 74 | * 3. Neither the name of the University nor the names of its contributors |
75 | * may be used to endorse or promote products derived from this software | | 75 | * may be used to endorse or promote products derived from this software |
76 | * without specific prior written permission. | | 76 | * without specific prior written permission. |
77 | * | | 77 | * |
78 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 78 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
79 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 79 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
80 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 80 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
81 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 81 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
82 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 82 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
83 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 83 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
84 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 84 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
85 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 85 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
86 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 86 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
87 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 87 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
88 | * SUCH DAMAGE. | | 88 | * SUCH DAMAGE. |
89 | * | | 89 | * |
90 | * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 | | 90 | * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 |
91 | */ | | 91 | */ |
92 | | | 92 | |
93 | #include <sys/cdefs.h> | | 93 | #include <sys/cdefs.h> |
94 | __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.275.4.1.8.1 2011/01/07 03:16:14 matt Exp $"); | | 94 | __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.275.4.1.8.2 2011/01/07 03:17:44 matt Exp $"); |
95 | | | 95 | |
96 | #include "opt_inet.h" | | 96 | #include "opt_inet.h" |
97 | #include "opt_gateway.h" | | 97 | #include "opt_gateway.h" |
98 | #include "opt_pfil_hooks.h" | | 98 | #include "opt_pfil_hooks.h" |
99 | #include "opt_ipsec.h" | | 99 | #include "opt_ipsec.h" |
100 | #include "opt_mrouting.h" | | 100 | #include "opt_mrouting.h" |
101 | #include "opt_mbuftrace.h" | | 101 | #include "opt_mbuftrace.h" |
102 | #include "opt_inet_csum.h" | | 102 | #include "opt_inet_csum.h" |
103 | | | 103 | |
104 | #include <sys/param.h> | | 104 | #include <sys/param.h> |
105 | #include <sys/systm.h> | | 105 | #include <sys/systm.h> |
106 | #include <sys/malloc.h> | | 106 | #include <sys/malloc.h> |
107 | #include <sys/mbuf.h> | | 107 | #include <sys/mbuf.h> |
108 | #include <sys/domain.h> | | 108 | #include <sys/domain.h> |
109 | #include <sys/protosw.h> | | 109 | #include <sys/protosw.h> |
110 | #include <sys/socket.h> | | 110 | #include <sys/socket.h> |
111 | #include <sys/socketvar.h> | | 111 | #include <sys/socketvar.h> |
112 | #include <sys/errno.h> | | 112 | #include <sys/errno.h> |
113 | #include <sys/time.h> | | 113 | #include <sys/time.h> |
114 | #include <sys/kernel.h> | | 114 | #include <sys/kernel.h> |
115 | #include <sys/pool.h> | | 115 | #include <sys/pool.h> |
116 | #include <sys/sysctl.h> | | 116 | #include <sys/sysctl.h> |
117 | #include <sys/kauth.h> | | 117 | #include <sys/kauth.h> |
118 | | | 118 | |
119 | #include <net/if.h> | | 119 | #include <net/if.h> |
120 | #include <net/if_dl.h> | | 120 | #include <net/if_dl.h> |
121 | #include <net/route.h> | | 121 | #include <net/route.h> |
122 | #include <net/pfil.h> | | 122 | #include <net/pfil.h> |
123 | | | 123 | |
124 | #include <netinet/in.h> | | 124 | #include <netinet/in.h> |
125 | #include <netinet/in_systm.h> | | 125 | #include <netinet/in_systm.h> |
126 | #include <netinet/ip.h> | | 126 | #include <netinet/ip.h> |
127 | #include <netinet/in_pcb.h> | | 127 | #include <netinet/in_pcb.h> |
128 | #include <netinet/in_proto.h> | | 128 | #include <netinet/in_proto.h> |
129 | #include <netinet/in_var.h> | | 129 | #include <netinet/in_var.h> |
130 | #include <netinet/ip_var.h> | | 130 | #include <netinet/ip_var.h> |
131 | #include <netinet/ip_private.h> | | 131 | #include <netinet/ip_private.h> |
132 | #include <netinet/ip_icmp.h> | | 132 | #include <netinet/ip_icmp.h> |
133 | /* just for gif_ttl */ | | 133 | /* just for gif_ttl */ |
134 | #include <netinet/in_gif.h> | | 134 | #include <netinet/in_gif.h> |
135 | #include "gif.h" | | 135 | #include "gif.h" |
136 | #include <net/if_gre.h> | | 136 | #include <net/if_gre.h> |
137 | #include "gre.h" | | 137 | #include "gre.h" |
138 | | | 138 | |
139 | #ifdef MROUTING | | 139 | #ifdef MROUTING |
140 | #include <netinet/ip_mroute.h> | | 140 | #include <netinet/ip_mroute.h> |
141 | #endif | | 141 | #endif |
142 | | | 142 | |
143 | #ifdef IPSEC | | 143 | #ifdef IPSEC |
144 | #include <netinet6/ipsec.h> | | 144 | #include <netinet6/ipsec.h> |
145 | #include <netinet6/ipsec_private.h> | | 145 | #include <netinet6/ipsec_private.h> |
146 | #include <netkey/key.h> | | 146 | #include <netkey/key.h> |
147 | #endif | | 147 | #endif |
148 | #ifdef FAST_IPSEC | | 148 | #ifdef FAST_IPSEC |
149 | #include <netipsec/ipsec.h> | | 149 | #include <netipsec/ipsec.h> |
150 | #include <netipsec/key.h> | | 150 | #include <netipsec/key.h> |
151 | #endif /* FAST_IPSEC*/ | | 151 | #endif /* FAST_IPSEC*/ |
152 | | | 152 | |
153 | #ifndef IPFORWARDING | | 153 | #ifndef IPFORWARDING |
154 | #ifdef GATEWAY | | 154 | #ifdef GATEWAY |
155 | #define IPFORWARDING 1 /* forward IP packets not for us */ | | 155 | #define IPFORWARDING 1 /* forward IP packets not for us */ |
156 | #else /* GATEWAY */ | | 156 | #else /* GATEWAY */ |
157 | #define IPFORWARDING 0 /* don't forward IP packets not for us */ | | 157 | #define IPFORWARDING 0 /* don't forward IP packets not for us */ |
158 | #endif /* GATEWAY */ | | 158 | #endif /* GATEWAY */ |
159 | #endif /* IPFORWARDING */ | | 159 | #endif /* IPFORWARDING */ |
160 | #ifndef IPSENDREDIRECTS | | 160 | #ifndef IPSENDREDIRECTS |
161 | #define IPSENDREDIRECTS 1 | | 161 | #define IPSENDREDIRECTS 1 |
162 | #endif | | 162 | #endif |
163 | #ifndef IPFORWSRCRT | | 163 | #ifndef IPFORWSRCRT |
164 | #define IPFORWSRCRT 1 /* forward source-routed packets */ | | 164 | #define IPFORWSRCRT 1 /* forward source-routed packets */ |
165 | #endif | | 165 | #endif |
166 | #ifndef IPALLOWSRCRT | | 166 | #ifndef IPALLOWSRCRT |
167 | #define IPALLOWSRCRT 1 /* allow source-routed packets */ | | 167 | #define IPALLOWSRCRT 1 /* allow source-routed packets */ |
168 | #endif | | 168 | #endif |
169 | #ifndef IPMTUDISC | | 169 | #ifndef IPMTUDISC |
170 | #define IPMTUDISC 1 | | 170 | #define IPMTUDISC 1 |
171 | #endif | | 171 | #endif |
172 | #ifndef IPMTUDISCTIMEOUT | | 172 | #ifndef IPMTUDISCTIMEOUT |
173 | #define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ | | 173 | #define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ |
174 | #endif | | 174 | #endif |
175 | | | 175 | |
176 | /* | | 176 | /* |
177 | * Note: DIRECTED_BROADCAST is handled this way so that previous | | 177 | * Note: DIRECTED_BROADCAST is handled this way so that previous |
178 | * configuration using this option will Just Work. | | 178 | * configuration using this option will Just Work. |
179 | */ | | 179 | */ |
180 | #ifndef IPDIRECTEDBCAST | | 180 | #ifndef IPDIRECTEDBCAST |
181 | #ifdef DIRECTED_BROADCAST | | 181 | #ifdef DIRECTED_BROADCAST |
182 | #define IPDIRECTEDBCAST 1 | | 182 | #define IPDIRECTEDBCAST 1 |
183 | #else | | 183 | #else |
184 | #define IPDIRECTEDBCAST 0 | | 184 | #define IPDIRECTEDBCAST 0 |
185 | #endif /* DIRECTED_BROADCAST */ | | 185 | #endif /* DIRECTED_BROADCAST */ |
186 | #endif /* IPDIRECTEDBCAST */ | | 186 | #endif /* IPDIRECTEDBCAST */ |
187 | int ipforwarding = IPFORWARDING; | | 187 | int ipforwarding = IPFORWARDING; |
188 | int ipsendredirects = IPSENDREDIRECTS; | | 188 | int ipsendredirects = IPSENDREDIRECTS; |
189 | int ip_defttl = IPDEFTTL; | | 189 | int ip_defttl = IPDEFTTL; |
190 | int ip_forwsrcrt = IPFORWSRCRT; | | 190 | int ip_forwsrcrt = IPFORWSRCRT; |
191 | int ip_directedbcast = IPDIRECTEDBCAST; | | 191 | int ip_directedbcast = IPDIRECTEDBCAST; |
192 | int ip_allowsrcrt = IPALLOWSRCRT; | | 192 | int ip_allowsrcrt = IPALLOWSRCRT; |
193 | int ip_mtudisc = IPMTUDISC; | | 193 | int ip_mtudisc = IPMTUDISC; |
194 | int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; | | 194 | int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; |
195 | #ifdef DIAGNOSTIC | | 195 | #ifdef DIAGNOSTIC |
196 | int ipprintfs = 0; | | 196 | int ipprintfs = 0; |
197 | #endif | | 197 | #endif |
198 | | | 198 | |
199 | int ip_do_randomid = 0; | | 199 | int ip_do_randomid = 0; |
200 | | | 200 | |
201 | /* | | 201 | /* |
202 | * XXX - Setting ip_checkinterface mostly implements the receive side of | | 202 | * XXX - Setting ip_checkinterface mostly implements the receive side of |
203 | * the Strong ES model described in RFC 1122, but since the routing table | | 203 | * the Strong ES model described in RFC 1122, but since the routing table |
204 | * and transmit implementation do not implement the Strong ES model, | | 204 | * and transmit implementation do not implement the Strong ES model, |
205 | * setting this to 1 results in an odd hybrid. | | 205 | * setting this to 1 results in an odd hybrid. |
206 | * | | 206 | * |
207 | * XXX - ip_checkinterface currently must be disabled if you use ipnat | | 207 | * XXX - ip_checkinterface currently must be disabled if you use ipnat |
208 | * to translate the destination address to another local interface. | | 208 | * to translate the destination address to another local interface. |
209 | * | | 209 | * |
210 | * XXX - ip_checkinterface must be disabled if you add IP aliases | | 210 | * XXX - ip_checkinterface must be disabled if you add IP aliases |
211 | * to the loopback interface instead of the interface where the | | 211 | * to the loopback interface instead of the interface where the |
212 | * packets for those addresses are received. | | 212 | * packets for those addresses are received. |
213 | */ | | 213 | */ |
214 | int ip_checkinterface = 0; | | 214 | int ip_checkinterface = 0; |
215 | | | 215 | |
216 | | | 216 | |
217 | struct rttimer_queue *ip_mtudisc_timeout_q = NULL; | | 217 | struct rttimer_queue *ip_mtudisc_timeout_q = NULL; |
218 | | | 218 | |
219 | int ipqmaxlen = IFQ_MAXLEN; | | 219 | int ipqmaxlen = IFQ_MAXLEN; |
220 | u_long in_ifaddrhash; /* size of hash table - 1 */ | | 220 | u_long in_ifaddrhash; /* size of hash table - 1 */ |
221 | int in_ifaddrentries; /* total number of addrs */ | | 221 | int in_ifaddrentries; /* total number of addrs */ |
222 | struct in_ifaddrhead in_ifaddrhead; | | 222 | struct in_ifaddrhead in_ifaddrhead; |
223 | struct in_ifaddrhashhead *in_ifaddrhashtbl; | | 223 | struct in_ifaddrhashhead *in_ifaddrhashtbl; |
224 | u_long in_multihash; /* size of hash table - 1 */ | | 224 | u_long in_multihash; /* size of hash table - 1 */ |
225 | int in_multientries; /* total number of addrs */ | | 225 | int in_multientries; /* total number of addrs */ |
226 | struct in_multihashhead *in_multihashtbl; | | 226 | struct in_multihashhead *in_multihashtbl; |
227 | struct ifqueue ipintrq; | | 227 | struct ifqueue ipintrq; |
228 | uint16_t ip_id; | | 228 | uint16_t ip_id; |
229 | | | 229 | |
230 | percpu_t *ipstat_percpu; | | 230 | percpu_t *ipstat_percpu; |
231 | | | 231 | |
232 | #ifdef PFIL_HOOKS | | 232 | #ifdef PFIL_HOOKS |
233 | struct pfil_head inet_pfil_hook; | | 233 | struct pfil_head inet_pfil_hook; |
234 | #endif | | 234 | #endif |
235 | | | 235 | |
236 | /* | | 236 | /* |
237 | * Cached copy of nmbclusters. If nbclusters is different, | | 237 | * Cached copy of nmbclusters. If nbclusters is different, |
238 | * recalculate IP parameters derived from nmbclusters. | | 238 | * recalculate IP parameters derived from nmbclusters. |
239 | */ | | 239 | */ |
240 | static int ip_nmbclusters; /* copy of nmbclusters */ | | 240 | static int ip_nmbclusters; /* copy of nmbclusters */ |
241 | static void ip_nmbclusters_changed(void); /* recalc limits */ | | 241 | static void ip_nmbclusters_changed(void); /* recalc limits */ |
242 | | | 242 | |
243 | #define CHECK_NMBCLUSTER_PARAMS() \ | | 243 | #define CHECK_NMBCLUSTER_PARAMS() \ |
244 | do { \ | | 244 | do { \ |
245 | if (__predict_false(ip_nmbclusters != nmbclusters)) \ | | 245 | if (__predict_false(ip_nmbclusters != nmbclusters)) \ |
246 | ip_nmbclusters_changed(); \ | | 246 | ip_nmbclusters_changed(); \ |
247 | } while (/*CONSTCOND*/0) | | 247 | } while (/*CONSTCOND*/0) |
248 | | | 248 | |
249 | /* IP datagram reassembly queues (hashed) */ | | 249 | /* IP datagram reassembly queues (hashed) */ |
250 | #define IPREASS_NHASH_LOG2 6 | | 250 | #define IPREASS_NHASH_LOG2 6 |
251 | #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) | | 251 | #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) |
252 | #define IPREASS_HMASK (IPREASS_NHASH - 1) | | 252 | #define IPREASS_HMASK (IPREASS_NHASH - 1) |
253 | #define IPREASS_HASH(x,y) \ | | 253 | #define IPREASS_HASH(x,y) \ |
254 | (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) | | 254 | (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) |
255 | struct ipqhead ipq[IPREASS_NHASH]; | | 255 | struct ipqhead ipq[IPREASS_NHASH]; |
256 | int ipq_locked; | | 256 | int ipq_locked; |
257 | static int ip_nfragpackets; /* packets in reass queue */ | | 257 | static int ip_nfragpackets; /* packets in reass queue */ |
258 | static int ip_nfrags; /* total fragments in reass queues */ | | 258 | static int ip_nfrags; /* total fragments in reass queues */ |
259 | | | 259 | |
260 | int ip_maxfragpackets = 200; /* limit on packets. XXX sysctl */ | | 260 | int ip_maxfragpackets = 200; /* limit on packets. XXX sysctl */ |
261 | int ip_maxfrags; /* limit on fragments. XXX sysctl */ | | 261 | int ip_maxfrags; /* limit on fragments. XXX sysctl */ |
262 | | | 262 | |
263 | | | 263 | |
264 | /* | | 264 | /* |
265 | * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for | | 265 | * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for |
266 | * IP reassembly queue buffer managment. | | 266 | * IP reassembly queue buffer managment. |
267 | * | | 267 | * |
268 | * We keep a count of total IP fragments (NB: not fragmented packets!) | | 268 | * We keep a count of total IP fragments (NB: not fragmented packets!) |
269 | * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments. | | 269 | * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments. |
270 | * If ip_nfrags exceeds ip_maxfrags the limit, we drop half the | | 270 | * If ip_nfrags exceeds ip_maxfrags the limit, we drop half the |
271 | * total fragments in reassembly queues.This AIMD policy avoids | | 271 | * total fragments in reassembly queues.This AIMD policy avoids |
272 | * repeatedly deleting single packets under heavy fragmentation load | | 272 | * repeatedly deleting single packets under heavy fragmentation load |
273 | * (e.g., from lossy NFS peers). | | 273 | * (e.g., from lossy NFS peers). |
274 | */ | | 274 | */ |
275 | static u_int ip_reass_ttl_decr(u_int ticks); | | 275 | static u_int ip_reass_ttl_decr(u_int ticks); |
276 | static void ip_reass_drophalf(void); | | 276 | static void ip_reass_drophalf(void); |
277 | | | 277 | |
278 | | | 278 | |
279 | static inline int ipq_lock_try(void); | | 279 | static inline int ipq_lock_try(void); |
280 | static inline void ipq_unlock(void); | | 280 | static inline void ipq_unlock(void); |
281 | | | 281 | |
282 | static inline int | | 282 | static inline int |
283 | ipq_lock_try(void) | | 283 | ipq_lock_try(void) |
284 | { | | 284 | { |
285 | int s; | | 285 | int s; |
286 | | | 286 | |
287 | /* | | 287 | /* |
288 | * Use splvm() -- we're blocking things that would cause | | 288 | * Use splvm() -- we're blocking things that would cause |
289 | * mbuf allocation. | | 289 | * mbuf allocation. |
290 | */ | | 290 | */ |
291 | s = splvm(); | | 291 | s = splvm(); |
292 | if (ipq_locked) { | | 292 | if (ipq_locked) { |
293 | splx(s); | | 293 | splx(s); |
294 | return (0); | | 294 | return (0); |
295 | } | | 295 | } |
296 | ipq_locked = 1; | | 296 | ipq_locked = 1; |
297 | splx(s); | | 297 | splx(s); |
298 | return (1); | | 298 | return (1); |
299 | } | | 299 | } |
300 | | | 300 | |
301 | static inline void | | 301 | static inline void |
302 | ipq_unlock(void) | | 302 | ipq_unlock(void) |
303 | { | | 303 | { |
304 | int s; | | 304 | int s; |
305 | | | 305 | |
306 | s = splvm(); | | 306 | s = splvm(); |
307 | ipq_locked = 0; | | 307 | ipq_locked = 0; |
308 | splx(s); | | 308 | splx(s); |
309 | } | | 309 | } |
310 | | | 310 | |
311 | #ifdef DIAGNOSTIC | | 311 | #ifdef DIAGNOSTIC |
312 | #define IPQ_LOCK() \ | | 312 | #define IPQ_LOCK() \ |
313 | do { \ | | 313 | do { \ |
314 | if (ipq_lock_try() == 0) { \ | | 314 | if (ipq_lock_try() == 0) { \ |
315 | printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \ | | 315 | printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \ |
316 | panic("ipq_lock"); \ | | 316 | panic("ipq_lock"); \ |
317 | } \ | | 317 | } \ |
318 | } while (/*CONSTCOND*/ 0) | | 318 | } while (/*CONSTCOND*/ 0) |
319 | #define IPQ_LOCK_CHECK() \ | | 319 | #define IPQ_LOCK_CHECK() \ |
320 | do { \ | | 320 | do { \ |
321 | if (ipq_locked == 0) { \ | | 321 | if (ipq_locked == 0) { \ |
322 | printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \ | | 322 | printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \ |
323 | panic("ipq lock check"); \ | | 323 | panic("ipq lock check"); \ |
324 | } \ | | 324 | } \ |
325 | } while (/*CONSTCOND*/ 0) | | 325 | } while (/*CONSTCOND*/ 0) |
326 | #else | | 326 | #else |
327 | #define IPQ_LOCK() (void) ipq_lock_try() | | 327 | #define IPQ_LOCK() (void) ipq_lock_try() |
328 | #define IPQ_LOCK_CHECK() /* nothing */ | | 328 | #define IPQ_LOCK_CHECK() /* nothing */ |
329 | #endif | | 329 | #endif |
330 | | | 330 | |
331 | #define IPQ_UNLOCK() ipq_unlock() | | 331 | #define IPQ_UNLOCK() ipq_unlock() |
332 | | | 332 | |
333 | struct pool inmulti_pool; | | 333 | struct pool inmulti_pool; |
334 | struct pool ipqent_pool; | | 334 | struct pool ipqent_pool; |
335 | | | 335 | |
336 | #ifdef INET_CSUM_COUNTERS | | 336 | #ifdef INET_CSUM_COUNTERS |
337 | #include <sys/device.h> | | 337 | #include <sys/device.h> |
338 | | | 338 | |
339 | struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, | | 339 | struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
340 | NULL, "inet", "hwcsum bad"); | | 340 | NULL, "inet", "hwcsum bad"); |
341 | struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, | | 341 | struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
342 | NULL, "inet", "hwcsum ok"); | | 342 | NULL, "inet", "hwcsum ok"); |
343 | struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, | | 343 | struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
344 | NULL, "inet", "swcsum"); | | 344 | NULL, "inet", "swcsum"); |
345 | | | 345 | |
346 | #define INET_CSUM_COUNTER_INCR(ev) (ev)->ev_count++ | | 346 | #define INET_CSUM_COUNTER_INCR(ev) (ev)->ev_count++ |
347 | | | 347 | |
348 | EVCNT_ATTACH_STATIC(ip_hwcsum_bad); | | 348 | EVCNT_ATTACH_STATIC(ip_hwcsum_bad); |
349 | EVCNT_ATTACH_STATIC(ip_hwcsum_ok); | | 349 | EVCNT_ATTACH_STATIC(ip_hwcsum_ok); |
350 | EVCNT_ATTACH_STATIC(ip_swcsum); | | 350 | EVCNT_ATTACH_STATIC(ip_swcsum); |
351 | | | 351 | |
352 | #else | | 352 | #else |
353 | | | 353 | |
354 | #define INET_CSUM_COUNTER_INCR(ev) /* nothing */ | | 354 | #define INET_CSUM_COUNTER_INCR(ev) /* nothing */ |
355 | | | 355 | |
356 | #endif /* INET_CSUM_COUNTERS */ | | 356 | #endif /* INET_CSUM_COUNTERS */ |
357 | | | 357 | |
358 | /* | | 358 | /* |
359 | * We need to save the IP options in case a protocol wants to respond | | 359 | * We need to save the IP options in case a protocol wants to respond |
360 | * to an incoming packet over the same route if the packet got here | | 360 | * to an incoming packet over the same route if the packet got here |
361 | * using IP source routing. This allows connection establishment and | | 361 | * using IP source routing. This allows connection establishment and |
362 | * maintenance when the remote end is on a network that is not known | | 362 | * maintenance when the remote end is on a network that is not known |
363 | * to us. | | 363 | * to us. |
364 | */ | | 364 | */ |
365 | int ip_nhops = 0; | | 365 | int ip_nhops = 0; |
366 | static struct ip_srcrt { | | 366 | static struct ip_srcrt { |
367 | struct in_addr dst; /* final destination */ | | 367 | struct in_addr dst; /* final destination */ |
368 | char nop; /* one NOP to align */ | | 368 | char nop; /* one NOP to align */ |
369 | char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ | | 369 | char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ |
370 | struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; | | 370 | struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; |
371 | } ip_srcrt; | | 371 | } ip_srcrt; |
372 | | | 372 | |
373 | static void save_rte(u_char *, struct in_addr); | | 373 | static void save_rte(u_char *, struct in_addr); |
374 | | | 374 | |
375 | #ifdef MBUFTRACE | | 375 | #ifdef MBUFTRACE |
376 | struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx"); | | 376 | struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx"); |
377 | struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx"); | | 377 | struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx"); |
378 | #endif | | 378 | #endif |
379 | | | 379 | |
380 | /* | | 380 | /* |
381 | * Compute IP limits derived from the value of nmbclusters. | | 381 | * Compute IP limits derived from the value of nmbclusters. |
382 | */ | | 382 | */ |
383 | static void | | 383 | static void |
384 | ip_nmbclusters_changed(void) | | 384 | ip_nmbclusters_changed(void) |
385 | { | | 385 | { |
386 | ip_maxfrags = nmbclusters / 4; | | 386 | ip_maxfrags = nmbclusters / 4; |
387 | ip_nmbclusters = nmbclusters; | | 387 | ip_nmbclusters = nmbclusters; |
388 | } | | 388 | } |
389 | | | 389 | |
/*
 * IP initialization: fill in IP protocol switch table.
 * All protocols not implemented in kernel go to raw IP protocol handler.
 */
void
ip_init(void)
{
	const struct protosw *pr;
	int i;

	/* Pools for multicast membership records and reassembly entries. */
	pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl",
	    NULL, IPL_SOFTNET);
	pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
	    NULL, IPL_VM);

	/*
	 * Default every protocol number to the raw IP handler, then
	 * override the slots for protocols actually present in inetsw.
	 * ip_protox[] stores indices into inetsw, hence "pr - inetsw".
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == 0)
		panic("ip_init");
	for (i = 0; i < IPPROTO_MAX; i++)
		ip_protox[i] = pr - inetsw;
	for (pr = inetdomain.dom_protosw;
	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
			ip_protox[pr->pr_protocol] = pr - inetsw;

	/* Empty hash buckets for the fragment reassembly queues. */
	for (i = 0; i < IPREASS_NHASH; i++)
		LIST_INIT(&ipq[i]);

	ip_initid();
	ip_id = time_second & 0xfffff;

	ipintrq.ifq_maxlen = ipqmaxlen;
	ip_nmbclusters_changed();	/* derive nmbclusters-based limits */

	/* Interface-address and multicast lookup tables. */
	TAILQ_INIT(&in_ifaddrhead);
	in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
	    &in_ifaddrhash);
	in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
	    &in_multihash);
	ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
#ifdef GATEWAY
	ipflow_init(ip_hashsize);
#endif

#ifdef PFIL_HOOKS
	/* Register our Packet Filter hook.  Failure is non-fatal. */
	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
	inet_pfil_hook.ph_af = AF_INET;
	i = pfil_head_register(&inet_pfil_hook);
	if (i != 0)
		printf("ip_init: WARNING: unable to register pfil hook, "
		    "error %d\n", i);
#endif /* PFIL_HOOKS */

#ifdef MBUFTRACE
	MOWNER_ATTACH(&ip_tx_mowner);
	MOWNER_ATTACH(&ip_rx_mowner);
#endif /* MBUFTRACE */

	/* Per-CPU IP statistics counters (IP_STATINC et al.). */
	ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS);
}
452 | | | 452 | |
/*
 * Prototype sockaddr with only len/family preset.
 * NOTE(review): its consumers are not visible in this chunk; presumably
 * option-processing code fills in the address -- confirm.
 */
struct sockaddr_in ipaddr = {
	.sin_len = sizeof(ipaddr),
	.sin_family = AF_INET,
};
/* Cached route; NOTE(review): usage (ip_forward?) not visible here. */
struct route ipforward_rt;
458 | | | 458 | |
/*
 * IP software interrupt routine: drain the IP input queue, handing
 * each packet to ip_input().  Runs with softnet_lock and the big
 * kernel lock held; note that both locks are deliberately kept held
 * across the ip_input() call (this revision removed the unlock/relock
 * pair that used to surround it).
 */
void
ipintr(void)
{
	int s;
	struct mbuf *m;

	mutex_enter(softnet_lock);
	KERNEL_LOCK(1, NULL);
	while (!IF_IS_EMPTY(&ipintrq)) {
		/*
		 * Raise to splnet just for the dequeue; presumably the
		 * queue is appended to from interrupt context -- TODO
		 * confirm against the enqueue path.
		 */
		s = splnet();
		IF_DEQUEUE(&ipintrq, m);
		splx(s);
		if (m == NULL)
			break;
		ip_input(m);
	}
	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}
483 | | | 481 | |
484 | /* | | 482 | /* |
485 | * Ip input routine. Checksum and byte swap header. If fragmented | | 483 | * Ip input routine. Checksum and byte swap header. If fragmented |
486 | * try to reassemble. Process options. Pass to next level. | | 484 | * try to reassemble. Process options. Pass to next level. |
487 | */ | | 485 | */ |
488 | void | | 486 | void |
489 | ip_input(struct mbuf *m) | | 487 | ip_input(struct mbuf *m) |
490 | { | | 488 | { |
491 | struct ip *ip = NULL; | | 489 | struct ip *ip = NULL; |
492 | struct ipq *fp; | | 490 | struct ipq *fp; |
493 | struct in_ifaddr *ia; | | 491 | struct in_ifaddr *ia; |
494 | struct ifaddr *ifa; | | 492 | struct ifaddr *ifa; |
495 | struct ipqent *ipqe; | | 493 | struct ipqent *ipqe; |
496 | int hlen = 0, mff, len; | | 494 | int hlen = 0, mff, len; |
497 | int downmatch; | | 495 | int downmatch; |
498 | int checkif; | | 496 | int checkif; |
499 | int srcrt = 0; | | 497 | int srcrt = 0; |
500 | int s; | | 498 | int s; |
501 | u_int hash; | | 499 | u_int hash; |
502 | #ifdef FAST_IPSEC | | 500 | #ifdef FAST_IPSEC |
503 | struct m_tag *mtag; | | 501 | struct m_tag *mtag; |
504 | struct tdb_ident *tdbi; | | 502 | struct tdb_ident *tdbi; |
505 | struct secpolicy *sp; | | 503 | struct secpolicy *sp; |
506 | int error; | | 504 | int error; |
507 | #endif /* FAST_IPSEC */ | | 505 | #endif /* FAST_IPSEC */ |
508 | | | 506 | |
509 | MCLAIM(m, &ip_rx_mowner); | | 507 | MCLAIM(m, &ip_rx_mowner); |
510 | #ifdef DIAGNOSTIC | | 508 | #ifdef DIAGNOSTIC |
511 | if ((m->m_flags & M_PKTHDR) == 0) | | 509 | if ((m->m_flags & M_PKTHDR) == 0) |
512 | panic("ipintr no HDR"); | | 510 | panic("ipintr no HDR"); |
513 | #endif | | 511 | #endif |
514 | | | 512 | |
515 | /* | | 513 | /* |
516 | * If no IP addresses have been set yet but the interfaces | | 514 | * If no IP addresses have been set yet but the interfaces |
517 | * are receiving, can't do anything with incoming packets yet. | | 515 | * are receiving, can't do anything with incoming packets yet. |
518 | */ | | 516 | */ |
519 | if (TAILQ_FIRST(&in_ifaddrhead) == 0) | | 517 | if (TAILQ_FIRST(&in_ifaddrhead) == 0) |
520 | goto bad; | | 518 | goto bad; |
521 | IP_STATINC(IP_STAT_TOTAL); | | 519 | IP_STATINC(IP_STAT_TOTAL); |
522 | /* | | 520 | /* |
523 | * If the IP header is not aligned, slurp it up into a new | | 521 | * If the IP header is not aligned, slurp it up into a new |
524 | * mbuf with space for link headers, in the event we forward | | 522 | * mbuf with space for link headers, in the event we forward |
525 | * it. Otherwise, if it is aligned, make sure the entire | | 523 | * it. Otherwise, if it is aligned, make sure the entire |
526 | * base IP header is in the first mbuf of the chain. | | 524 | * base IP header is in the first mbuf of the chain. |
527 | */ | | 525 | */ |
528 | if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) { | | 526 | if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) { |
529 | if ((m = m_copyup(m, sizeof(struct ip), | | 527 | if ((m = m_copyup(m, sizeof(struct ip), |
530 | (max_linkhdr + 3) & ~3)) == NULL) { | | 528 | (max_linkhdr + 3) & ~3)) == NULL) { |
531 | /* XXXJRT new stat, please */ | | 529 | /* XXXJRT new stat, please */ |
532 | IP_STATINC(IP_STAT_TOOSMALL); | | 530 | IP_STATINC(IP_STAT_TOOSMALL); |
533 | return; | | 531 | return; |
534 | } | | 532 | } |
535 | } else if (__predict_false(m->m_len < sizeof (struct ip))) { | | 533 | } else if (__predict_false(m->m_len < sizeof (struct ip))) { |
536 | if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { | | 534 | if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { |
537 | IP_STATINC(IP_STAT_TOOSMALL); | | 535 | IP_STATINC(IP_STAT_TOOSMALL); |
538 | return; | | 536 | return; |
539 | } | | 537 | } |
540 | } | | 538 | } |
541 | ip = mtod(m, struct ip *); | | 539 | ip = mtod(m, struct ip *); |
542 | if (ip->ip_v != IPVERSION) { | | 540 | if (ip->ip_v != IPVERSION) { |
543 | IP_STATINC(IP_STAT_BADVERS); | | 541 | IP_STATINC(IP_STAT_BADVERS); |
544 | goto bad; | | 542 | goto bad; |
545 | } | | 543 | } |
546 | hlen = ip->ip_hl << 2; | | 544 | hlen = ip->ip_hl << 2; |
547 | if (hlen < sizeof(struct ip)) { /* minimum header length */ | | 545 | if (hlen < sizeof(struct ip)) { /* minimum header length */ |
548 | IP_STATINC(IP_STAT_BADHLEN); | | 546 | IP_STATINC(IP_STAT_BADHLEN); |
549 | goto bad; | | 547 | goto bad; |
550 | } | | 548 | } |
551 | if (hlen > m->m_len) { | | 549 | if (hlen > m->m_len) { |
552 | if ((m = m_pullup(m, hlen)) == 0) { | | 550 | if ((m = m_pullup(m, hlen)) == 0) { |
553 | IP_STATINC(IP_STAT_BADHLEN); | | 551 | IP_STATINC(IP_STAT_BADHLEN); |
554 | return; | | 552 | return; |
555 | } | | 553 | } |
556 | ip = mtod(m, struct ip *); | | 554 | ip = mtod(m, struct ip *); |
557 | } | | 555 | } |
558 | | | 556 | |
559 | /* | | 557 | /* |
560 | * RFC1122: packets with a multicast source address are | | 558 | * RFC1122: packets with a multicast source address are |
561 | * not allowed. | | 559 | * not allowed. |
562 | */ | | 560 | */ |
563 | if (IN_MULTICAST(ip->ip_src.s_addr)) { | | 561 | if (IN_MULTICAST(ip->ip_src.s_addr)) { |
564 | IP_STATINC(IP_STAT_BADADDR); | | 562 | IP_STATINC(IP_STAT_BADADDR); |
565 | goto bad; | | 563 | goto bad; |
566 | } | | 564 | } |
567 | | | 565 | |
568 | /* 127/8 must not appear on wire - RFC1122 */ | | 566 | /* 127/8 must not appear on wire - RFC1122 */ |
569 | if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || | | 567 | if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || |
570 | (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { | | 568 | (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { |
571 | if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { | | 569 | if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { |
572 | IP_STATINC(IP_STAT_BADADDR); | | 570 | IP_STATINC(IP_STAT_BADADDR); |
573 | goto bad; | | 571 | goto bad; |
574 | } | | 572 | } |
575 | } | | 573 | } |
576 | | | 574 | |
577 | switch (m->m_pkthdr.csum_flags & | | 575 | switch (m->m_pkthdr.csum_flags & |
578 | ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) | | | 576 | ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) | |
579 | M_CSUM_IPv4_BAD)) { | | 577 | M_CSUM_IPv4_BAD)) { |
580 | case M_CSUM_IPv4|M_CSUM_IPv4_BAD: | | 578 | case M_CSUM_IPv4|M_CSUM_IPv4_BAD: |
581 | INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); | | 579 | INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); |
582 | goto badcsum; | | 580 | goto badcsum; |
583 | | | 581 | |
584 | case M_CSUM_IPv4: | | 582 | case M_CSUM_IPv4: |
585 | /* Checksum was okay. */ | | 583 | /* Checksum was okay. */ |
586 | INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); | | 584 | INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); |
587 | break; | | 585 | break; |
588 | | | 586 | |
589 | default: | | 587 | default: |
590 | /* | | 588 | /* |
591 | * Must compute it ourselves. Maybe skip checksum on | | 589 | * Must compute it ourselves. Maybe skip checksum on |
592 | * loopback interfaces. | | 590 | * loopback interfaces. |
593 | */ | | 591 | */ |
594 | if (__predict_true(!(m->m_pkthdr.rcvif->if_flags & | | 592 | if (__predict_true(!(m->m_pkthdr.rcvif->if_flags & |
595 | IFF_LOOPBACK) || ip_do_loopback_cksum)) { | | 593 | IFF_LOOPBACK) || ip_do_loopback_cksum)) { |
596 | INET_CSUM_COUNTER_INCR(&ip_swcsum); | | 594 | INET_CSUM_COUNTER_INCR(&ip_swcsum); |
597 | if (in_cksum(m, hlen) != 0) | | 595 | if (in_cksum(m, hlen) != 0) |
598 | goto badcsum; | | 596 | goto badcsum; |
599 | } | | 597 | } |
600 | break; | | 598 | break; |
601 | } | | 599 | } |
602 | | | 600 | |
603 | /* Retrieve the packet length. */ | | 601 | /* Retrieve the packet length. */ |
604 | len = ntohs(ip->ip_len); | | 602 | len = ntohs(ip->ip_len); |
605 | | | 603 | |
606 | /* | | 604 | /* |
607 | * Check for additional length bogosity | | 605 | * Check for additional length bogosity |
608 | */ | | 606 | */ |
609 | if (len < hlen) { | | 607 | if (len < hlen) { |
610 | IP_STATINC(IP_STAT_BADLEN); | | 608 | IP_STATINC(IP_STAT_BADLEN); |
611 | goto bad; | | 609 | goto bad; |
612 | } | | 610 | } |
613 | | | 611 | |
614 | /* | | 612 | /* |
615 | * Check that the amount of data in the buffers | | 613 | * Check that the amount of data in the buffers |
616 | * is as at least much as the IP header would have us expect. | | 614 | * is as at least much as the IP header would have us expect. |
617 | * Trim mbufs if longer than we expect. | | 615 | * Trim mbufs if longer than we expect. |
618 | * Drop packet if shorter than we expect. | | 616 | * Drop packet if shorter than we expect. |
619 | */ | | 617 | */ |
620 | if (m->m_pkthdr.len < len) { | | 618 | if (m->m_pkthdr.len < len) { |
621 | IP_STATINC(IP_STAT_TOOSHORT); | | 619 | IP_STATINC(IP_STAT_TOOSHORT); |
622 | goto bad; | | 620 | goto bad; |
623 | } | | 621 | } |
624 | if (m->m_pkthdr.len > len) { | | 622 | if (m->m_pkthdr.len > len) { |
625 | if (m->m_len == m->m_pkthdr.len) { | | 623 | if (m->m_len == m->m_pkthdr.len) { |
626 | m->m_len = len; | | 624 | m->m_len = len; |
627 | m->m_pkthdr.len = len; | | 625 | m->m_pkthdr.len = len; |
628 | } else | | 626 | } else |
629 | m_adj(m, len - m->m_pkthdr.len); | | 627 | m_adj(m, len - m->m_pkthdr.len); |
630 | } | | 628 | } |
631 | | | 629 | |
632 | #if defined(IPSEC) | | 630 | #if defined(IPSEC) |
633 | /* ipflow (IP fast forwarding) is not compatible with IPsec. */ | | 631 | /* ipflow (IP fast forwarding) is not compatible with IPsec. */ |
634 | m->m_flags &= ~M_CANFASTFWD; | | 632 | m->m_flags &= ~M_CANFASTFWD; |
635 | #else | | 633 | #else |
636 | /* | | 634 | /* |
637 | * Assume that we can create a fast-forward IP flow entry | | 635 | * Assume that we can create a fast-forward IP flow entry |
638 | * based on this packet. | | 636 | * based on this packet. |
639 | */ | | 637 | */ |
640 | m->m_flags |= M_CANFASTFWD; | | 638 | m->m_flags |= M_CANFASTFWD; |
641 | #endif | | 639 | #endif |
642 | | | 640 | |
643 | #ifdef PFIL_HOOKS | | 641 | #ifdef PFIL_HOOKS |
644 | /* | | 642 | /* |
645 | * Run through list of hooks for input packets. If there are any | | 643 | * Run through list of hooks for input packets. If there are any |
646 | * filters which require that additional packets in the flow are | | 644 | * filters which require that additional packets in the flow are |
647 | * not fast-forwarded, they must clear the M_CANFASTFWD flag. | | 645 | * not fast-forwarded, they must clear the M_CANFASTFWD flag. |
648 | * Note that filters must _never_ set this flag, as another filter | | 646 | * Note that filters must _never_ set this flag, as another filter |
649 | * in the list may have previously cleared it. | | 647 | * in the list may have previously cleared it. |
650 | */ | | 648 | */ |
651 | /* | | 649 | /* |
652 | * let ipfilter look at packet on the wire, | | 650 | * let ipfilter look at packet on the wire, |
653 | * not the decapsulated packet. | | 651 | * not the decapsulated packet. |
654 | */ | | 652 | */ |
655 | #ifdef IPSEC | | 653 | #ifdef IPSEC |
656 | if (!ipsec_getnhist(m)) | | 654 | if (!ipsec_getnhist(m)) |
657 | #elif defined(FAST_IPSEC) | | 655 | #elif defined(FAST_IPSEC) |
658 | if (!ipsec_indone(m)) | | 656 | if (!ipsec_indone(m)) |
659 | #else | | 657 | #else |
660 | if (1) | | 658 | if (1) |
661 | #endif | | 659 | #endif |
662 | { | | 660 | { |
663 | struct in_addr odst; | | 661 | struct in_addr odst; |
664 | | | 662 | |
665 | odst = ip->ip_dst; | | 663 | odst = ip->ip_dst; |
666 | if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, | | 664 | if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, |
667 | PFIL_IN) != 0) | | 665 | PFIL_IN) != 0) |
668 | return; | | 666 | return; |
669 | if (m == NULL) | | 667 | if (m == NULL) |
670 | return; | | 668 | return; |
671 | ip = mtod(m, struct ip *); | | 669 | ip = mtod(m, struct ip *); |
672 | hlen = ip->ip_hl << 2; | | 670 | hlen = ip->ip_hl << 2; |
673 | /* | | 671 | /* |
674 | * XXX The setting of "srcrt" here is to prevent ip_forward() | | 672 | * XXX The setting of "srcrt" here is to prevent ip_forward() |
675 | * from generating ICMP redirects for packets that have | | 673 | * from generating ICMP redirects for packets that have |
676 | * been redirected by a hook back out on to the same LAN that | | 674 | * been redirected by a hook back out on to the same LAN that |
677 | * they came from and is not an indication that the packet | | 675 | * they came from and is not an indication that the packet |
678 | * is being inffluenced by source routing options. This | | 676 | * is being inffluenced by source routing options. This |
679 | * allows things like | | 677 | * allows things like |
680 | * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp" | | 678 | * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp" |
681 | * where tlp0 is both on the 1.1.1.0/24 network and is the | | 679 | * where tlp0 is both on the 1.1.1.0/24 network and is the |
682 | * default route for hosts on 1.1.1.0/24. Of course this | | 680 | * default route for hosts on 1.1.1.0/24. Of course this |
683 | * also requires a "map tlp0 ..." to complete the story. | | 681 | * also requires a "map tlp0 ..." to complete the story. |
684 | * One might argue whether or not this kind of network config. | | 682 | * One might argue whether or not this kind of network config. |
685 | * should be supported in this manner... | | 683 | * should be supported in this manner... |
686 | */ | | 684 | */ |
687 | srcrt = (odst.s_addr != ip->ip_dst.s_addr); | | 685 | srcrt = (odst.s_addr != ip->ip_dst.s_addr); |
688 | } | | 686 | } |
689 | #endif /* PFIL_HOOKS */ | | 687 | #endif /* PFIL_HOOKS */ |
690 | | | 688 | |
691 | #ifdef ALTQ | | 689 | #ifdef ALTQ |
692 | /* XXX Temporary until ALTQ is changed to use a pfil hook */ | | 690 | /* XXX Temporary until ALTQ is changed to use a pfil hook */ |
693 | if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) { | | 691 | if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) { |
694 | /* packet dropped by traffic conditioner */ | | 692 | /* packet dropped by traffic conditioner */ |
695 | return; | | 693 | return; |
696 | } | | 694 | } |
697 | #endif | | 695 | #endif |
698 | | | 696 | |
699 | /* | | 697 | /* |
700 | * Process options and, if not destined for us, | | 698 | * Process options and, if not destined for us, |
701 | * ship it on. ip_dooptions returns 1 when an | | 699 | * ship it on. ip_dooptions returns 1 when an |
702 | * error was detected (causing an icmp message | | 700 | * error was detected (causing an icmp message |
703 | * to be sent and the original packet to be freed). | | 701 | * to be sent and the original packet to be freed). |
704 | */ | | 702 | */ |
705 | ip_nhops = 0; /* for source routed packets */ | | 703 | ip_nhops = 0; /* for source routed packets */ |
706 | if (hlen > sizeof (struct ip) && ip_dooptions(m)) | | 704 | if (hlen > sizeof (struct ip) && ip_dooptions(m)) |
707 | return; | | 705 | return; |
708 | | | 706 | |
709 | /* | | 707 | /* |
710 | * Enable a consistency check between the destination address | | 708 | * Enable a consistency check between the destination address |
711 | * and the arrival interface for a unicast packet (the RFC 1122 | | 709 | * and the arrival interface for a unicast packet (the RFC 1122 |
712 | * strong ES model) if IP forwarding is disabled and the packet | | 710 | * strong ES model) if IP forwarding is disabled and the packet |
713 | * is not locally generated. | | 711 | * is not locally generated. |
714 | * | | 712 | * |
715 | * XXX - Checking also should be disabled if the destination | | 713 | * XXX - Checking also should be disabled if the destination |
716 | * address is ipnat'ed to a different interface. | | 714 | * address is ipnat'ed to a different interface. |
717 | * | | 715 | * |
718 | * XXX - Checking is incompatible with IP aliases added | | 716 | * XXX - Checking is incompatible with IP aliases added |
719 | * to the loopback interface instead of the interface where | | 717 | * to the loopback interface instead of the interface where |
720 | * the packets are received. | | 718 | * the packets are received. |
721 | * | | 719 | * |
722 | * XXX - We need to add a per ifaddr flag for this so that | | 720 | * XXX - We need to add a per ifaddr flag for this so that |
723 | * we get finer grain control. | | 721 | * we get finer grain control. |
724 | */ | | 722 | */ |
725 | checkif = ip_checkinterface && (ipforwarding == 0) && | | 723 | checkif = ip_checkinterface && (ipforwarding == 0) && |
726 | (m->m_pkthdr.rcvif != NULL) && | | 724 | (m->m_pkthdr.rcvif != NULL) && |
727 | ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0); | | 725 | ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0); |
728 | | | 726 | |
729 | /* | | 727 | /* |
730 | * Check our list of addresses, to see if the packet is for us. | | 728 | * Check our list of addresses, to see if the packet is for us. |
731 | * | | 729 | * |
732 | * Traditional 4.4BSD did not consult IFF_UP at all. | | 730 | * Traditional 4.4BSD did not consult IFF_UP at all. |
733 | * The behavior here is to treat addresses on !IFF_UP interface | | 731 | * The behavior here is to treat addresses on !IFF_UP interface |
734 | * as not mine. | | 732 | * as not mine. |
735 | */ | | 733 | */ |
736 | downmatch = 0; | | 734 | downmatch = 0; |
737 | LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) { | | 735 | LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) { |
738 | if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) { | | 736 | if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) { |
739 | if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif) | | 737 | if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif) |
740 | continue; | | 738 | continue; |
741 | if ((ia->ia_ifp->if_flags & IFF_UP) != 0) | | 739 | if ((ia->ia_ifp->if_flags & IFF_UP) != 0) |
742 | break; | | 740 | break; |
743 | else | | 741 | else |
744 | downmatch++; | | 742 | downmatch++; |
745 | } | | 743 | } |
746 | } | | 744 | } |
747 | if (ia != NULL) | | 745 | if (ia != NULL) |
748 | goto ours; | | 746 | goto ours; |
749 | if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { | | 747 | if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { |
750 | IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) { | | 748 | IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) { |
751 | if (ifa->ifa_addr->sa_family != AF_INET) | | 749 | if (ifa->ifa_addr->sa_family != AF_INET) |
752 | continue; | | 750 | continue; |
753 | ia = ifatoia(ifa); | | 751 | ia = ifatoia(ifa); |
754 | if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || | | 752 | if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || |
755 | in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || | | 753 | in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || |
756 | /* | | 754 | /* |
757 | * Look for all-0's host part (old broadcast addr), | | 755 | * Look for all-0's host part (old broadcast addr), |
758 | * either for subnet or net. | | 756 | * either for subnet or net. |
759 | */ | | 757 | */ |
760 | ip->ip_dst.s_addr == ia->ia_subnet || | | 758 | ip->ip_dst.s_addr == ia->ia_subnet || |
761 | ip->ip_dst.s_addr == ia->ia_net) | | 759 | ip->ip_dst.s_addr == ia->ia_net) |
762 | goto ours; | | 760 | goto ours; |
763 | /* | | 761 | /* |
764 | * An interface with IP address zero accepts | | 762 | * An interface with IP address zero accepts |
765 | * all packets that arrive on that interface. | | 763 | * all packets that arrive on that interface. |
766 | */ | | 764 | */ |
767 | if (in_nullhost(ia->ia_addr.sin_addr)) | | 765 | if (in_nullhost(ia->ia_addr.sin_addr)) |
768 | goto ours; | | 766 | goto ours; |
769 | } | | 767 | } |
770 | } | | 768 | } |
771 | if (IN_MULTICAST(ip->ip_dst.s_addr)) { | | 769 | if (IN_MULTICAST(ip->ip_dst.s_addr)) { |
772 | struct in_multi *inm; | | 770 | struct in_multi *inm; |
773 | #ifdef MROUTING | | 771 | #ifdef MROUTING |
774 | extern struct socket *ip_mrouter; | | 772 | extern struct socket *ip_mrouter; |
775 | | | 773 | |
776 | if (ip_mrouter) { | | 774 | if (ip_mrouter) { |
777 | /* | | 775 | /* |
778 | * If we are acting as a multicast router, all | | 776 | * If we are acting as a multicast router, all |
779 | * incoming multicast packets are passed to the | | 777 | * incoming multicast packets are passed to the |
780 | * kernel-level multicast forwarding function. | | 778 | * kernel-level multicast forwarding function. |
781 | * The packet is returned (relatively) intact; if | | 779 | * The packet is returned (relatively) intact; if |
782 | * ip_mforward() returns a non-zero value, the packet | | 780 | * ip_mforward() returns a non-zero value, the packet |
783 | * must be discarded, else it may be accepted below. | | 781 | * must be discarded, else it may be accepted below. |
784 | * | | 782 | * |
785 | * (The IP ident field is put in the same byte order | | 783 | * (The IP ident field is put in the same byte order |
786 | * as expected when ip_mforward() is called from | | 784 | * as expected when ip_mforward() is called from |
787 | * ip_output().) | | 785 | * ip_output().) |
788 | */ | | 786 | */ |
789 | if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { | | 787 | if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { |
790 | IP_STATINC(IP_STAT_CANTFORWARD); | | 788 | IP_STATINC(IP_STAT_CANTFORWARD); |
791 | m_freem(m); | | 789 | m_freem(m); |
792 | return; | | 790 | return; |
793 | } | | 791 | } |
794 | | | 792 | |
795 | /* | | 793 | /* |
796 | * The process-level routing demon needs to receive | | 794 | * The process-level routing demon needs to receive |
797 | * all multicast IGMP packets, whether or not this | | 795 | * all multicast IGMP packets, whether or not this |
798 | * host belongs to their destination groups. | | 796 | * host belongs to their destination groups. |
799 | */ | | 797 | */ |
800 | if (ip->ip_p == IPPROTO_IGMP) | | 798 | if (ip->ip_p == IPPROTO_IGMP) |
801 | goto ours; | | 799 | goto ours; |
802 | IP_STATINC(IP_STAT_CANTFORWARD); | | 800 | IP_STATINC(IP_STAT_CANTFORWARD); |
803 | } | | 801 | } |
804 | #endif | | 802 | #endif |
805 | /* | | 803 | /* |
806 | * See if we belong to the destination multicast group on the | | 804 | * See if we belong to the destination multicast group on the |
807 | * arrival interface. | | 805 | * arrival interface. |
808 | */ | | 806 | */ |
809 | IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); | | 807 | IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); |
810 | if (inm == NULL) { | | 808 | if (inm == NULL) { |
811 | IP_STATINC(IP_STAT_CANTFORWARD); | | 809 | IP_STATINC(IP_STAT_CANTFORWARD); |
812 | m_freem(m); | | 810 | m_freem(m); |
813 | return; | | 811 | return; |
814 | } | | 812 | } |
815 | goto ours; | | 813 | goto ours; |
816 | } | | 814 | } |
817 | if (ip->ip_dst.s_addr == INADDR_BROADCAST || | | 815 | if (ip->ip_dst.s_addr == INADDR_BROADCAST || |
818 | in_nullhost(ip->ip_dst)) | | 816 | in_nullhost(ip->ip_dst)) |
819 | goto ours; | | 817 | goto ours; |
820 | | | 818 | |
821 | /* | | 819 | /* |
822 | * Not for us; forward if possible and desirable. | | 820 | * Not for us; forward if possible and desirable. |
823 | */ | | 821 | */ |
824 | if (ipforwarding == 0) { | | 822 | if (ipforwarding == 0) { |
825 | IP_STATINC(IP_STAT_CANTFORWARD); | | 823 | IP_STATINC(IP_STAT_CANTFORWARD); |
826 | m_freem(m); | | 824 | m_freem(m); |
827 | } else { | | 825 | } else { |
828 | /* | | 826 | /* |
829 | * If ip_dst matched any of my address on !IFF_UP interface, | | 827 | * If ip_dst matched any of my address on !IFF_UP interface, |
830 | * and there's no IFF_UP interface that matches ip_dst, | | 828 | * and there's no IFF_UP interface that matches ip_dst, |
831 | * send icmp unreach. Forwarding it will result in in-kernel | | 829 | * send icmp unreach. Forwarding it will result in in-kernel |
832 | * forwarding loop till TTL goes to 0. | | 830 | * forwarding loop till TTL goes to 0. |
833 | */ | | 831 | */ |
834 | if (downmatch) { | | 832 | if (downmatch) { |
835 | icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); | | 833 | icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); |
836 | IP_STATINC(IP_STAT_CANTFORWARD); | | 834 | IP_STATINC(IP_STAT_CANTFORWARD); |
837 | return; | | 835 | return; |
838 | } | | 836 | } |
839 | #ifdef IPSEC | | 837 | #ifdef IPSEC |
840 | if (ipsec4_in_reject(m, NULL)) { | | 838 | if (ipsec4_in_reject(m, NULL)) { |
841 | IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); | | 839 | IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); |
842 | goto bad; | | 840 | goto bad; |
843 | } | | 841 | } |
844 | #endif | | 842 | #endif |
845 | #ifdef FAST_IPSEC | | 843 | #ifdef FAST_IPSEC |
846 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); | | 844 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); |
847 | s = splsoftnet(); | | 845 | s = splsoftnet(); |
848 | if (mtag != NULL) { | | 846 | if (mtag != NULL) { |
849 | tdbi = (struct tdb_ident *)(mtag + 1); | | 847 | tdbi = (struct tdb_ident *)(mtag + 1); |
850 | sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); | | 848 | sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); |
851 | } else { | | 849 | } else { |
852 | sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, | | 850 | sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, |
853 | IP_FORWARDING, &error); | | 851 | IP_FORWARDING, &error); |
854 | } | | 852 | } |
855 | if (sp == NULL) { /* NB: can happen if error */ | | 853 | if (sp == NULL) { /* NB: can happen if error */ |
856 | splx(s); | | 854 | splx(s); |
857 | /*XXX error stat???*/ | | 855 | /*XXX error stat???*/ |
858 | DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ | | 856 | DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ |
859 | goto bad; | | 857 | goto bad; |
860 | } | | 858 | } |
861 | | | 859 | |
862 | /* | | 860 | /* |
863 | * Check security policy against packet attributes. | | 861 | * Check security policy against packet attributes. |
864 | */ | | 862 | */ |
865 | error = ipsec_in_reject(sp, m); | | 863 | error = ipsec_in_reject(sp, m); |
866 | KEY_FREESP(&sp); | | 864 | KEY_FREESP(&sp); |
867 | splx(s); | | 865 | splx(s); |
868 | if (error) { | | 866 | if (error) { |
869 | IP_STATINC(IP_STAT_CANTFORWARD); | | 867 | IP_STATINC(IP_STAT_CANTFORWARD); |
870 | goto bad; | | 868 | goto bad; |
871 | } | | 869 | } |
872 | | | 870 | |
873 | /* | | 871 | /* |
874 | * Peek at the outbound SP for this packet to determine if | | 872 | * Peek at the outbound SP for this packet to determine if |
875 | * it's a Fast Forward candidate. | | 873 | * it's a Fast Forward candidate. |
876 | */ | | 874 | */ |
877 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); | | 875 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); |
878 | if (mtag != NULL) | | 876 | if (mtag != NULL) |
879 | m->m_flags &= ~M_CANFASTFWD; | | 877 | m->m_flags &= ~M_CANFASTFWD; |
880 | else { | | 878 | else { |
881 | s = splsoftnet(); | | 879 | s = splsoftnet(); |
882 | sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, | | 880 | sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, |
883 | (IP_FORWARDING | | | 881 | (IP_FORWARDING | |
884 | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), | | 882 | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), |
885 | &error, NULL); | | 883 | &error, NULL); |
886 | if (sp != NULL) { | | 884 | if (sp != NULL) { |
887 | m->m_flags &= ~M_CANFASTFWD; | | 885 | m->m_flags &= ~M_CANFASTFWD; |
888 | KEY_FREESP(&sp); | | 886 | KEY_FREESP(&sp); |
889 | } | | 887 | } |
890 | splx(s); | | 888 | splx(s); |
891 | } | | 889 | } |
892 | #endif /* FAST_IPSEC */ | | 890 | #endif /* FAST_IPSEC */ |
893 | | | 891 | |
894 | ip_forward(m, srcrt); | | 892 | ip_forward(m, srcrt); |
895 | } | | 893 | } |
896 | return; | | 894 | return; |
897 | | | 895 | |
898 | ours: | | 896 | ours: |
899 | /* | | 897 | /* |
900 | * If offset or IP_MF are set, must reassemble. | | 898 | * If offset or IP_MF are set, must reassemble. |
901 | * Otherwise, nothing need be done. | | 899 | * Otherwise, nothing need be done. |
902 | * (We could look in the reassembly queue to see | | 900 | * (We could look in the reassembly queue to see |
903 | * if the packet was previously fragmented, | | 901 | * if the packet was previously fragmented, |
904 | * but it's not worth the time; just let them time out.) | | 902 | * but it's not worth the time; just let them time out.) |
905 | */ | | 903 | */ |
906 | if (ip->ip_off & ~htons(IP_DF|IP_RF)) { | | 904 | if (ip->ip_off & ~htons(IP_DF|IP_RF)) { |
907 | uint16_t off; | | 905 | uint16_t off; |
908 | /* | | 906 | /* |
909 | * Prevent TCP blind data attacks by not allowing non-initial | | 907 | * Prevent TCP blind data attacks by not allowing non-initial |
910 | * fragments to start at less than 68 bytes (minimal fragment | | 908 | * fragments to start at less than 68 bytes (minimal fragment |
911 | * size) and making sure the first fragment is at least 68 | | 909 | * size) and making sure the first fragment is at least 68 |
912 | * bytes. | | 910 | * bytes. |
913 | */ | | 911 | */ |
914 | off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; | | 912 | off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; |
915 | if ((off > 0 ? off + hlen : len) < IP_MINFRAGSIZE - 1) { | | 913 | if ((off > 0 ? off + hlen : len) < IP_MINFRAGSIZE - 1) { |
916 | IP_STATINC(IP_STAT_BADFRAGS); | | 914 | IP_STATINC(IP_STAT_BADFRAGS); |
917 | goto bad; | | 915 | goto bad; |
918 | } | | 916 | } |
919 | /* | | 917 | /* |
920 | * Look for queue of fragments | | 918 | * Look for queue of fragments |
921 | * of this datagram. | | 919 | * of this datagram. |
922 | */ | | 920 | */ |
923 | IPQ_LOCK(); | | 921 | IPQ_LOCK(); |
924 | hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); | | 922 | hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); |
925 | LIST_FOREACH(fp, &ipq[hash], ipq_q) { | | 923 | LIST_FOREACH(fp, &ipq[hash], ipq_q) { |
926 | if (ip->ip_id == fp->ipq_id && | | 924 | if (ip->ip_id == fp->ipq_id && |
927 | in_hosteq(ip->ip_src, fp->ipq_src) && | | 925 | in_hosteq(ip->ip_src, fp->ipq_src) && |
928 | in_hosteq(ip->ip_dst, fp->ipq_dst) && | | 926 | in_hosteq(ip->ip_dst, fp->ipq_dst) && |
929 | ip->ip_p == fp->ipq_p) { | | 927 | ip->ip_p == fp->ipq_p) { |
930 | /* | | 928 | /* |
931 | * Make sure the TOS matches previous | | 929 | * Make sure the TOS matches previous
932 | * fragments. | | 930 | * fragments. |
933 | */ | | 931 | */ |
934 | if (ip->ip_tos != fp->ipq_tos) { | | 932 | if (ip->ip_tos != fp->ipq_tos) { |
935 | IP_STATINC(IP_STAT_BADFRAGS); | | 933 | IP_STATINC(IP_STAT_BADFRAGS); |
936 | IPQ_UNLOCK(); | | 934 | IPQ_UNLOCK(); |
937 | goto bad; | | 935 | goto bad; |
938 | } | | 936 | } |
939 | goto found; | | 937 | goto found; |
940 | } | | 938 | } |
941 | } | | 939 | } |
942 | fp = 0; | | 940 | fp = 0; |
943 | found: | | 941 | found: |
944 | | | 942 | |
945 | /* | | 943 | /* |
946 | * Adjust ip_len to not reflect header, | | 944 | * Adjust ip_len to not reflect header, |
947 | * set ipqe_mff if more fragments are expected, | | 945 | * set ipqe_mff if more fragments are expected, |
948 | * convert offset of this to bytes. | | 946 | * convert offset of this to bytes. |
949 | */ | | 947 | */ |
950 | ip->ip_len = htons(ntohs(ip->ip_len) - hlen); | | 948 | ip->ip_len = htons(ntohs(ip->ip_len) - hlen); |
951 | mff = (ip->ip_off & htons(IP_MF)) != 0; | | 949 | mff = (ip->ip_off & htons(IP_MF)) != 0; |
952 | if (mff) { | | 950 | if (mff) { |
953 | /* | | 951 | /* |
954 | * Make sure that fragments have a data length | | 952 | * Make sure that fragments have a data length |
955 | * that's a non-zero multiple of 8 bytes. | | 953 | * that's a non-zero multiple of 8 bytes. |
956 | */ | | 954 | */ |
957 | if (ntohs(ip->ip_len) == 0 || | | 955 | if (ntohs(ip->ip_len) == 0 || |
958 | (ntohs(ip->ip_len) & 0x7) != 0) { | | 956 | (ntohs(ip->ip_len) & 0x7) != 0) { |
959 | IP_STATINC(IP_STAT_BADFRAGS); | | 957 | IP_STATINC(IP_STAT_BADFRAGS); |
960 | IPQ_UNLOCK(); | | 958 | IPQ_UNLOCK(); |
961 | goto bad; | | 959 | goto bad; |
962 | } | | 960 | } |
963 | } | | 961 | } |
964 | ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3); | | 962 | ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3); |
965 | | | 963 | |
966 | /* | | 964 | /* |
967 | * If datagram marked as having more fragments | | 965 | * If datagram marked as having more fragments |
968 | * or if this is not the first fragment, | | 966 | * or if this is not the first fragment, |
969 | * attempt reassembly; if it succeeds, proceed. | | 967 | * attempt reassembly; if it succeeds, proceed. |
970 | */ | | 968 | */ |
971 | if (mff || ip->ip_off != htons(0)) { | | 969 | if (mff || ip->ip_off != htons(0)) { |
972 | IP_STATINC(IP_STAT_FRAGMENTS); | | 970 | IP_STATINC(IP_STAT_FRAGMENTS); |
973 | s = splvm(); | | 971 | s = splvm(); |
974 | ipqe = pool_get(&ipqent_pool, PR_NOWAIT); | | 972 | ipqe = pool_get(&ipqent_pool, PR_NOWAIT); |
975 | splx(s); | | 973 | splx(s); |
976 | if (ipqe == NULL) { | | 974 | if (ipqe == NULL) { |
977 | IP_STATINC(IP_STAT_RCVMEMDROP); | | 975 | IP_STATINC(IP_STAT_RCVMEMDROP); |
978 | IPQ_UNLOCK(); | | 976 | IPQ_UNLOCK(); |
979 | goto bad; | | 977 | goto bad; |
980 | } | | 978 | } |
981 | ipqe->ipqe_mff = mff; | | 979 | ipqe->ipqe_mff = mff; |
982 | ipqe->ipqe_m = m; | | 980 | ipqe->ipqe_m = m; |
983 | ipqe->ipqe_ip = ip; | | 981 | ipqe->ipqe_ip = ip; |
984 | m = ip_reass(ipqe, fp, &ipq[hash]); | | 982 | m = ip_reass(ipqe, fp, &ipq[hash]); |
985 | if (m == 0) { | | 983 | if (m == 0) { |
986 | IPQ_UNLOCK(); | | 984 | IPQ_UNLOCK(); |
987 | return; | | 985 | return; |
988 | } | | 986 | } |
989 | IP_STATINC(IP_STAT_REASSEMBLED); | | 987 | IP_STATINC(IP_STAT_REASSEMBLED); |
990 | ip = mtod(m, struct ip *); | | 988 | ip = mtod(m, struct ip *); |
991 | hlen = ip->ip_hl << 2; | | 989 | hlen = ip->ip_hl << 2; |
992 | ip->ip_len = htons(ntohs(ip->ip_len) + hlen); | | 990 | ip->ip_len = htons(ntohs(ip->ip_len) + hlen); |
993 | } else | | 991 | } else |
994 | if (fp) | | 992 | if (fp) |
995 | ip_freef(fp); | | 993 | ip_freef(fp); |
996 | IPQ_UNLOCK(); | | 994 | IPQ_UNLOCK(); |
997 | } | | 995 | } |
998 | | | 996 | |
999 | #if defined(IPSEC) | | 997 | #if defined(IPSEC) |
1000 | /* | | 998 | /* |
1001 | * enforce IPsec policy checking if we are seeing last header. | | 999 | * enforce IPsec policy checking if we are seeing last header. |
1002 | * note that we do not visit this with protocols with pcb layer | | 1000 | * note that we do not visit this with protocols with pcb layer |
1003 | * code - like udp/tcp/raw ip. | | 1001 | * code - like udp/tcp/raw ip. |
1004 | */ | | 1002 | */ |
1005 | if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && | | 1003 | if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && |
1006 | ipsec4_in_reject(m, NULL)) { | | 1004 | ipsec4_in_reject(m, NULL)) { |
1007 | IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); | | 1005 | IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); |
1008 | goto bad; | | 1006 | goto bad; |
1009 | } | | 1007 | } |
1010 | #endif | | 1008 | #endif |
1011 | #ifdef FAST_IPSEC | | 1009 | #ifdef FAST_IPSEC |
1012 | /* | | 1010 | /* |
1013 | * enforce IPsec policy checking if we are seeing last header. | | 1011 | * enforce IPsec policy checking if we are seeing last header. |
1014 | * note that we do not visit this with protocols with pcb layer | | 1012 | * note that we do not visit this with protocols with pcb layer |
1015 | * code - like udp/tcp/raw ip. | | 1013 | * code - like udp/tcp/raw ip. |
1016 | */ | | 1014 | */ |
1017 | if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { | | 1015 | if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { |
1018 | /* | | 1016 | /* |
1019 | * Check if the packet has already had IPsec processing | | 1017 | * Check if the packet has already had IPsec processing |
1020 | * done. If so, then just pass it along. This tag gets | | 1018 | * done. If so, then just pass it along. This tag gets |
1021 | * set during AH, ESP, etc. input handling, before the | | 1019 | * set during AH, ESP, etc. input handling, before the |
1022 | * packet is returned to the ip input queue for delivery. | | 1020 | * packet is returned to the ip input queue for delivery. |
1023 | */ | | 1021 | */ |
1024 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); | | 1022 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); |
1025 | s = splsoftnet(); | | 1023 | s = splsoftnet(); |
1026 | if (mtag != NULL) { | | 1024 | if (mtag != NULL) { |
1027 | tdbi = (struct tdb_ident *)(mtag + 1); | | 1025 | tdbi = (struct tdb_ident *)(mtag + 1); |
1028 | sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); | | 1026 | sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); |
1029 | } else { | | 1027 | } else { |
1030 | sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, | | 1028 | sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, |
1031 | IP_FORWARDING, &error); | | 1029 | IP_FORWARDING, &error); |
1032 | } | | 1030 | } |
1033 | if (sp != NULL) { | | 1031 | if (sp != NULL) { |
1034 | /* | | 1032 | /* |
1035 | * Check security policy against packet attributes. | | 1033 | * Check security policy against packet attributes. |
1036 | */ | | 1034 | */ |
1037 | error = ipsec_in_reject(sp, m); | | 1035 | error = ipsec_in_reject(sp, m); |
1038 | KEY_FREESP(&sp); | | 1036 | KEY_FREESP(&sp); |
1039 | } else { | | 1037 | } else { |
1040 | /* XXX error stat??? */ | | 1038 | /* XXX error stat??? */ |
1041 | error = EINVAL; | | 1039 | error = EINVAL; |
1042 | DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ | | 1040 | DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ |
1043 | } | | 1041 | } |
1044 | splx(s); | | 1042 | splx(s); |
1045 | if (error) | | 1043 | if (error) |
1046 | goto bad; | | 1044 | goto bad; |
1047 | } | | 1045 | } |
1048 | #endif /* FAST_IPSEC */ | | 1046 | #endif /* FAST_IPSEC */ |
1049 | | | 1047 | |
1050 | /* | | 1048 | /* |
1051 | * Switch out to protocol's input routine. | | 1049 | * Switch out to protocol's input routine. |
1052 | */ | | 1050 | */ |
1053 | #if IFA_STATS | | 1051 | #if IFA_STATS |
1054 | if (ia && ip) | | 1052 | if (ia && ip) |
1055 | ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); | | 1053 | ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); |
1056 | #endif | | 1054 | #endif |
1057 | IP_STATINC(IP_STAT_DELIVERED); | | 1055 | IP_STATINC(IP_STAT_DELIVERED); |
1058 | { | | 1056 | { |
1059 | int off = hlen, nh = ip->ip_p; | | 1057 | int off = hlen, nh = ip->ip_p; |
1060 | | | 1058 | |
1061 | (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); | | 1059 | (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); |
1062 | return; | | 1060 | return; |
1063 | } | | 1061 | } |
1064 | bad: | | 1062 | bad: |
1065 | m_freem(m); | | 1063 | m_freem(m); |
1066 | return; | | 1064 | return; |
1067 | | | 1065 | |
1068 | badcsum: | | 1066 | badcsum: |
1069 | IP_STATINC(IP_STAT_BADSUM); | | 1067 | IP_STATINC(IP_STAT_BADSUM); |
1070 | m_freem(m); | | 1068 | m_freem(m); |
1071 | } | | 1069 | } |
1072 | | | 1070 | |
1073 | /* | | 1071 | /* |
1074 | * Take incoming datagram fragment and try to | | 1072 | * Take incoming datagram fragment and try to |
1075 | * reassemble it into whole datagram. If a chain for | | 1073 | * reassemble it into whole datagram. If a chain for |
1076 | * reassembly of this datagram already exists, then it | | 1074 | * reassembly of this datagram already exists, then it |
1077 | * is given as fp; otherwise have to make a chain. | | 1075 | * is given as fp; otherwise have to make a chain. |
1078 | */ | | 1076 | */ |
struct mbuf *
ip_reass(struct ipqent *ipqe, struct ipq *fp, struct ipqhead *ipqhead)
{
	struct mbuf *m = ipqe->ipqe_m;
	struct ipqent *nq, *p, *q;
	struct ip *ip;
	struct mbuf *t;
	int hlen = ipqe->ipqe_ip->ip_hl << 2;
	int i, next, s;

	/* Caller must already hold the reassembly-queue lock. */
	IPQ_LOCK_CHECK();

	/*
	 * Presence of header sizes in mbufs
	 * would confuse code below.
	 * (Restored before the reassembled packet is returned.)
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

#ifdef notyet
	/* make sure fragment limit is up-to-date */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags >= ip_maxfrags)
		ip_reass_drophalf(void);
#endif

	/*
	 * We are about to add a fragment; increment frag count.
	 * (Undone on the dropfrag path below.)
	 */
	ip_nfrags++;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == 0) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly;
		 * If maxfrag is 0, never accept fragments.
		 * If maxfrag is -1, accept all fragments without limitation.
		 */
		if (ip_maxfragpackets < 0)
			;
		else if (ip_nfragpackets >= ip_maxfragpackets)
			goto dropfrag;
		ip_nfragpackets++;
		MALLOC(fp, struct ipq *, sizeof (struct ipq),
		    M_FTABLE, M_NOWAIT);
		if (fp == NULL)
			goto dropfrag;
		LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
		/* Record the datagram's identity for later fragment matching. */
		fp->ipq_nfrags = 1;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ipqe->ipqe_ip->ip_p;
		fp->ipq_id = ipqe->ipqe_ip->ip_id;
		fp->ipq_tos = ipqe->ipqe_ip->ip_tos;
		TAILQ_INIT(&fp->ipq_fragq);
		fp->ipq_src = ipqe->ipqe_ip->ip_src;
		fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
		/* p == NULL makes the insert below go at the queue head. */
		p = NULL;
		goto insert;
	} else {
		fp->ipq_nfrags++;
	}

	/*
	 * Find a segment which begins after this one does.
	 * On exit, p is the fragment preceding ours (or NULL) and
	 * q the first fragment starting after ours (or NULL).
	 */
	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
	    p = q, q = TAILQ_NEXT(q, ipqe_q))
		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
			break;

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already. If so, drop the data from the incoming
	 * segment. If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		/* i = number of leading bytes p already covers. */
		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
		    ntohs(ipqe->ipqe_ip->ip_off);
		if (i > 0) {
			if (i >= ntohs(ipqe->ipqe_ip->ip_len))
				goto dropfrag;
			m_adj(ipqe->ipqe_m, i);
			ipqe->ipqe_ip->ip_off =
			    htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
			ipqe->ipqe_ip->ip_len =
			    htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
		}
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	for (; q != NULL &&
	    ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
	    ntohs(q->ipqe_ip->ip_off); q = nq) {
		/* i = number of q's leading bytes that we now supply. */
		i = (ntohs(ipqe->ipqe_ip->ip_off) +
		    ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
		if (i < ntohs(q->ipqe_ip->ip_len)) {
			/* Partial overlap: trim q's front and stop. */
			q->ipqe_ip->ip_len =
			    htons(ntohs(q->ipqe_ip->ip_len) - i);
			q->ipqe_ip->ip_off =
			    htons(ntohs(q->ipqe_ip->ip_off) + i);
			m_adj(q->ipqe_m, i);
			break;
		}
		/* q fully covered: free it and continue with its successor. */
		nq = TAILQ_NEXT(q, ipqe_q);
		m_freem(q->ipqe_m);
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		s = splvm();
		pool_put(&ipqent_pool, q);
		splx(s);
		fp->ipq_nfrags--;
		ip_nfrags--;
	}

insert:
	/*
	 * Stick new segment in its place;
	 * check for complete reassembly.
	 */
	if (p == NULL) {
		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
	} else {
		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
	}
	/*
	 * Walk the (offset-sorted) fragment list; any gap means the
	 * datagram is still incomplete and we return NULL.
	 */
	next = 0;
	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
	    p = q, q = TAILQ_NEXT(q, ipqe_q)) {
		if (ntohs(q->ipqe_ip->ip_off) != next)
			return (0);
		next += ntohs(q->ipqe_ip->ip_len);
	}
	/* Last fragment still has MF set: more data expected. */
	if (p->ipqe_mff)
		return (0);

	/*
	 * Reassembly is complete. Check for a bogus message size and
	 * concatenate fragments.
	 */
	q = TAILQ_FIRST(&fp->ipq_fragq);
	ip = q->ipqe_ip;
	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
		IP_STATINC(IP_STAT_TOOLONG);
		ip_freef(fp);
		return (0);
	}
	m = q->ipqe_m;
	t = m->m_next;
	m->m_next = 0;
	m_cat(m, t);
	nq = TAILQ_NEXT(q, ipqe_q);
	s = splvm();
	pool_put(&ipqent_pool, q);
	splx(s);
	/* Append every remaining fragment's mbuf chain to the first. */
	for (q = nq; q != NULL; q = nq) {
		t = q->ipqe_m;
		nq = TAILQ_NEXT(q, ipqe_q);
		s = splvm();
		pool_put(&ipqent_pool, q);
		splx(s);
		m_cat(m, t);
	}
	ip_nfrags -= fp->ipq_nfrags;

	/*
	 * Create header for new ip packet by
	 * modifying header of first packet;
	 * dequeue and discard fragment reassembly header.
	 * Make header visible.
	 */
	ip->ip_len = htons(next);
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;
	LIST_REMOVE(fp, ipq_q);
	FREE(fp, M_FTABLE);
	ip_nfragpackets--;
	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);
	/* some debugging cruft by sklower, below, will go away soon */
	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
		/* Recompute pkthdr length over the concatenated chain. */
		int plen = 0;
		for (t = m; t; t = t->m_next)
			plen += t->m_len;
		m->m_pkthdr.len = plen;
		/* Checksum offload state is stale after reassembly. */
		m->m_pkthdr.csum_flags = 0;
	}
	return (m);

dropfrag:
	/* Undo the per-fragment accounting and discard the fragment. */
	if (fp != 0)
		fp->ipq_nfrags--;
	ip_nfrags--;
	IP_STATINC(IP_STAT_FRAGDROPPED);
	m_freem(m);
	s = splvm();
	pool_put(&ipqent_pool, ipqe);
	splx(s);
	return (0);
}
1284 | | | 1282 | |
1285 | /* | | 1283 | /* |
1286 | * Free a fragment reassembly header and all | | 1284 | * Free a fragment reassembly header and all |
1287 | * associated datagrams. | | 1285 | * associated datagrams. |
1288 | */ | | 1286 | */ |
1289 | void | | 1287 | void |
1290 | ip_freef(struct ipq *fp) | | 1288 | ip_freef(struct ipq *fp) |
1291 | { | | 1289 | { |
1292 | struct ipqent *q, *p; | | 1290 | struct ipqent *q, *p; |
1293 | u_int nfrags = 0; | | 1291 | u_int nfrags = 0; |
1294 | int s; | | 1292 | int s; |
1295 | | | 1293 | |
1296 | IPQ_LOCK_CHECK(); | | 1294 | IPQ_LOCK_CHECK(); |
1297 | | | 1295 | |
1298 | for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) { | | 1296 | for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) { |
1299 | p = TAILQ_NEXT(q, ipqe_q); | | 1297 | p = TAILQ_NEXT(q, ipqe_q); |
1300 | m_freem(q->ipqe_m); | | 1298 | m_freem(q->ipqe_m); |
1301 | nfrags++; | | 1299 | nfrags++; |
1302 | TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); | | 1300 | TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); |
1303 | s = splvm(); | | 1301 | s = splvm(); |
1304 | pool_put(&ipqent_pool, q); | | 1302 | pool_put(&ipqent_pool, q); |
1305 | splx(s); | | 1303 | splx(s); |
1306 | } | | 1304 | } |
1307 | | | 1305 | |
1308 | if (nfrags != fp->ipq_nfrags) | | 1306 | if (nfrags != fp->ipq_nfrags) |
1309 | printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags); | | 1307 | printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags); |
1310 | ip_nfrags -= nfrags; | | 1308 | ip_nfrags -= nfrags; |
1311 | LIST_REMOVE(fp, ipq_q); | | 1309 | LIST_REMOVE(fp, ipq_q); |
1312 | FREE(fp, M_FTABLE); | | 1310 | FREE(fp, M_FTABLE); |
1313 | ip_nfragpackets--; | | 1311 | ip_nfragpackets--; |
1314 | } | | 1312 | } |
1315 | | | 1313 | |
1316 | /* | | 1314 | /* |
1317 | * IP reassembly TTL machinery for multiplicative drop. | | 1315 | * IP reassembly TTL machinery for multiplicative drop. |
1318 | */ | | 1316 | */ |
1319 | static u_int fragttl_histo[(IPFRAGTTL+1)]; | | 1317 | static u_int fragttl_histo[(IPFRAGTTL+1)]; |
1320 | | | 1318 | |
1321 | | | 1319 | |
1322 | /* | | 1320 | /* |
1323 | * Decrement TTL of all reassembly queue entries by `ticks'. | | 1321 | * Decrement TTL of all reassembly queue entries by `ticks'.
1324 | * Count number of distinct fragments (as opposed to partial, fragmented | | 1322 | * Count number of distinct fragments (as opposed to partial, fragmented |
1325 | * datagrams) in the reassembly queue. While we traverse the entire | | 1323 | * datagrams) in the reassembly queue. While we traverse the entire |
1326 | * reassembly queue, compute and return the median TTL over all fragments. | | 1324 | * reassembly queue, compute and return the median TTL over all fragments. |
1327 | */ | | 1325 | */ |
1328 | static u_int | | 1326 | static u_int |
1329 | ip_reass_ttl_decr(u_int ticks) | | 1327 | ip_reass_ttl_decr(u_int ticks) |
1330 | { | | 1328 | { |
1331 | u_int nfrags, median, dropfraction, keepfraction; | | 1329 | u_int nfrags, median, dropfraction, keepfraction; |
1332 | struct ipq *fp, *nfp; | | 1330 | struct ipq *fp, *nfp; |
1333 | int i; | | 1331 | int i; |
1334 | | | 1332 | |
1335 | nfrags = 0; | | 1333 | nfrags = 0; |
1336 | memset(fragttl_histo, 0, sizeof fragttl_histo); | | 1334 | memset(fragttl_histo, 0, sizeof fragttl_histo); |
1337 | | | 1335 | |
1338 | for (i = 0; i < IPREASS_NHASH; i++) { | | 1336 | for (i = 0; i < IPREASS_NHASH; i++) { |
1339 | for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) { | | 1337 | for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) { |
1340 | fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ? | | 1338 | fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ? |
1341 | 0 : fp->ipq_ttl - ticks); | | 1339 | 0 : fp->ipq_ttl - ticks); |
1342 | nfp = LIST_NEXT(fp, ipq_q); | | 1340 | nfp = LIST_NEXT(fp, ipq_q); |
1343 | if (fp->ipq_ttl == 0) { | | 1341 | if (fp->ipq_ttl == 0) { |
1344 | IP_STATINC(IP_STAT_FRAGTIMEOUT); | | 1342 | IP_STATINC(IP_STAT_FRAGTIMEOUT); |
1345 | ip_freef(fp); | | 1343 | ip_freef(fp); |
1346 | } else { | | 1344 | } else { |
1347 | nfrags += fp->ipq_nfrags; | | 1345 | nfrags += fp->ipq_nfrags; |
1348 | fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags; | | 1346 | fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags; |
1349 | } | | 1347 | } |
1350 | } | | 1348 | } |
1351 | } | | 1349 | } |
1352 | | | 1350 | |
1353 | KASSERT(ip_nfrags == nfrags); | | 1351 | KASSERT(ip_nfrags == nfrags); |
1354 | | | 1352 | |
1355 | /* Find median (or other drop fraction) in histogram. */ | | 1353 | /* Find median (or other drop fraction) in histogram. */ |
1356 | dropfraction = (ip_nfrags / 2); | | 1354 | dropfraction = (ip_nfrags / 2); |
1357 | keepfraction = ip_nfrags - dropfraction; | | 1355 | keepfraction = ip_nfrags - dropfraction; |
1358 | for (i = IPFRAGTTL, median = 0; i >= 0; i--) { | | 1356 | for (i = IPFRAGTTL, median = 0; i >= 0; i--) { |
1359 | median += fragttl_histo[i]; | | 1357 | median += fragttl_histo[i]; |
1360 | if (median >= keepfraction) | | 1358 | if (median >= keepfraction) |
1361 | break; | | 1359 | break; |
1362 | } | | 1360 | } |
1363 | | | 1361 | |
1364 | /* Return TTL of median (or other fraction). */ | | 1362 | /* Return TTL of median (or other fraction). */ |
1365 | return (u_int)i; | | 1363 | return (u_int)i; |
1366 | } | | 1364 | } |
1367 | | | 1365 | |
1368 | void | | 1366 | void |
1369 | ip_reass_drophalf(void) | | 1367 | ip_reass_drophalf(void) |
1370 | { | | 1368 | { |
1371 | | | 1369 | |
1372 | u_int median_ticks; | | 1370 | u_int median_ticks; |
1373 | /* | | 1371 | /* |
1374 | * Compute median TTL of all fragments, and count frags | | 1372 | * Compute median TTL of all fragments, and count frags |
1375 | * with that TTL or lower (roughly half of all fragments). | | 1373 | * with that TTL or lower (roughly half of all fragments). |
1376 | */ | | 1374 | */ |
1377 | median_ticks = ip_reass_ttl_decr(0); | | 1375 | median_ticks = ip_reass_ttl_decr(0); |
1378 | | | 1376 | |
1379 | /* Drop half. */ | | 1377 | /* Drop half. */ |
1380 | median_ticks = ip_reass_ttl_decr(median_ticks); | | 1378 | median_ticks = ip_reass_ttl_decr(median_ticks); |
1381 | | | 1379 | |
1382 | } | | 1380 | } |
1383 | | | 1381 | |
1384 | /* | | 1382 | /* |
1385 | * IP timer processing; | | 1383 | * IP timer processing; |
1386 | * if a timer expires on a reassembly | | 1384 | * if a timer expires on a reassembly |
1387 | * queue, discard it. | | 1385 | * queue, discard it. |
1388 | */ | | 1386 | */ |
1389 | void | | 1387 | void |
1390 | ip_slowtimo(void) | | 1388 | ip_slowtimo(void) |
1391 | { | | 1389 | { |
1392 | static u_int dropscanidx = 0; | | 1390 | static u_int dropscanidx = 0; |
1393 | u_int i; | | 1391 | u_int i; |
1394 | u_int median_ttl; | | 1392 | u_int median_ttl; |
1395 | | | 1393 | |
1396 | mutex_enter(softnet_lock); | | 1394 | mutex_enter(softnet_lock); |
1397 | KERNEL_LOCK(1, NULL); | | 1395 | KERNEL_LOCK(1, NULL); |
1398 | | | 1396 | |
1399 | IPQ_LOCK(); | | 1397 | IPQ_LOCK(); |
1400 | | | 1398 | |
1401 | /* Age TTL of all fragments by 1 tick .*/ | | 1399 | /* Age TTL of all fragments by 1 tick .*/ |
1402 | median_ttl = ip_reass_ttl_decr(1); | | 1400 | median_ttl = ip_reass_ttl_decr(1); |
1403 | | | 1401 | |
1404 | /* make sure fragment limit is up-to-date */ | | 1402 | /* make sure fragment limit is up-to-date */ |
1405 | CHECK_NMBCLUSTER_PARAMS(); | | 1403 | CHECK_NMBCLUSTER_PARAMS(); |
1406 | | | 1404 | |
1407 | /* If we have too many fragments, drop the older half. */ | | 1405 | /* If we have too many fragments, drop the older half. */ |
1408 | if (ip_nfrags > ip_maxfrags) | | 1406 | if (ip_nfrags > ip_maxfrags) |
1409 | ip_reass_ttl_decr(median_ttl); | | 1407 | ip_reass_ttl_decr(median_ttl); |
1410 | | | 1408 | |
1411 | /* | | 1409 | /* |
1412 | * If we are over the maximum number of fragmented packets | | 1410 | * If we are over the maximum number of fragmented packets |
1413 | * (due to the limit being lowered), drain off | | 1411 | * (due to the limit being lowered), drain off |
1414 | * enough to get down to the new limit. Start draining | | 1412 | * enough to get down to the new limit. Start draining |
1415 | * from the reassembly hashqueue most recently drained. | | 1413 | * from the reassembly hashqueue most recently drained. |
1416 | */ | | 1414 | */ |
1417 | if (ip_maxfragpackets < 0) | | 1415 | if (ip_maxfragpackets < 0) |
1418 | ; | | 1416 | ; |
1419 | else { | | 1417 | else { |
1420 | int wrapped = 0; | | 1418 | int wrapped = 0; |
1421 | | | 1419 | |
1422 | i = dropscanidx; | | 1420 | i = dropscanidx; |
1423 | while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) { | | 1421 | while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) { |
1424 | while (LIST_FIRST(&ipq[i]) != NULL) | | 1422 | while (LIST_FIRST(&ipq[i]) != NULL) |
1425 | ip_freef(LIST_FIRST(&ipq[i])); | | 1423 | ip_freef(LIST_FIRST(&ipq[i])); |
1426 | if (++i >= IPREASS_NHASH) { | | 1424 | if (++i >= IPREASS_NHASH) { |
1427 | i = 0; | | 1425 | i = 0; |
1428 | } | | 1426 | } |
1429 | /* | | 1427 | /* |
1430 | * Dont scan forever even if fragment counters are | | 1428 | * Dont scan forever even if fragment counters are |
1431 | * wrong: stop after scanning entire reassembly queue. | | 1429 | * wrong: stop after scanning entire reassembly queue. |
1432 | */ | | 1430 | */ |
1433 | if (i == dropscanidx) | | 1431 | if (i == dropscanidx) |
1434 | wrapped = 1; | | 1432 | wrapped = 1; |
1435 | } | | 1433 | } |
1436 | dropscanidx = i; | | 1434 | dropscanidx = i; |
1437 | } | | 1435 | } |
1438 | IPQ_UNLOCK(); | | 1436 | IPQ_UNLOCK(); |
1439 | | | 1437 | |
1440 | KERNEL_UNLOCK_ONE(NULL); | | 1438 | KERNEL_UNLOCK_ONE(NULL); |
1441 | mutex_exit(softnet_lock); | | 1439 | mutex_exit(softnet_lock); |
1442 | } | | 1440 | } |
1443 | | | 1441 | |
1444 | /* | | 1442 | /* |
1445 | * Drain off all datagram fragments. Don't acquire softnet_lock as | | 1443 | * Drain off all datagram fragments. Don't acquire softnet_lock as |
1446 | * can be called from hardware interrupt context. | | 1444 | * can be called from hardware interrupt context. |
1447 | */ | | 1445 | */ |
1448 | void | | 1446 | void |
1449 | ip_drain(void) | | 1447 | ip_drain(void) |
1450 | { | | 1448 | { |
1451 | | | 1449 | |
1452 | KERNEL_LOCK(1, NULL); | | 1450 | KERNEL_LOCK(1, NULL); |
1453 | | | 1451 | |
1454 | /* | | 1452 | /* |
1455 | * We may be called from a device's interrupt context. If | | 1453 | * We may be called from a device's interrupt context. If |
1456 | * the ipq is already busy, just bail out now. | | 1454 | * the ipq is already busy, just bail out now. |
1457 | */ | | 1455 | */ |
1458 | if (ipq_lock_try() != 0) { | | 1456 | if (ipq_lock_try() != 0) { |
1459 | /* | | 1457 | /* |
1460 | * Drop half the total fragments now. If more mbufs are | | 1458 | * Drop half the total fragments now. If more mbufs are |
1461 | * needed, we will be called again soon. | | 1459 | * needed, we will be called again soon. |
1462 | */ | | 1460 | */ |
1463 | ip_reass_drophalf(); | | 1461 | ip_reass_drophalf(); |
1464 | IPQ_UNLOCK(); | | 1462 | IPQ_UNLOCK(); |
1465 | } | | 1463 | } |
1466 | | | 1464 | |
1467 | KERNEL_UNLOCK_ONE(NULL); | | 1465 | KERNEL_UNLOCK_ONE(NULL); |
1468 | } | | 1466 | } |
1469 | | | 1467 | |
1470 | /* | | 1468 | /* |
1471 | * Do option processing on a datagram, | | 1469 | * Do option processing on a datagram, |
1472 | * possibly discarding it if bad options are encountered, | | 1470 | * possibly discarding it if bad options are encountered, |
1473 | * or forwarding it if source-routed. | | 1471 | * or forwarding it if source-routed. |
1474 | * Returns 1 if packet has been forwarded/freed, | | 1472 | * Returns 1 if packet has been forwarded/freed, |
1475 | * 0 if the packet should be processed further. | | 1473 | * 0 if the packet should be processed further. |
1476 | */ | | 1474 | */ |
1477 | int | | 1475 | int |