| @@ -1,510 +1,510 @@ | | | @@ -1,510 +1,510 @@ |
1 | /* $NetBSD: ip_flow.c,v 1.57 2009/02/01 17:04:11 pooka Exp $ */ | | 1 | /* $NetBSD: ip_flow.c,v 1.58 2009/03/15 22:16:09 cegger Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1998 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 1998 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. | | 8 | * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | #include <sys/cdefs.h> | | 32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.57 2009/02/01 17:04:11 pooka Exp $"); | | 33 | __KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.58 2009/03/15 22:16:09 cegger Exp $"); |
34 | | | 34 | |
35 | #include <sys/param.h> | | 35 | #include <sys/param.h> |
36 | #include <sys/systm.h> | | 36 | #include <sys/systm.h> |
37 | #include <sys/malloc.h> | | 37 | #include <sys/malloc.h> |
38 | #include <sys/mbuf.h> | | 38 | #include <sys/mbuf.h> |
39 | #include <sys/domain.h> | | 39 | #include <sys/domain.h> |
40 | #include <sys/protosw.h> | | 40 | #include <sys/protosw.h> |
41 | #include <sys/socket.h> | | 41 | #include <sys/socket.h> |
42 | #include <sys/socketvar.h> | | 42 | #include <sys/socketvar.h> |
43 | #include <sys/errno.h> | | 43 | #include <sys/errno.h> |
44 | #include <sys/time.h> | | 44 | #include <sys/time.h> |
45 | #include <sys/kernel.h> | | 45 | #include <sys/kernel.h> |
46 | #include <sys/pool.h> | | 46 | #include <sys/pool.h> |
47 | #include <sys/sysctl.h> | | 47 | #include <sys/sysctl.h> |
48 | | | 48 | |
49 | #include <net/if.h> | | 49 | #include <net/if.h> |
50 | #include <net/if_dl.h> | | 50 | #include <net/if_dl.h> |
51 | #include <net/route.h> | | 51 | #include <net/route.h> |
52 | #include <net/pfil.h> | | 52 | #include <net/pfil.h> |
53 | | | 53 | |
54 | #include <netinet/in.h> | | 54 | #include <netinet/in.h> |
55 | #include <netinet/in_systm.h> | | 55 | #include <netinet/in_systm.h> |
56 | #include <netinet/ip.h> | | 56 | #include <netinet/ip.h> |
57 | #include <netinet/in_pcb.h> | | 57 | #include <netinet/in_pcb.h> |
58 | #include <netinet/in_var.h> | | 58 | #include <netinet/in_var.h> |
59 | #include <netinet/ip_var.h> | | 59 | #include <netinet/ip_var.h> |
60 | #include <netinet/ip_private.h> | | 60 | #include <netinet/ip_private.h> |
61 | | | 61 | |
62 | /* | | 62 | /* |
63 | * Similar code is very well commented in netinet6/ip6_flow.c | | 63 | * Similar code is very well commented in netinet6/ip6_flow.c |
64 | */ | | 64 | */ |
65 | | | 65 | |
66 | struct ipflow { | | 66 | struct ipflow { |
67 | LIST_ENTRY(ipflow) ipf_list; /* next in active list */ | | 67 | LIST_ENTRY(ipflow) ipf_list; /* next in active list */ |
68 | LIST_ENTRY(ipflow) ipf_hash; /* next ipflow in bucket */ | | 68 | LIST_ENTRY(ipflow) ipf_hash; /* next ipflow in bucket */ |
69 | struct in_addr ipf_dst; /* destination address */ | | 69 | struct in_addr ipf_dst; /* destination address */ |
70 | struct in_addr ipf_src; /* source address */ | | 70 | struct in_addr ipf_src; /* source address */ |
71 | uint8_t ipf_tos; /* type-of-service */ | | 71 | uint8_t ipf_tos; /* type-of-service */ |
72 | struct route ipf_ro; /* associated route entry */ | | 72 | struct route ipf_ro; /* associated route entry */ |
73 | u_long ipf_uses; /* number of uses in this period */ | | 73 | u_long ipf_uses; /* number of uses in this period */ |
74 | u_long ipf_last_uses; /* number of uses in last period */ | | 74 | u_long ipf_last_uses; /* number of uses in last period */ |
75 | u_long ipf_dropped; /* ENOBUFS retured by if_output */ | | 75 | u_long ipf_dropped; /* ENOBUFS retured by if_output */ |
76 | u_long ipf_errors; /* other errors returned by if_output */ | | 76 | u_long ipf_errors; /* other errors returned by if_output */ |
77 | u_int ipf_timer; /* lifetime timer */ | | 77 | u_int ipf_timer; /* lifetime timer */ |
78 | time_t ipf_start; /* creation time */ | | 78 | time_t ipf_start; /* creation time */ |
79 | }; | | 79 | }; |
80 | | | 80 | |
81 | #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ | | 81 | #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ |
82 | | | 82 | |
83 | static struct pool ipflow_pool; | | 83 | static struct pool ipflow_pool; |
84 | | | 84 | |
85 | LIST_HEAD(ipflowhead, ipflow); | | 85 | LIST_HEAD(ipflowhead, ipflow); |
86 | | | 86 | |
87 | #define IPFLOW_TIMER (5 * PR_SLOWHZ) | | 87 | #define IPFLOW_TIMER (5 * PR_SLOWHZ) |
88 | #define IPFLOW_DEFAULT_HASHSIZE (1 << IPFLOW_HASHBITS) | | 88 | #define IPFLOW_DEFAULT_HASHSIZE (1 << IPFLOW_HASHBITS) |
89 | | | 89 | |
90 | static struct ipflowhead *ipflowtable = NULL; | | 90 | static struct ipflowhead *ipflowtable = NULL; |
91 | static struct ipflowhead ipflowlist; | | 91 | static struct ipflowhead ipflowlist; |
92 | static int ipflow_inuse; | | 92 | static int ipflow_inuse; |
93 | | | 93 | |
/*
 * Link a flow at the head of the all-flows list and of the given hash
 * bucket.  Callers in this file wrap the macro in splnet()/splx().
 */
#define	IPFLOW_INSERT(bucket, ipf) \
do { \
	LIST_INSERT_HEAD(&ipflowlist, (ipf), ipf_list); \
	LIST_INSERT_HEAD((bucket), (ipf), ipf_hash); \
} while (/*CONSTCOND*/ 0)

/*
 * Unlink a flow from both the all-flows list and its hash bucket.
 */
#define	IPFLOW_REMOVE(ipf) \
do { \
	LIST_REMOVE((ipf), ipf_list); \
	LIST_REMOVE((ipf), ipf_hash); \
} while (/*CONSTCOND*/ 0)
105 | | | 105 | |
106 | #ifndef IPFLOW_MAX | | 106 | #ifndef IPFLOW_MAX |
107 | #define IPFLOW_MAX 256 | | 107 | #define IPFLOW_MAX 256 |
108 | #endif | | 108 | #endif |
109 | int ip_maxflows = IPFLOW_MAX; | | 109 | int ip_maxflows = IPFLOW_MAX; |
110 | int ip_hashsize = IPFLOW_DEFAULT_HASHSIZE; | | 110 | int ip_hashsize = IPFLOW_DEFAULT_HASHSIZE; |
111 | | | 111 | |
112 | static size_t | | 112 | static size_t |
113 | ipflow_hash(const struct ip *ip) | | 113 | ipflow_hash(const struct ip *ip) |
114 | { | | 114 | { |
115 | size_t hash = ip->ip_tos; | | 115 | size_t hash = ip->ip_tos; |
116 | size_t idx; | | 116 | size_t idx; |
117 | | | 117 | |
118 | for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) { | | 118 | for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) { |
119 | hash += (ip->ip_dst.s_addr >> (32 - idx)) + | | 119 | hash += (ip->ip_dst.s_addr >> (32 - idx)) + |
120 | (ip->ip_src.s_addr >> idx); | | 120 | (ip->ip_src.s_addr >> idx); |
121 | } | | 121 | } |
122 | | | 122 | |
123 | return hash & (ip_hashsize-1); | | 123 | return hash & (ip_hashsize-1); |
124 | } | | 124 | } |
125 | | | 125 | |
126 | static struct ipflow * | | 126 | static struct ipflow * |
127 | ipflow_lookup(const struct ip *ip) | | 127 | ipflow_lookup(const struct ip *ip) |
128 | { | | 128 | { |
129 | size_t hash; | | 129 | size_t hash; |
130 | struct ipflow *ipf; | | 130 | struct ipflow *ipf; |
131 | | | 131 | |
132 | hash = ipflow_hash(ip); | | 132 | hash = ipflow_hash(ip); |
133 | | | 133 | |
134 | LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) { | | 134 | LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) { |
135 | if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr | | 135 | if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr |
136 | && ip->ip_src.s_addr == ipf->ipf_src.s_addr | | 136 | && ip->ip_src.s_addr == ipf->ipf_src.s_addr |
137 | && ip->ip_tos == ipf->ipf_tos) | | 137 | && ip->ip_tos == ipf->ipf_tos) |
138 | break; | | 138 | break; |
139 | } | | 139 | } |
140 | return ipf; | | 140 | return ipf; |
141 | } | | 141 | } |
142 | | | 142 | |
143 | void | | 143 | void |
144 | ipflow_poolinit() | | 144 | ipflow_poolinit(void) |
145 | { | | 145 | { |
146 | | | 146 | |
147 | pool_init(&ipflow_pool, sizeof(struct ipflow), 0, 0, 0, "ipflowpl", | | 147 | pool_init(&ipflow_pool, sizeof(struct ipflow), 0, 0, 0, "ipflowpl", |
148 | NULL, IPL_NET); | | 148 | NULL, IPL_NET); |
149 | } | | 149 | } |
150 | | | 150 | |
151 | int | | 151 | int |
152 | ipflow_init(int table_size) | | 152 | ipflow_init(int table_size) |
153 | { | | 153 | { |
154 | struct ipflowhead *new_table; | | 154 | struct ipflowhead *new_table; |
155 | size_t i; | | 155 | size_t i; |
156 | | | 156 | |
157 | new_table = (struct ipflowhead *)malloc(sizeof(struct ipflowhead) * | | 157 | new_table = (struct ipflowhead *)malloc(sizeof(struct ipflowhead) * |
158 | table_size, M_RTABLE, M_NOWAIT); | | 158 | table_size, M_RTABLE, M_NOWAIT); |
159 | | | 159 | |
160 | if (new_table == NULL) | | 160 | if (new_table == NULL) |
161 | return 1; | | 161 | return 1; |
162 | | | 162 | |
163 | if (ipflowtable != NULL) | | 163 | if (ipflowtable != NULL) |
164 | free(ipflowtable, M_RTABLE); | | 164 | free(ipflowtable, M_RTABLE); |
165 | | | 165 | |
166 | ipflowtable = new_table; | | 166 | ipflowtable = new_table; |
167 | ip_hashsize = table_size; | | 167 | ip_hashsize = table_size; |
168 | | | 168 | |
169 | LIST_INIT(&ipflowlist); | | 169 | LIST_INIT(&ipflowlist); |
170 | for (i = 0; i < ip_hashsize; i++) | | 170 | for (i = 0; i < ip_hashsize; i++) |
171 | LIST_INIT(&ipflowtable[i]); | | 171 | LIST_INIT(&ipflowtable[i]); |
172 | | | 172 | |
173 | return 0; | | 173 | return 0; |
174 | } | | 174 | } |
175 | | | 175 | |
176 | int | | 176 | int |
177 | ipflow_fastforward(struct mbuf *m) | | 177 | ipflow_fastforward(struct mbuf *m) |
178 | { | | 178 | { |
179 | struct ip *ip; | | 179 | struct ip *ip; |
180 | struct ip ip_store; | | 180 | struct ip ip_store; |
181 | struct ipflow *ipf; | | 181 | struct ipflow *ipf; |
182 | struct rtentry *rt; | | 182 | struct rtentry *rt; |
183 | const struct sockaddr *dst; | | 183 | const struct sockaddr *dst; |
184 | int error; | | 184 | int error; |
185 | int iplen; | | 185 | int iplen; |
186 | | | 186 | |
187 | /* | | 187 | /* |
188 | * Are we forwarding packets? Big enough for an IP packet? | | 188 | * Are we forwarding packets? Big enough for an IP packet? |
189 | */ | | 189 | */ |
190 | if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip)) | | 190 | if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip)) |
191 | return 0; | | 191 | return 0; |
192 | | | 192 | |
193 | /* | | 193 | /* |
194 | * Was packet received as a link-level multicast or broadcast? | | 194 | * Was packet received as a link-level multicast or broadcast? |
195 | * If so, don't try to fast forward.. | | 195 | * If so, don't try to fast forward.. |
196 | */ | | 196 | */ |
197 | if ((m->m_flags & (M_BCAST|M_MCAST)) != 0) | | 197 | if ((m->m_flags & (M_BCAST|M_MCAST)) != 0) |
198 | return 0; | | 198 | return 0; |
199 | | | 199 | |
200 | /* | | 200 | /* |
201 | * IP header with no option and valid version and length | | 201 | * IP header with no option and valid version and length |
202 | */ | | 202 | */ |
203 | if (IP_HDR_ALIGNED_P(mtod(m, const void *))) | | 203 | if (IP_HDR_ALIGNED_P(mtod(m, const void *))) |
204 | ip = mtod(m, struct ip *); | | 204 | ip = mtod(m, struct ip *); |
205 | else { | | 205 | else { |
206 | memcpy(&ip_store, mtod(m, const void *), sizeof(ip_store)); | | 206 | memcpy(&ip_store, mtod(m, const void *), sizeof(ip_store)); |
207 | ip = &ip_store; | | 207 | ip = &ip_store; |
208 | } | | 208 | } |
209 | iplen = ntohs(ip->ip_len); | | 209 | iplen = ntohs(ip->ip_len); |
210 | if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) || | | 210 | if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) || |
211 | iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len) | | 211 | iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len) |
212 | return 0; | | 212 | return 0; |
213 | /* | | 213 | /* |
214 | * Find a flow. | | 214 | * Find a flow. |
215 | */ | | 215 | */ |
216 | if ((ipf = ipflow_lookup(ip)) == NULL) | | 216 | if ((ipf = ipflow_lookup(ip)) == NULL) |
217 | return 0; | | 217 | return 0; |
218 | | | 218 | |
219 | /* | | 219 | /* |
220 | * Verify the IP header checksum. | | 220 | * Verify the IP header checksum. |
221 | */ | | 221 | */ |
222 | switch (m->m_pkthdr.csum_flags & | | 222 | switch (m->m_pkthdr.csum_flags & |
223 | ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) | | | 223 | ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) | |
224 | M_CSUM_IPv4_BAD)) { | | 224 | M_CSUM_IPv4_BAD)) { |
225 | case M_CSUM_IPv4|M_CSUM_IPv4_BAD: | | 225 | case M_CSUM_IPv4|M_CSUM_IPv4_BAD: |
226 | return (0); | | 226 | return (0); |
227 | | | 227 | |
228 | case M_CSUM_IPv4: | | 228 | case M_CSUM_IPv4: |
229 | /* Checksum was okay. */ | | 229 | /* Checksum was okay. */ |
230 | break; | | 230 | break; |
231 | | | 231 | |
232 | default: | | 232 | default: |
233 | /* Must compute it ourselves. */ | | 233 | /* Must compute it ourselves. */ |
234 | if (in_cksum(m, sizeof(struct ip)) != 0) | | 234 | if (in_cksum(m, sizeof(struct ip)) != 0) |
235 | return (0); | | 235 | return (0); |
236 | break; | | 236 | break; |
237 | } | | 237 | } |
238 | | | 238 | |
239 | /* | | 239 | /* |
240 | * Route and interface still up? | | 240 | * Route and interface still up? |
241 | */ | | 241 | */ |
242 | if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL || | | 242 | if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL || |
243 | (rt->rt_ifp->if_flags & IFF_UP) == 0) | | 243 | (rt->rt_ifp->if_flags & IFF_UP) == 0) |
244 | return 0; | | 244 | return 0; |
245 | | | 245 | |
246 | /* | | 246 | /* |
247 | * Packet size OK? TTL? | | 247 | * Packet size OK? TTL? |
248 | */ | | 248 | */ |
249 | if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) | | 249 | if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) |
250 | return 0; | | 250 | return 0; |
251 | | | 251 | |
252 | /* | | 252 | /* |
253 | * Clear any in-bound checksum flags for this packet. | | 253 | * Clear any in-bound checksum flags for this packet. |
254 | */ | | 254 | */ |
255 | m->m_pkthdr.csum_flags = 0; | | 255 | m->m_pkthdr.csum_flags = 0; |
256 | | | 256 | |
257 | /* | | 257 | /* |
258 | * Everything checks out and so we can forward this packet. | | 258 | * Everything checks out and so we can forward this packet. |
259 | * Modify the TTL and incrementally change the checksum. | | 259 | * Modify the TTL and incrementally change the checksum. |
260 | * | | 260 | * |
261 | * This method of adding the checksum works on either endian CPU. | | 261 | * This method of adding the checksum works on either endian CPU. |
262 | * If htons() is inlined, all the arithmetic is folded; otherwise | | 262 | * If htons() is inlined, all the arithmetic is folded; otherwise |
263 | * the htons()s are combined by CSE due to the const attribute. | | 263 | * the htons()s are combined by CSE due to the const attribute. |
264 | * | | 264 | * |
265 | * Don't bother using HW checksumming here -- the incremental | | 265 | * Don't bother using HW checksumming here -- the incremental |
266 | * update is pretty fast. | | 266 | * update is pretty fast. |
267 | */ | | 267 | */ |
268 | ip->ip_ttl -= IPTTLDEC; | | 268 | ip->ip_ttl -= IPTTLDEC; |
269 | if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) | | 269 | if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) |
270 | ip->ip_sum -= ~htons(IPTTLDEC << 8); | | 270 | ip->ip_sum -= ~htons(IPTTLDEC << 8); |
271 | else | | 271 | else |
272 | ip->ip_sum += htons(IPTTLDEC << 8); | | 272 | ip->ip_sum += htons(IPTTLDEC << 8); |
273 | | | 273 | |
274 | /* | | 274 | /* |
275 | * Done modifying the header; copy it back, if necessary. | | 275 | * Done modifying the header; copy it back, if necessary. |
276 | * | | 276 | * |
277 | * XXX Use m_copyback_cow(9) here? --dyoung | | 277 | * XXX Use m_copyback_cow(9) here? --dyoung |
278 | */ | | 278 | */ |
279 | if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) | | 279 | if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) |
280 | memcpy(mtod(m, void *), &ip_store, sizeof(ip_store)); | | 280 | memcpy(mtod(m, void *), &ip_store, sizeof(ip_store)); |
281 | | | 281 | |
282 | /* | | 282 | /* |
283 | * Trim the packet in case it's too long.. | | 283 | * Trim the packet in case it's too long.. |
284 | */ | | 284 | */ |
285 | if (m->m_pkthdr.len > iplen) { | | 285 | if (m->m_pkthdr.len > iplen) { |
286 | if (m->m_len == m->m_pkthdr.len) { | | 286 | if (m->m_len == m->m_pkthdr.len) { |
287 | m->m_len = iplen; | | 287 | m->m_len = iplen; |
288 | m->m_pkthdr.len = iplen; | | 288 | m->m_pkthdr.len = iplen; |
289 | } else | | 289 | } else |
290 | m_adj(m, iplen - m->m_pkthdr.len); | | 290 | m_adj(m, iplen - m->m_pkthdr.len); |
291 | } | | 291 | } |
292 | | | 292 | |
293 | /* | | 293 | /* |
294 | * Send the packet on it's way. All we can get back is ENOBUFS | | 294 | * Send the packet on it's way. All we can get back is ENOBUFS |
295 | */ | | 295 | */ |
296 | ipf->ipf_uses++; | | 296 | ipf->ipf_uses++; |
297 | PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER); | | 297 | PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER); |
298 | | | 298 | |
299 | if (rt->rt_flags & RTF_GATEWAY) | | 299 | if (rt->rt_flags & RTF_GATEWAY) |
300 | dst = rt->rt_gateway; | | 300 | dst = rt->rt_gateway; |
301 | else | | 301 | else |
302 | dst = rtcache_getdst(&ipf->ipf_ro); | | 302 | dst = rtcache_getdst(&ipf->ipf_ro); |
303 | | | 303 | |
304 | if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { | | 304 | if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { |
305 | if (error == ENOBUFS) | | 305 | if (error == ENOBUFS) |
306 | ipf->ipf_dropped++; | | 306 | ipf->ipf_dropped++; |
307 | else | | 307 | else |
308 | ipf->ipf_errors++; | | 308 | ipf->ipf_errors++; |
309 | } | | 309 | } |
310 | return 1; | | 310 | return 1; |
311 | } | | 311 | } |
312 | | | 312 | |
313 | static void | | 313 | static void |
314 | ipflow_addstats(struct ipflow *ipf) | | 314 | ipflow_addstats(struct ipflow *ipf) |
315 | { | | 315 | { |
316 | struct rtentry *rt; | | 316 | struct rtentry *rt; |
317 | uint64_t *ips; | | 317 | uint64_t *ips; |
318 | | | 318 | |
319 | if ((rt = rtcache_validate(&ipf->ipf_ro)) != NULL) | | 319 | if ((rt = rtcache_validate(&ipf->ipf_ro)) != NULL) |
320 | rt->rt_use += ipf->ipf_uses; | | 320 | rt->rt_use += ipf->ipf_uses; |
321 | | | 321 | |
322 | ips = IP_STAT_GETREF(); | | 322 | ips = IP_STAT_GETREF(); |
323 | ips[IP_STAT_CANTFORWARD] += ipf->ipf_errors + ipf->ipf_dropped; | | 323 | ips[IP_STAT_CANTFORWARD] += ipf->ipf_errors + ipf->ipf_dropped; |
324 | ips[IP_STAT_TOTAL] += ipf->ipf_uses; | | 324 | ips[IP_STAT_TOTAL] += ipf->ipf_uses; |
325 | ips[IP_STAT_FORWARD] += ipf->ipf_uses; | | 325 | ips[IP_STAT_FORWARD] += ipf->ipf_uses; |
326 | ips[IP_STAT_FASTFORWARD] += ipf->ipf_uses; | | 326 | ips[IP_STAT_FASTFORWARD] += ipf->ipf_uses; |
327 | IP_STAT_PUTREF(); | | 327 | IP_STAT_PUTREF(); |
328 | } | | 328 | } |
329 | | | 329 | |
330 | static void | | 330 | static void |
331 | ipflow_free(struct ipflow *ipf) | | 331 | ipflow_free(struct ipflow *ipf) |
332 | { | | 332 | { |
333 | int s; | | 333 | int s; |
334 | /* | | 334 | /* |
335 | * Remove the flow from the hash table (at elevated IPL). | | 335 | * Remove the flow from the hash table (at elevated IPL). |
336 | * Once it's off the list, we can deal with it at normal | | 336 | * Once it's off the list, we can deal with it at normal |
337 | * network IPL. | | 337 | * network IPL. |
338 | */ | | 338 | */ |
339 | s = splnet(); | | 339 | s = splnet(); |
340 | IPFLOW_REMOVE(ipf); | | 340 | IPFLOW_REMOVE(ipf); |
341 | splx(s); | | 341 | splx(s); |
342 | ipflow_addstats(ipf); | | 342 | ipflow_addstats(ipf); |
343 | rtcache_free(&ipf->ipf_ro); | | 343 | rtcache_free(&ipf->ipf_ro); |
344 | ipflow_inuse--; | | 344 | ipflow_inuse--; |
345 | s = splnet(); | | 345 | s = splnet(); |
346 | pool_put(&ipflow_pool, ipf); | | 346 | pool_put(&ipflow_pool, ipf); |
347 | splx(s); | | 347 | splx(s); |
348 | } | | 348 | } |
349 | | | 349 | |
350 | static struct ipflow * | | 350 | static struct ipflow * |
351 | ipflow_reap(bool just_one) | | 351 | ipflow_reap(bool just_one) |
352 | { | | 352 | { |
353 | while (just_one || ipflow_inuse > ip_maxflows) { | | 353 | while (just_one || ipflow_inuse > ip_maxflows) { |
354 | struct ipflow *ipf, *maybe_ipf = NULL; | | 354 | struct ipflow *ipf, *maybe_ipf = NULL; |
355 | int s; | | 355 | int s; |
356 | | | 356 | |
357 | ipf = LIST_FIRST(&ipflowlist); | | 357 | ipf = LIST_FIRST(&ipflowlist); |
358 | while (ipf != NULL) { | | 358 | while (ipf != NULL) { |
359 | /* | | 359 | /* |
360 | * If this no longer points to a valid route | | 360 | * If this no longer points to a valid route |
361 | * reclaim it. | | 361 | * reclaim it. |
362 | */ | | 362 | */ |
363 | if (rtcache_validate(&ipf->ipf_ro) == NULL) | | 363 | if (rtcache_validate(&ipf->ipf_ro) == NULL) |
364 | goto done; | | 364 | goto done; |
365 | /* | | 365 | /* |
366 | * choose the one that's been least recently | | 366 | * choose the one that's been least recently |
367 | * used or has had the least uses in the | | 367 | * used or has had the least uses in the |
368 | * last 1.5 intervals. | | 368 | * last 1.5 intervals. |
369 | */ | | 369 | */ |
370 | if (maybe_ipf == NULL || | | 370 | if (maybe_ipf == NULL || |
371 | ipf->ipf_timer < maybe_ipf->ipf_timer || | | 371 | ipf->ipf_timer < maybe_ipf->ipf_timer || |
372 | (ipf->ipf_timer == maybe_ipf->ipf_timer && | | 372 | (ipf->ipf_timer == maybe_ipf->ipf_timer && |
373 | ipf->ipf_last_uses + ipf->ipf_uses < | | 373 | ipf->ipf_last_uses + ipf->ipf_uses < |
374 | maybe_ipf->ipf_last_uses + | | 374 | maybe_ipf->ipf_last_uses + |
375 | maybe_ipf->ipf_uses)) | | 375 | maybe_ipf->ipf_uses)) |
376 | maybe_ipf = ipf; | | 376 | maybe_ipf = ipf; |
377 | ipf = LIST_NEXT(ipf, ipf_list); | | 377 | ipf = LIST_NEXT(ipf, ipf_list); |
378 | } | | 378 | } |
379 | ipf = maybe_ipf; | | 379 | ipf = maybe_ipf; |
380 | done: | | 380 | done: |
381 | /* | | 381 | /* |
382 | * Remove the entry from the flow table. | | 382 | * Remove the entry from the flow table. |
383 | */ | | 383 | */ |
384 | s = splnet(); | | 384 | s = splnet(); |
385 | IPFLOW_REMOVE(ipf); | | 385 | IPFLOW_REMOVE(ipf); |
386 | splx(s); | | 386 | splx(s); |
387 | ipflow_addstats(ipf); | | 387 | ipflow_addstats(ipf); |
388 | rtcache_free(&ipf->ipf_ro); | | 388 | rtcache_free(&ipf->ipf_ro); |
389 | if (just_one) | | 389 | if (just_one) |
390 | return ipf; | | 390 | return ipf; |
391 | pool_put(&ipflow_pool, ipf); | | 391 | pool_put(&ipflow_pool, ipf); |
392 | ipflow_inuse--; | | 392 | ipflow_inuse--; |
393 | } | | 393 | } |
394 | return NULL; | | 394 | return NULL; |
395 | } | | 395 | } |
396 | | | 396 | |
397 | void | | 397 | void |
398 | ipflow_prune(void) | | 398 | ipflow_prune(void) |
399 | { | | 399 | { |
400 | | | 400 | |
401 | (void) ipflow_reap(false); | | 401 | (void) ipflow_reap(false); |
402 | } | | 402 | } |
403 | | | 403 | |
404 | void | | 404 | void |
405 | ipflow_slowtimo(void) | | 405 | ipflow_slowtimo(void) |
406 | { | | 406 | { |
407 | struct rtentry *rt; | | 407 | struct rtentry *rt; |
408 | struct ipflow *ipf, *next_ipf; | | 408 | struct ipflow *ipf, *next_ipf; |
409 | uint64_t *ips; | | 409 | uint64_t *ips; |
410 | | | 410 | |
411 | mutex_enter(softnet_lock); | | 411 | mutex_enter(softnet_lock); |
412 | KERNEL_LOCK(1, NULL); | | 412 | KERNEL_LOCK(1, NULL); |
413 | for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { | | 413 | for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { |
414 | next_ipf = LIST_NEXT(ipf, ipf_list); | | 414 | next_ipf = LIST_NEXT(ipf, ipf_list); |
415 | if (PRT_SLOW_ISEXPIRED(ipf->ipf_timer) || | | 415 | if (PRT_SLOW_ISEXPIRED(ipf->ipf_timer) || |
416 | (rt = rtcache_validate(&ipf->ipf_ro)) == NULL) { | | 416 | (rt = rtcache_validate(&ipf->ipf_ro)) == NULL) { |
417 | ipflow_free(ipf); | | 417 | ipflow_free(ipf); |
418 | } else { | | 418 | } else { |
419 | ipf->ipf_last_uses = ipf->ipf_uses; | | 419 | ipf->ipf_last_uses = ipf->ipf_uses; |
420 | rt->rt_use += ipf->ipf_uses; | | 420 | rt->rt_use += ipf->ipf_uses; |
421 | ips = IP_STAT_GETREF(); | | 421 | ips = IP_STAT_GETREF(); |
422 | ips[IP_STAT_TOTAL] += ipf->ipf_uses; | | 422 | ips[IP_STAT_TOTAL] += ipf->ipf_uses; |
423 | ips[IP_STAT_FORWARD] += ipf->ipf_uses; | | 423 | ips[IP_STAT_FORWARD] += ipf->ipf_uses; |
424 | ips[IP_STAT_FASTFORWARD] += ipf->ipf_uses; | | 424 | ips[IP_STAT_FASTFORWARD] += ipf->ipf_uses; |
425 | IP_STAT_PUTREF(); | | 425 | IP_STAT_PUTREF(); |
426 | ipf->ipf_uses = 0; | | 426 | ipf->ipf_uses = 0; |
427 | } | | 427 | } |
428 | } | | 428 | } |
429 | KERNEL_UNLOCK_ONE(NULL); | | 429 | KERNEL_UNLOCK_ONE(NULL); |
430 | mutex_exit(softnet_lock); | | 430 | mutex_exit(softnet_lock); |
431 | } | | 431 | } |
432 | | | 432 | |
433 | void | | 433 | void |
434 | ipflow_create(const struct route *ro, struct mbuf *m) | | 434 | ipflow_create(const struct route *ro, struct mbuf *m) |
435 | { | | 435 | { |
436 | const struct ip *const ip = mtod(m, const struct ip *); | | 436 | const struct ip *const ip = mtod(m, const struct ip *); |
437 | struct ipflow *ipf; | | 437 | struct ipflow *ipf; |
438 | size_t hash; | | 438 | size_t hash; |
439 | int s; | | 439 | int s; |
440 | | | 440 | |
441 | /* | | 441 | /* |
442 | * Don't create cache entries for ICMP messages. | | 442 | * Don't create cache entries for ICMP messages. |
443 | */ | | 443 | */ |
444 | if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) | | 444 | if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) |
445 | return; | | 445 | return; |
446 | /* | | 446 | /* |
447 | * See if an existing flow struct exists. If so remove it from it's | | 447 | * See if an existing flow struct exists. If so remove it from it's |
448 | * list and free the old route. If not, try to malloc a new one | | 448 | * list and free the old route. If not, try to malloc a new one |
449 | * (if we aren't at our limit). | | 449 | * (if we aren't at our limit). |
450 | */ | | 450 | */ |
451 | ipf = ipflow_lookup(ip); | | 451 | ipf = ipflow_lookup(ip); |
452 | if (ipf == NULL) { | | 452 | if (ipf == NULL) { |
453 | if (ipflow_inuse >= ip_maxflows) { | | 453 | if (ipflow_inuse >= ip_maxflows) { |
454 | ipf = ipflow_reap(true); | | 454 | ipf = ipflow_reap(true); |
455 | } else { | | 455 | } else { |
456 | s = splnet(); | | 456 | s = splnet(); |
457 | ipf = pool_get(&ipflow_pool, PR_NOWAIT); | | 457 | ipf = pool_get(&ipflow_pool, PR_NOWAIT); |
458 | splx(s); | | 458 | splx(s); |
459 | if (ipf == NULL) | | 459 | if (ipf == NULL) |
460 | return; | | 460 | return; |
461 | ipflow_inuse++; | | 461 | ipflow_inuse++; |
462 | } | | 462 | } |
463 | memset(ipf, 0, sizeof(*ipf)); | | 463 | memset(ipf, 0, sizeof(*ipf)); |
464 | } else { | | 464 | } else { |
465 | s = splnet(); | | 465 | s = splnet(); |
466 | IPFLOW_REMOVE(ipf); | | 466 | IPFLOW_REMOVE(ipf); |
467 | splx(s); | | 467 | splx(s); |
468 | ipflow_addstats(ipf); | | 468 | ipflow_addstats(ipf); |
469 | rtcache_free(&ipf->ipf_ro); | | 469 | rtcache_free(&ipf->ipf_ro); |
470 | ipf->ipf_uses = ipf->ipf_last_uses = 0; | | 470 | ipf->ipf_uses = ipf->ipf_last_uses = 0; |
471 | ipf->ipf_errors = ipf->ipf_dropped = 0; | | 471 | ipf->ipf_errors = ipf->ipf_dropped = 0; |
472 | } | | 472 | } |
473 | | | 473 | |
474 | /* | | 474 | /* |
475 | * Fill in the updated information. | | 475 | * Fill in the updated information. |
476 | */ | | 476 | */ |
477 | rtcache_copy(&ipf->ipf_ro, ro); | | 477 | rtcache_copy(&ipf->ipf_ro, ro); |
478 | ipf->ipf_dst = ip->ip_dst; | | 478 | ipf->ipf_dst = ip->ip_dst; |
479 | ipf->ipf_src = ip->ip_src; | | 479 | ipf->ipf_src = ip->ip_src; |
480 | ipf->ipf_tos = ip->ip_tos; | | 480 | ipf->ipf_tos = ip->ip_tos; |
481 | PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER); | | 481 | PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER); |
482 | ipf->ipf_start = time_uptime; | | 482 | ipf->ipf_start = time_uptime; |
483 | /* | | 483 | /* |
484 | * Insert into the approriate bucket of the flow table. | | 484 | * Insert into the approriate bucket of the flow table. |
485 | */ | | 485 | */ |
486 | hash = ipflow_hash(ip); | | 486 | hash = ipflow_hash(ip); |
487 | s = splnet(); | | 487 | s = splnet(); |
488 | IPFLOW_INSERT(&ipflowtable[hash], ipf); | | 488 | IPFLOW_INSERT(&ipflowtable[hash], ipf); |
489 | splx(s); | | 489 | splx(s); |
490 | } | | 490 | } |
491 | | | 491 | |
492 | int | | 492 | int |
493 | ipflow_invalidate_all(int new_size) | | 493 | ipflow_invalidate_all(int new_size) |
494 | { | | 494 | { |
495 | struct ipflow *ipf, *next_ipf; | | 495 | struct ipflow *ipf, *next_ipf; |
496 | int s, error; | | 496 | int s, error; |
497 | | | 497 | |
498 | error = 0; | | 498 | error = 0; |
499 | s = splnet(); | | 499 | s = splnet(); |
500 | for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { | | 500 | for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { |
501 | next_ipf = LIST_NEXT(ipf, ipf_list); | | 501 | next_ipf = LIST_NEXT(ipf, ipf_list); |
502 | ipflow_free(ipf); | | 502 | ipflow_free(ipf); |
503 | } | | 503 | } |
504 | | | 504 | |
505 | if (new_size) | | 505 | if (new_size) |
506 | error = ipflow_init(new_size); | | 506 | error = ipflow_init(new_size); |
507 | splx(s); | | 507 | splx(s); |
508 | | | 508 | |
509 | return error; | | 509 | return error; |
510 | } | | 510 | } |