Wed Feb 19 03:51:31 2014 UTC ()
NPF: fix the recent breakage of the traceroute ALG.  Also, simplify and
refactor a little bit.


(rmind)
diff -r1.19 -r1.20 src/sys/net/npf/npf_alg_icmp.c
diff -r1.48 -r1.49 src/sys/net/npf/npf_impl.h
diff -r1.29 -r1.30 src/sys/net/npf/npf_inet.c
diff -r1.25 -r1.26 src/sys/net/npf/npf_nat.c

cvs diff -r1.19 -r1.20 src/sys/net/npf/npf_alg_icmp.c (switch to unified diff)

--- src/sys/net/npf/npf_alg_icmp.c 2014/02/16 22:10:40 1.19
+++ src/sys/net/npf/npf_alg_icmp.c 2014/02/19 03:51:31 1.20
@@ -1,437 +1,444 @@ @@ -1,437 +1,444 @@
1/* $NetBSD: npf_alg_icmp.c,v 1.19 2014/02/16 22:10:40 rmind Exp $ */ 1/* $NetBSD: npf_alg_icmp.c,v 1.20 2014/02/19 03:51:31 rmind Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This material is based upon work partially supported by The 7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * NPF ALG for ICMP and traceroute translations. 33 * NPF ALG for ICMP and traceroute translations.
34 */ 34 */
35 35
36#include <sys/cdefs.h> 36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.19 2014/02/16 22:10:40 rmind Exp $"); 37__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.20 2014/02/19 03:51:31 rmind Exp $");
38 38
39#include <sys/param.h> 39#include <sys/param.h>
40#include <sys/module.h> 40#include <sys/module.h>
41 41
42#include <netinet/in_systm.h> 42#include <netinet/in_systm.h>
43#include <netinet/in.h> 43#include <netinet/in.h>
44#include <netinet/ip.h> 44#include <netinet/ip.h>
45#include <netinet/tcp.h> 45#include <netinet/tcp.h>
46#include <netinet/udp.h> 46#include <netinet/udp.h>
47#include <netinet/ip_icmp.h> 47#include <netinet/ip_icmp.h>
48#include <netinet/icmp6.h> 48#include <netinet/icmp6.h>
49#include <net/pfil.h> 49#include <net/pfil.h>
50 50
51#include "npf_impl.h" 51#include "npf_impl.h"
52 52
53MODULE(MODULE_CLASS_MISC, npf_alg_icmp, "npf"); 53MODULE(MODULE_CLASS_MISC, npf_alg_icmp, "npf");
54 54
55/* 55/*
56 * Traceroute criteria. 56 * Traceroute criteria.
57 * 57 *
58 * IANA assigned base port: 33434. However, common practice is to increase 58 * IANA assigned base port: 33434. However, common practice is to increase
59 * the port, thus monitor [33434-33484] range. Additional filter is low TTL. 59 * the port, thus monitor [33434-33484] range. Additional filter is low TTL.
60 */ 60 */
61 61
62#define TR_BASE_PORT 33434 62#define TR_BASE_PORT 33434
63#define TR_PORT_RANGE 33484 63#define TR_PORT_RANGE 33484
64#define TR_MAX_TTL 48 64#define TR_MAX_TTL 48
65 65
66static npf_alg_t * alg_icmp __read_mostly; 66static npf_alg_t * alg_icmp __read_mostly;
67 67
68/* 68/*
69 * npfa_icmp_match: matching inspector determines ALG case and associates 69 * npfa_icmp_match: matching inspector determines ALG case and associates
70 * our ALG with the NAT entry. 70 * our ALG with the NAT entry.
71 */ 71 */
72static bool 72static bool
73npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di) 73npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di)
74{ 74{
75 const int proto = npc->npc_proto; 75 const int proto = npc->npc_proto;
76 const struct ip *ip = npc->npc_ip.v4; 76 const struct ip *ip = npc->npc_ip.v4;
77 in_port_t dport; 77 in_port_t dport;
78 78
79 KASSERT(npf_iscached(npc, NPC_IP46)); 79 KASSERT(npf_iscached(npc, NPC_IP46));
80 KASSERT(npf_iscached(npc, NPC_LAYER4)); 80 KASSERT(npf_iscached(npc, NPC_LAYER4));
81 81
82 /* Check for low TTL. Also, we support outbound NAT only. */ 82 /* Check for low TTL. Also, we support outbound NAT only. */
83 if (ip->ip_ttl > TR_MAX_TTL || di != PFIL_OUT) { 83 if (ip->ip_ttl > TR_MAX_TTL || di != PFIL_OUT) {
84 return false; 84 return false;
85 } 85 }
86 86
87 switch (proto) { 87 switch (proto) {
88 case IPPROTO_TCP: { 88 case IPPROTO_TCP: {
89 const struct tcphdr *th = npc->npc_l4.tcp; 89 const struct tcphdr *th = npc->npc_l4.tcp;
90 dport = ntohs(th->th_dport); 90 dport = ntohs(th->th_dport);
91 break; 91 break;
92 } 92 }
93 case IPPROTO_UDP: { 93 case IPPROTO_UDP: {
94 const struct udphdr *uh = npc->npc_l4.udp; 94 const struct udphdr *uh = npc->npc_l4.udp;
95 dport = ntohs(uh->uh_dport); 95 dport = ntohs(uh->uh_dport);
96 break; 96 break;
97 } 97 }
98 case IPPROTO_ICMP: 98 case IPPROTO_ICMP:
99 case IPPROTO_ICMPV6: 99 case IPPROTO_ICMPV6:
100 /* Just to pass the test below. */ 100 /* Just to pass the test below. */
101 dport = TR_BASE_PORT; 101 dport = TR_BASE_PORT;
102 break; 102 break;
103 default: 103 default:
104 return false; 104 return false;
105 } 105 }
106 106
107 /* Handle TCP/UDP traceroute - check for port range. */ 107 /* Handle TCP/UDP traceroute - check for port range. */
108 if (dport < TR_BASE_PORT || dport > TR_PORT_RANGE) { 108 if (dport < TR_BASE_PORT || dport > TR_PORT_RANGE) {
109 return false; 109 return false;
110 } 110 }
111 111
112 /* Associate ALG with translation entry. */ 112 /* Associate ALG with translation entry. */
113 npf_nat_setalg(nt, alg_icmp, 0); 113 npf_nat_setalg(nt, alg_icmp, 0);
114 return true; 114 return true;
115} 115}
116 116
117/* 117/*
118 * npfa_icmp{4,6}_inspect: retrieve unique identifiers - either ICMP query 118 * npfa_icmp{4,6}_inspect: retrieve unique identifiers - either ICMP query
119 * ID or TCP/UDP ports of the original packet, which is embedded. 119 * ID or TCP/UDP ports of the original packet, which is embedded.
120 */ 120 */
121 121
122static bool 122static bool
123npfa_icmp4_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf) 123npfa_icmp4_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf)
124{ 124{
125 u_int offby; 125 u_int offby;
126 126
127 /* Per RFC 792. */ 127 /* Per RFC 792. */
128 switch (type) { 128 switch (type) {
129 case ICMP_UNREACH: 129 case ICMP_UNREACH:
130 case ICMP_SOURCEQUENCH: 130 case ICMP_SOURCEQUENCH:
131 case ICMP_REDIRECT: 131 case ICMP_REDIRECT:
132 case ICMP_TIMXCEED: 132 case ICMP_TIMXCEED:
133 case ICMP_PARAMPROB: 133 case ICMP_PARAMPROB:
134 if (npc == NULL) { 134 if (npc == NULL) {
135 return false; 135 return false;
136 } 136 }
137 /* Should contain original IP header. */ 137 /* Should contain original IP header. */
138 if (!nbuf_advance(nbuf, offsetof(struct icmp, icmp_ip), 0)) { 138 if (!nbuf_advance(nbuf, offsetof(struct icmp, icmp_ip), 0)) {
139 return false; 139 return false;
140 } 140 }
141 return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0; 141 return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0;
142 142
143 case ICMP_ECHOREPLY: 143 case ICMP_ECHOREPLY:
144 case ICMP_ECHO: 144 case ICMP_ECHO:
145 case ICMP_TSTAMP: 145 case ICMP_TSTAMP:
146 case ICMP_TSTAMPREPLY: 146 case ICMP_TSTAMPREPLY:
147 case ICMP_IREQ: 147 case ICMP_IREQ:
148 case ICMP_IREQREPLY: 148 case ICMP_IREQREPLY:
149 /* Should contain ICMP query ID - ensure. */ 149 /* Should contain ICMP query ID - ensure. */
150 offby = offsetof(struct icmp, icmp_id); 150 offby = offsetof(struct icmp, icmp_id);
151 if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) { 151 if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) {
152 return false; 152 return false;
153 } 153 }
154 npc->npc_info |= NPC_ICMP_ID; 154 npc->npc_info |= NPC_ICMP_ID;
155 return true; 155 return true;
156 default: 156 default:
157 break; 157 break;
158 } 158 }
159 return false; 159 return false;
160} 160}
161 161
162static bool 162static bool
163npfa_icmp6_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf) 163npfa_icmp6_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf)
164{ 164{
165 u_int offby; 165 u_int offby;
166 166
167 /* Per RFC 4443. */ 167 /* Per RFC 4443. */
168 switch (type) { 168 switch (type) {
169 case ICMP6_DST_UNREACH: 169 case ICMP6_DST_UNREACH:
170 case ICMP6_PACKET_TOO_BIG: 170 case ICMP6_PACKET_TOO_BIG:
171 case ICMP6_TIME_EXCEEDED: 171 case ICMP6_TIME_EXCEEDED:
172 case ICMP6_PARAM_PROB: 172 case ICMP6_PARAM_PROB:
173 if (npc == NULL) { 173 if (npc == NULL) {
174 return false; 174 return false;
175 } 175 }
176 /* Should contain original IP header. */ 176 /* Should contain original IP header. */
177 if (!nbuf_advance(nbuf, sizeof(struct icmp6_hdr), 0)) { 177 if (!nbuf_advance(nbuf, sizeof(struct icmp6_hdr), 0)) {
178 return false; 178 return false;
179 } 179 }
180 return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0; 180 return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0;
181 181
182 case ICMP6_ECHO_REQUEST: 182 case ICMP6_ECHO_REQUEST:
183 case ICMP6_ECHO_REPLY: 183 case ICMP6_ECHO_REPLY:
184 /* Should contain ICMP query ID - ensure. */ 184 /* Should contain ICMP query ID - ensure. */
185 offby = offsetof(struct icmp6_hdr, icmp6_id); 185 offby = offsetof(struct icmp6_hdr, icmp6_id);
186 if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) { 186 if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) {
187 return false; 187 return false;
188 } 188 }
189 npc->npc_info |= NPC_ICMP_ID; 189 npc->npc_info |= NPC_ICMP_ID;
190 return true; 190 return true;
191 default: 191 default:
192 break; 192 break;
193 } 193 }
194 return false; 194 return false;
195} 195}
196 196
197/* 197/*
198 * npfa_icmp_inspect: ALG ICMP inspector. 198 * npfa_icmp_inspect: ALG ICMP inspector.
199 * 199 *
200 * => Returns true if "enpc" is filled. 200 * => Returns true if "enpc" is filled.
201 */ 201 */
202static bool 202static bool
203npfa_icmp_inspect(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *enpc) 203npfa_icmp_inspect(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *enpc)
204{ 204{
205 bool ret; 205 bool ret;
206 206
207 KASSERT(npf_iscached(npc, NPC_IP46)); 207 KASSERT(npf_iscached(npc, NPC_IP46));
208 KASSERT(npf_iscached(npc, NPC_ICMP)); 208 KASSERT(npf_iscached(npc, NPC_ICMP));
209 209
210 /* Advance to ICMP header. */ 210 /* Advance to ICMP header. */
211 nbuf_reset(nbuf); 211 nbuf_reset(nbuf);
212 if (!nbuf_advance(nbuf, npc->npc_hlen, 0)) { 212 if (!nbuf_advance(nbuf, npc->npc_hlen, 0)) {
213 return false; 213 return false;
214 } 214 }
215 enpc->npc_info = 0; 215 enpc->npc_info = 0;
216 216
217 /* 217 /*
218 * Inspect the ICMP packet. The relevant data might be in the 218 * Inspect the ICMP packet. The relevant data might be in the
219 * embedded packet. Fill the "enpc" cache, if so. 219 * embedded packet. Fill the "enpc" cache, if so.
220 */ 220 */
221 if (npf_iscached(npc, NPC_IP4)) { 221 if (npf_iscached(npc, NPC_IP4)) {
222 const struct icmp *ic = npc->npc_l4.icmp; 222 const struct icmp *ic = npc->npc_l4.icmp;
223 ret = npfa_icmp4_inspect(ic->icmp_type, enpc, nbuf); 223 ret = npfa_icmp4_inspect(ic->icmp_type, enpc, nbuf);
224 } else if (npf_iscached(npc, NPC_IP6)) { 224 } else if (npf_iscached(npc, NPC_IP6)) {
225 const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6; 225 const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
226 ret = npfa_icmp6_inspect(ic6->icmp6_type, enpc, nbuf); 226 ret = npfa_icmp6_inspect(ic6->icmp6_type, enpc, nbuf);
227 } else { 227 } else {
228 ret = false; 228 ret = false;
229 } 229 }
230 if (!ret) { 230 if (!ret) {
231 return false; 231 return false;
232 } 232 }
233 233
234 /* ICMP ID is in the original packet; just indicate it. */ 234 /* ICMP ID is in the original packet; just indicate it. */
235 if (npf_iscached(enpc, NPC_ICMP_ID)) { 235 if (npf_iscached(enpc, NPC_ICMP_ID)) {
236 npc->npc_info |= NPC_ICMP_ID; 236 npc->npc_info |= NPC_ICMP_ID;
237 return false; 237 return false;
238 } 238 }
239 239
240 /* Indicate that embedded packet is in the cache. */ 240 /* Indicate that embedded packet is in the cache. */
241 return true; 241 return true;
242} 242}
243 243
244static npf_session_t * 244static npf_session_t *
245npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, int di) 245npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, int di)
246{ 246{
247 npf_cache_t enpc; 247 npf_cache_t enpc;
248 248
249 /* Inspect ICMP packet for an embedded packet. */ 249 /* Inspect ICMP packet for an embedded packet. */
250 if (!npf_iscached(npc, NPC_ICMP)) 250 if (!npf_iscached(npc, NPC_ICMP))
251 return NULL; 251 return NULL;
252 if (!npfa_icmp_inspect(npc, nbuf, &enpc)) 252 if (!npfa_icmp_inspect(npc, nbuf, &enpc))
253 return NULL; 253 return NULL;
254 254
255 /* 255 /*
256 * Invert the identifiers of the embedded packet. 256 * Invert the identifiers of the embedded packet.
257 * If it is ICMP, then ensure ICMP ID. 257 * If it is ICMP, then ensure ICMP ID.
258 */ 258 */
259 union l4 { 259 union l4 {
260 struct tcphdr th; 260 struct tcphdr th;
261 struct udphdr uh; 261 struct udphdr uh;
262 } l4; 262 } l4;
263 bool ret, forw; 263 bool ret, forw;
264 264
265 #define SWAP(type, x, y) { type tmp = x; x = y; y = tmp; } 265 #define SWAP(type, x, y) { type tmp = x; x = y; y = tmp; }
266 SWAP(npf_addr_t *, enpc.npc_ips[NPF_SRC], enpc.npc_ips[NPF_DST]); 266 SWAP(npf_addr_t *, enpc.npc_ips[NPF_SRC], enpc.npc_ips[NPF_DST]);
267 267
268 switch (enpc.npc_proto) { 268 switch (enpc.npc_proto) {
269 case IPPROTO_TCP: 269 case IPPROTO_TCP:
270 l4.th.th_sport = enpc.npc_l4.tcp->th_dport; 270 l4.th.th_sport = enpc.npc_l4.tcp->th_dport;
271 l4.th.th_dport = enpc.npc_l4.tcp->th_sport; 271 l4.th.th_dport = enpc.npc_l4.tcp->th_sport;
272 enpc.npc_l4.tcp = &l4.th; 272 enpc.npc_l4.tcp = &l4.th;
273 break; 273 break;
274 case IPPROTO_UDP: 274 case IPPROTO_UDP:
275 l4.uh.uh_sport = enpc.npc_l4.udp->uh_dport; 275 l4.uh.uh_sport = enpc.npc_l4.udp->uh_dport;
276 l4.uh.uh_dport = enpc.npc_l4.udp->uh_sport; 276 l4.uh.uh_dport = enpc.npc_l4.udp->uh_sport;
277 enpc.npc_l4.udp = &l4.uh; 277 enpc.npc_l4.udp = &l4.uh;
278 break; 278 break;
279 case IPPROTO_ICMP: { 279 case IPPROTO_ICMP: {
280 const struct icmp *ic = enpc.npc_l4.icmp; 280 const struct icmp *ic = enpc.npc_l4.icmp;
281 ret = npfa_icmp4_inspect(ic->icmp_type, &enpc, nbuf); 281 ret = npfa_icmp4_inspect(ic->icmp_type, &enpc, nbuf);
282 if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID)) 282 if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID))
283 return false; 283 return false;
284 break; 284 break;
285 } 285 }
286 case IPPROTO_ICMPV6: { 286 case IPPROTO_ICMPV6: {
287 const struct icmp6_hdr *ic6 = enpc.npc_l4.icmp6; 287 const struct icmp6_hdr *ic6 = enpc.npc_l4.icmp6;
288 ret = npfa_icmp6_inspect(ic6->icmp6_type, &enpc, nbuf); 288 ret = npfa_icmp6_inspect(ic6->icmp6_type, &enpc, nbuf);
289 if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID)) 289 if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID))
290 return false; 290 return false;
291 break; 291 break;
292 } 292 }
293 default: 293 default:
294 return false; 294 return false;
295 } 295 }
296 296
297 /* Look up a session using the embedded packet. */ 297 /* Look up a session using the embedded packet. */
298 return npf_session_lookup(&enpc, nbuf, di, &forw); 298 return npf_session_lookup(&enpc, nbuf, di, &forw);
299} 299}
300 300
301/* 301/*
302 * npfa_icmp_nat: ALG translator - rewrites IP address in the IP header 302 * npfa_icmp_nat: ALG translator - rewrites IP address in the IP header
303 * which is embedded in ICMP packet. Note: backwards stream only. 303 * which is embedded in ICMP packet. Note: backwards stream only.
304 */ 304 */
305static bool 305static bool
306npfa_icmp_nat(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw) 306npfa_icmp_nat(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw)
307{ 307{
 308 const u_int which = NPF_SRC;
308 npf_cache_t enpc; 309 npf_cache_t enpc;
309 310
310 if (forw || !npf_iscached(npc, NPC_ICMP)) 311 if (forw || !npf_iscached(npc, NPC_ICMP))
311 return false; 312 return false;
312 if (!npfa_icmp_inspect(npc, nbuf, &enpc)) 313 if (!npfa_icmp_inspect(npc, nbuf, &enpc))
313 return false; 314 return false;
314 315
315 KASSERT(npf_iscached(&enpc, NPC_IP46)); 316 KASSERT(npf_iscached(&enpc, NPC_IP46));
316 KASSERT(npf_iscached(&enpc, NPC_LAYER4)); 317 KASSERT(npf_iscached(&enpc, NPC_LAYER4));
317 318
 319 /*
 320 * ICMP: fetch the current checksum we are going to fixup.
 321 */
318 struct icmp *ic = npc->npc_l4.icmp; 322 struct icmp *ic = npc->npc_l4.icmp;
319 uint16_t cksum = ic->icmp_cksum; 323 uint16_t cksum = ic->icmp_cksum;
320 324
321 CTASSERT(offsetof(struct icmp, icmp_cksum) == 325 CTASSERT(offsetof(struct icmp, icmp_cksum) ==
322 offsetof(struct icmp6_hdr, icmp6_cksum)); 326 offsetof(struct icmp6_hdr, icmp6_cksum));
323 327
324 /* 328 /*
325 * Retrieve the original address and port, then calculate ICMP 329 * Fetch the IP and port in the _embedded_ packet. Also, fetch
326 * checksum for these changes in the embedded packet. While data 330 * the IPv4 and TCP/UDP checksums before they are rewritten.
327 * is not rewritten in the cache, save IP and TCP/UDP checksums. 331 * Calculate the part of the ICMP checksum fixup.
328 * 
329 * XXX: Assumes NPF_NATOUT (source address/port). Currently, 
330 * npfa_icmp_match() matches only for the PFIL_OUT traffic. 
331 */ 332 */
332 const int proto = enpc.npc_proto; 333 const int proto = enpc.npc_proto;
333 uint16_t ipcksum = 0, l4cksum = 0; 334 uint16_t ipcksum = 0, l4cksum = 0;
334 npf_addr_t *addr; 335 npf_addr_t *addr;
335 in_port_t port; 336 in_port_t port;
336 337
337 npf_nat_getorig(nt, &addr, &port); 338 npf_nat_getorig(nt, &addr, &port);
338 339
339 if (npf_iscached(&enpc, NPC_IP4)) { 340 if (npf_iscached(&enpc, NPC_IP4)) {
340 const struct ip *eip = enpc.npc_ip.v4; 341 const struct ip *eip = enpc.npc_ip.v4;
341 ipcksum = eip->ip_sum; 342 ipcksum = eip->ip_sum;
342 } 343 }
343 cksum = npf_addr_cksum(cksum, enpc.npc_alen, enpc.npc_ips[NPF_SRC], addr); 344 cksum = npf_addr_cksum(cksum, enpc.npc_alen, enpc.npc_ips[which], addr);
344 345
345 switch (proto) { 346 switch (proto) {
346 case IPPROTO_TCP: { 347 case IPPROTO_TCP: {
347 const struct tcphdr *th = enpc.npc_l4.tcp; 348 const struct tcphdr *th = enpc.npc_l4.tcp;
348 cksum = npf_fixup16_cksum(cksum, th->th_sport, port); 349 cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
349 l4cksum = th->th_sum; 350 l4cksum = th->th_sum;
350 break; 351 break;
351 } 352 }
352 case IPPROTO_UDP: { 353 case IPPROTO_UDP: {
353 const struct udphdr *uh = enpc.npc_l4.udp; 354 const struct udphdr *uh = enpc.npc_l4.udp;
354 cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port); 355 cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
355 l4cksum = uh->uh_sum; 356 l4cksum = uh->uh_sum;
356 break; 357 break;
357 } 358 }
358 case IPPROTO_ICMP: 359 case IPPROTO_ICMP:
359 case IPPROTO_ICMPV6: 360 case IPPROTO_ICMPV6:
360 break; 361 break;
361 default: 362 default:
362 return false; 363 return false;
363 } 364 }
364 365
365 /* 366 /*
366 * Rewrite the source IP address and port of the embedded IP header, 367 * Translate the embedded packet. The following changes will
367 * which represents the original packet. This updates the checksums 368 * be performed by npf_napt_rwr():
368 * in the embedded packet. 369 *
 370 * 1) Rewrite the IP address and, if not ICMP, port.
 371 * 2) Rewrite the TCP/UDP checksum (if not ICMP).
 372 * 3) Rewrite the IPv4 checksum for (1) and (2).
 373 *
 374 * XXX: Assumes NPF_NATOUT (source address/port). Currently,
 375 * npfa_icmp_match() matches only for the PFIL_OUT traffic.
369 */ 376 */
370 if (npf_nat_translate(&enpc, nbuf, nt, forw)) { 377 if (npf_napt_rwr(&enpc, which, addr, port)) {
371 return false; 378 return false;
372 } 379 }
373 380
374 /* 381 /*
375 * Finish calculation of the ICMP checksum: include the checksum 382 * Finally, finish the ICMP checksum fixup: include the checksum
376 * change in the embedded packet. 383 * changes in the embedded packet.
377 */ 384 */
378 if (npf_iscached(&enpc, NPC_IP4)) { 385 if (npf_iscached(&enpc, NPC_IP4)) {
379 const struct ip *eip = enpc.npc_ip.v4; 386 const struct ip *eip = enpc.npc_ip.v4;
380 cksum = npf_fixup16_cksum(cksum, ipcksum, eip->ip_sum); 387 cksum = npf_fixup16_cksum(cksum, ipcksum, eip->ip_sum);
381 } 388 }
382 switch (proto) { 389 switch (proto) {
383 case IPPROTO_TCP: { 390 case IPPROTO_TCP: {
384 const struct tcphdr *th = enpc.npc_l4.tcp; 391 const struct tcphdr *th = enpc.npc_l4.tcp;
385 cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum); 392 cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum);
386 break; 393 break;
387 } 394 }
388 case IPPROTO_UDP: 395 case IPPROTO_UDP:
389 if (l4cksum) { 396 if (l4cksum) {
390 const struct udphdr *uh = enpc.npc_l4.udp; 397 const struct udphdr *uh = enpc.npc_l4.udp;
391 cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum); 398 cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
392 } 399 }
393 break; 400 break;
394 } 401 }
395 ic->icmp_cksum = cksum; 402 ic->icmp_cksum = cksum;
396 return true; 403 return true;
397} 404}
398 405
399/* 406/*
400 * npf_alg_icmp_{init,fini,modcmd}: ICMP ALG initialization, destruction 407 * npf_alg_icmp_{init,fini,modcmd}: ICMP ALG initialization, destruction
401 * and module interface. 408 * and module interface.
402 */ 409 */
403 410
404static int 411static int
405npf_alg_icmp_init(void) 412npf_alg_icmp_init(void)
406{ 413{
407 static const npfa_funcs_t icmp = { 414 static const npfa_funcs_t icmp = {
408 .match = npfa_icmp_match, 415 .match = npfa_icmp_match,
409 .translate = npfa_icmp_nat, 416 .translate = npfa_icmp_nat,
410 .inspect = npfa_icmp_session, 417 .inspect = npfa_icmp_session,
411 }; 418 };
412 alg_icmp = npf_alg_register("icmp", &icmp); 419 alg_icmp = npf_alg_register("icmp", &icmp);
413 return alg_icmp ? 0 : ENOMEM; 420 return alg_icmp ? 0 : ENOMEM;
414} 421}
415 422
416static int 423static int
417npf_alg_icmp_fini(void) 424npf_alg_icmp_fini(void)
418{ 425{
419 KASSERT(alg_icmp != NULL); 426 KASSERT(alg_icmp != NULL);
420 return npf_alg_unregister(alg_icmp); 427 return npf_alg_unregister(alg_icmp);
421} 428}
422 429
423static int 430static int
424npf_alg_icmp_modcmd(modcmd_t cmd, void *arg) 431npf_alg_icmp_modcmd(modcmd_t cmd, void *arg)
425{ 432{
426 switch (cmd) { 433 switch (cmd) {
427 case MODULE_CMD_INIT: 434 case MODULE_CMD_INIT:
428 return npf_alg_icmp_init(); 435 return npf_alg_icmp_init();
429 case MODULE_CMD_FINI: 436 case MODULE_CMD_FINI:
430 return npf_alg_icmp_fini(); 437 return npf_alg_icmp_fini();
431 case MODULE_CMD_AUTOUNLOAD: 438 case MODULE_CMD_AUTOUNLOAD:
432 return EBUSY; 439 return EBUSY;
433 default: 440 default:
434 return ENOTTY; 441 return ENOTTY;
435 } 442 }
436 return 0; 443 return 0;
437} 444}

cvs diff -r1.48 -r1.49 src/sys/net/npf/npf_impl.h (switch to unified diff)

--- src/sys/net/npf/npf_impl.h 2014/02/16 22:10:40 1.48
+++ src/sys/net/npf/npf_impl.h 2014/02/19 03:51:31 1.49
@@ -1,370 +1,371 @@ @@ -1,370 +1,371 @@
1/* $NetBSD: npf_impl.h,v 1.48 2014/02/16 22:10:40 rmind Exp $ */ 1/* $NetBSD: npf_impl.h,v 1.49 2014/02/19 03:51:31 rmind Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc. 4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This material is based upon work partially supported by The 7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Private NPF structures and interfaces. 33 * Private NPF structures and interfaces.
34 * For internal use within NPF core only. 34 * For internal use within NPF core only.
35 */ 35 */
36 36
37#ifndef _NPF_IMPL_H_ 37#ifndef _NPF_IMPL_H_
38#define _NPF_IMPL_H_ 38#define _NPF_IMPL_H_
39 39
40#if !defined(_KERNEL) 40#if !defined(_KERNEL)
41#error "Kernel-level header only" 41#error "Kernel-level header only"
42#endif 42#endif
43 43
44#ifdef _KERNEL_OPT 44#ifdef _KERNEL_OPT
45/* For INET/INET6 definitions. */ 45/* For INET/INET6 definitions. */
46#include "opt_inet.h" 46#include "opt_inet.h"
47#include "opt_inet6.h" 47#include "opt_inet6.h"
48#endif 48#endif
49 49
50#include <sys/types.h> 50#include <sys/types.h>
51#include <sys/queue.h> 51#include <sys/queue.h>
52#include <sys/hash.h> 52#include <sys/hash.h>
53#include <sys/rbtree.h> 53#include <sys/rbtree.h>
54#include <sys/ptree.h> 54#include <sys/ptree.h>
55#include <sys/rwlock.h> 55#include <sys/rwlock.h>
56 56
57#include <net/bpf.h> 57#include <net/bpf.h>
58#include <net/bpfjit.h> 58#include <net/bpfjit.h>
59#include <net/if.h> 59#include <net/if.h>
60 60
61#include "npf.h" 61#include "npf.h"
62 62
63#ifdef _NPF_DEBUG 63#ifdef _NPF_DEBUG
64#define NPF_PRINTF(x) printf x 64#define NPF_PRINTF(x) printf x
65#else 65#else
66#define NPF_PRINTF(x) 66#define NPF_PRINTF(x)
67#endif 67#endif
68 68
69/* 69/*
70 * STRUCTURE DECLARATIONS. 70 * STRUCTURE DECLARATIONS.
71 */ 71 */
72 72
73struct npf_ruleset; 73struct npf_ruleset;
74struct npf_rule; 74struct npf_rule;
75struct npf_rprocset; 75struct npf_rprocset;
76struct npf_nat; 76struct npf_nat;
77struct npf_session; 77struct npf_session;
78 78
79typedef struct npf_ruleset npf_ruleset_t; 79typedef struct npf_ruleset npf_ruleset_t;
80typedef struct npf_rule npf_rule_t; 80typedef struct npf_rule npf_rule_t;
81typedef struct npf_nat npf_nat_t; 81typedef struct npf_nat npf_nat_t;
82typedef struct npf_rprocset npf_rprocset_t; 82typedef struct npf_rprocset npf_rprocset_t;
83typedef struct npf_alg npf_alg_t; 83typedef struct npf_alg npf_alg_t;
84typedef struct npf_natpolicy npf_natpolicy_t; 84typedef struct npf_natpolicy npf_natpolicy_t;
85typedef struct npf_session npf_session_t; 85typedef struct npf_session npf_session_t;
86 86
87struct npf_sehash; 87struct npf_sehash;
88struct npf_table; 88struct npf_table;
89struct npf_tableset; 89struct npf_tableset;
90 90
91typedef struct npf_sehash npf_sehash_t; 91typedef struct npf_sehash npf_sehash_t;
92typedef struct npf_table npf_table_t; 92typedef struct npf_table npf_table_t;
93typedef struct npf_tableset npf_tableset_t; 93typedef struct npf_tableset npf_tableset_t;
94 94
95/* 95/*
96 * DEFINITIONS. 96 * DEFINITIONS.
97 */ 97 */
98 98
99typedef void (*npf_workfunc_t)(void); 99typedef void (*npf_workfunc_t)(void);
100 100
101/* 101/*
102 * Some artificial limits. 102 * Some artificial limits.
103 * Note: very unlikely to have many ALGs. 103 * Note: very unlikely to have many ALGs.
104 */ 104 */
105#define NPF_MAX_RULES (1024 * 1024) 105#define NPF_MAX_RULES (1024 * 1024)
106#define NPF_MAX_ALGS 4 106#define NPF_MAX_ALGS 4
107#define NPF_MAX_TABLES 128 107#define NPF_MAX_TABLES 128
108#define NPF_MAX_RPROCS 128 108#define NPF_MAX_RPROCS 128
109#define NPF_MAX_IFMAP 64 109#define NPF_MAX_IFMAP 64
110 110
111/* 111/*
112 * SESSION STATE STRUCTURES 112 * SESSION STATE STRUCTURES
113 */ 113 */
114 114
115#define NPF_FLOW_FORW 0 115#define NPF_FLOW_FORW 0
116#define NPF_FLOW_BACK 1 116#define NPF_FLOW_BACK 1
117 117
118typedef struct { 118typedef struct {
119 uint32_t nst_end; 119 uint32_t nst_end;
120 uint32_t nst_maxend; 120 uint32_t nst_maxend;
121 uint32_t nst_maxwin; 121 uint32_t nst_maxwin;
122 int nst_wscale; 122 int nst_wscale;
123} npf_tcpstate_t; 123} npf_tcpstate_t;
124 124
125typedef struct { 125typedef struct {
126 kmutex_t nst_lock; 126 kmutex_t nst_lock;
127 u_int nst_state; 127 u_int nst_state;
128 npf_tcpstate_t nst_tcpst[2]; 128 npf_tcpstate_t nst_tcpst[2];
129} npf_state_t; 129} npf_state_t;
130 130
131/* 131/*
132 * ALG FUNCTIONS. 132 * ALG FUNCTIONS.
133 */ 133 */
134 134
135typedef struct { 135typedef struct {
136 bool (*match)(npf_cache_t *, nbuf_t *, npf_nat_t *, int); 136 bool (*match)(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
137 bool (*translate)(npf_cache_t *, nbuf_t *, npf_nat_t *, bool); 137 bool (*translate)(npf_cache_t *, nbuf_t *, npf_nat_t *, bool);
138 npf_session_t * (*inspect)(npf_cache_t *, nbuf_t *, int); 138 npf_session_t * (*inspect)(npf_cache_t *, nbuf_t *, int);
139} npfa_funcs_t; 139} npfa_funcs_t;
140 140
141/* 141/*
142 * INTERFACES. 142 * INTERFACES.
143 */ 143 */
144 144
145/* NPF config, statistics, etc. */ 145/* NPF config, statistics, etc. */
146void npf_config_init(void); 146void npf_config_init(void);
147void npf_config_fini(void); 147void npf_config_fini(void);
148 148
149void npf_config_enter(void); 149void npf_config_enter(void);
150void npf_config_exit(void); 150void npf_config_exit(void);
151void npf_config_sync(void); 151void npf_config_sync(void);
152bool npf_config_locked_p(void); 152bool npf_config_locked_p(void);
153int npf_config_read_enter(void); 153int npf_config_read_enter(void);
154void npf_config_read_exit(int); 154void npf_config_read_exit(int);
155 155
156void npf_config_reload(prop_dictionary_t, npf_ruleset_t *, 156void npf_config_reload(prop_dictionary_t, npf_ruleset_t *,
157 npf_tableset_t *, npf_ruleset_t *, npf_rprocset_t *, bool); 157 npf_tableset_t *, npf_ruleset_t *, npf_rprocset_t *, bool);
158npf_ruleset_t * npf_config_ruleset(void); 158npf_ruleset_t * npf_config_ruleset(void);
159npf_ruleset_t * npf_config_natset(void); 159npf_ruleset_t * npf_config_natset(void);
160npf_tableset_t *npf_config_tableset(void); 160npf_tableset_t *npf_config_tableset(void);
161prop_dictionary_t npf_config_dict(void); 161prop_dictionary_t npf_config_dict(void);
162bool npf_default_pass(void); 162bool npf_default_pass(void);
163 163
164int npf_worker_sysinit(void); 164int npf_worker_sysinit(void);
165void npf_worker_sysfini(void); 165void npf_worker_sysfini(void);
166void npf_worker_signal(void); 166void npf_worker_signal(void);
167void npf_worker_register(npf_workfunc_t); 167void npf_worker_register(npf_workfunc_t);
168void npf_worker_unregister(npf_workfunc_t); 168void npf_worker_unregister(npf_workfunc_t);
169 169
170void npflogattach(int); 170void npflogattach(int);
171void npflogdetach(void); 171void npflogdetach(void);
172int npfctl_switch(void *); 172int npfctl_switch(void *);
173int npfctl_reload(u_long, void *); 173int npfctl_reload(u_long, void *);
174int npfctl_getconf(u_long, void *); 174int npfctl_getconf(u_long, void *);
175int npfctl_sessions_save(u_long, void *); 175int npfctl_sessions_save(u_long, void *);
176int npfctl_sessions_load(u_long, void *); 176int npfctl_sessions_load(u_long, void *);
177int npfctl_rule(u_long, void *); 177int npfctl_rule(u_long, void *);
178int npfctl_table(void *); 178int npfctl_table(void *);
179 179
180void npf_stats_inc(npf_stats_t); 180void npf_stats_inc(npf_stats_t);
181void npf_stats_dec(npf_stats_t); 181void npf_stats_dec(npf_stats_t);
182 182
183u_int npf_ifmap_register(const char *); 183u_int npf_ifmap_register(const char *);
184void npf_ifmap_flush(void); 184void npf_ifmap_flush(void);
185void npf_ifmap_attach(ifnet_t *); 185void npf_ifmap_attach(ifnet_t *);
186void npf_ifmap_detach(ifnet_t *); 186void npf_ifmap_detach(ifnet_t *);
187u_int npf_ifmap_id(const ifnet_t *); 187u_int npf_ifmap_id(const ifnet_t *);
188 188
189/* Packet filter hooks. */ 189/* Packet filter hooks. */
190int npf_pfil_register(bool); 190int npf_pfil_register(bool);
191void npf_pfil_unregister(bool); 191void npf_pfil_unregister(bool);
192bool npf_pfil_registered_p(void); 192bool npf_pfil_registered_p(void);
193int npf_packet_handler(void *, struct mbuf **, ifnet_t *, int); 193int npf_packet_handler(void *, struct mbuf **, ifnet_t *, int);
194 194
195/* Protocol helpers. */ 195/* Protocol helpers. */
196int npf_cache_all(npf_cache_t *, nbuf_t *); 196int npf_cache_all(npf_cache_t *, nbuf_t *);
197void npf_recache(npf_cache_t *, nbuf_t *); 197void npf_recache(npf_cache_t *, nbuf_t *);
198 198
199bool npf_rwrip(const npf_cache_t *, u_int, const npf_addr_t *); 199bool npf_rwrip(const npf_cache_t *, u_int, const npf_addr_t *);
200bool npf_rwrport(const npf_cache_t *, u_int, const in_port_t); 200bool npf_rwrport(const npf_cache_t *, u_int, const in_port_t);
201bool npf_rwrcksum(const npf_cache_t *, u_int, 201bool npf_rwrcksum(const npf_cache_t *, u_int,
202 const npf_addr_t *, const in_port_t); 202 const npf_addr_t *, const in_port_t);
 203int npf_napt_rwr(const npf_cache_t *, u_int, const npf_addr_t *,
 204 const in_addr_t);
203int npf_npt66_rwr(const npf_cache_t *, u_int, const npf_addr_t *, 205int npf_npt66_rwr(const npf_cache_t *, u_int, const npf_addr_t *,
204 npf_netmask_t, uint16_t); 206 npf_netmask_t, uint16_t);
205 207
206uint16_t npf_fixup16_cksum(uint16_t, uint16_t, uint16_t); 208uint16_t npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
207uint16_t npf_fixup32_cksum(uint16_t, uint32_t, uint32_t); 209uint16_t npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
208uint16_t npf_addr_cksum(uint16_t, int, const npf_addr_t *, 210uint16_t npf_addr_cksum(uint16_t, int, const npf_addr_t *,
209 const npf_addr_t *); 211 const npf_addr_t *);
210uint32_t npf_addr_mix(const int, const npf_addr_t *, const npf_addr_t *); 212uint32_t npf_addr_mix(const int, const npf_addr_t *, const npf_addr_t *);
211int npf_addr_cmp(const npf_addr_t *, const npf_netmask_t, 213int npf_addr_cmp(const npf_addr_t *, const npf_netmask_t,
212 const npf_addr_t *, const npf_netmask_t, const int); 214 const npf_addr_t *, const npf_netmask_t, const int);
213void npf_addr_mask(const npf_addr_t *, const npf_netmask_t, 215void npf_addr_mask(const npf_addr_t *, const npf_netmask_t,
214 const int, npf_addr_t *); 216 const int, npf_addr_t *);
215 217
216int npf_tcpsaw(const npf_cache_t *, tcp_seq *, tcp_seq *, 218int npf_tcpsaw(const npf_cache_t *, tcp_seq *, tcp_seq *,
217 uint32_t *); 219 uint32_t *);
218bool npf_fetch_tcpopts(npf_cache_t *, nbuf_t *, uint16_t *, int *); 220bool npf_fetch_tcpopts(npf_cache_t *, nbuf_t *, uint16_t *, int *);
219bool npf_return_block(npf_cache_t *, nbuf_t *, const int); 221bool npf_return_block(npf_cache_t *, nbuf_t *, const int);
220 222
221/* BPF interface. */ 223/* BPF interface. */
222void npf_bpf_sysinit(void); 224void npf_bpf_sysinit(void);
223void npf_bpf_sysfini(void); 225void npf_bpf_sysfini(void);
224int npf_bpf_filter(bpf_args_t *, const void *, bpfjit_func_t); 226int npf_bpf_filter(bpf_args_t *, const void *, bpfjit_func_t);
225void * npf_bpf_compile(void *, size_t); 227void * npf_bpf_compile(void *, size_t);
226bool npf_bpf_validate(const void *, size_t); 228bool npf_bpf_validate(const void *, size_t);
227 229
228/* Tableset interface. */ 230/* Tableset interface. */
229void npf_tableset_sysinit(void); 231void npf_tableset_sysinit(void);
230void npf_tableset_sysfini(void); 232void npf_tableset_sysfini(void);
231 233
232extern const pt_tree_ops_t npf_table_ptree_ops; 234extern const pt_tree_ops_t npf_table_ptree_ops;
233 235
234npf_tableset_t *npf_tableset_create(u_int); 236npf_tableset_t *npf_tableset_create(u_int);
235void npf_tableset_destroy(npf_tableset_t *); 237void npf_tableset_destroy(npf_tableset_t *);
236int npf_tableset_insert(npf_tableset_t *, npf_table_t *); 238int npf_tableset_insert(npf_tableset_t *, npf_table_t *);
237npf_table_t * npf_tableset_getbyname(npf_tableset_t *, const char *); 239npf_table_t * npf_tableset_getbyname(npf_tableset_t *, const char *);
238npf_table_t * npf_tableset_getbyid(npf_tableset_t *, u_int); 240npf_table_t * npf_tableset_getbyid(npf_tableset_t *, u_int);
239void npf_tableset_reload(npf_tableset_t *, npf_tableset_t *); 241void npf_tableset_reload(npf_tableset_t *, npf_tableset_t *);
240void npf_tableset_syncdict(const npf_tableset_t *, prop_dictionary_t); 242void npf_tableset_syncdict(const npf_tableset_t *, prop_dictionary_t);
241 243
242npf_table_t * npf_table_create(const char *, u_int, int, void *, size_t); 244npf_table_t * npf_table_create(const char *, u_int, int, void *, size_t);
243void npf_table_destroy(npf_table_t *); 245void npf_table_destroy(npf_table_t *);
244 246
245int npf_table_check(npf_tableset_t *, const char *, u_int, int); 247int npf_table_check(npf_tableset_t *, const char *, u_int, int);
246int npf_table_insert(npf_table_t *, const int, 248int npf_table_insert(npf_table_t *, const int,
247 const npf_addr_t *, const npf_netmask_t); 249 const npf_addr_t *, const npf_netmask_t);
248int npf_table_remove(npf_table_t *, const int, 250int npf_table_remove(npf_table_t *, const int,
249 const npf_addr_t *, const npf_netmask_t); 251 const npf_addr_t *, const npf_netmask_t);
250int npf_table_lookup(npf_table_t *, const int, const npf_addr_t *); 252int npf_table_lookup(npf_table_t *, const int, const npf_addr_t *);
251int npf_table_list(npf_table_t *, void *, size_t); 253int npf_table_list(npf_table_t *, void *, size_t);
252int npf_table_flush(npf_table_t *); 254int npf_table_flush(npf_table_t *);
253 255
254/* Ruleset interface. */ 256/* Ruleset interface. */
255npf_ruleset_t * npf_ruleset_create(size_t); 257npf_ruleset_t * npf_ruleset_create(size_t);
256void npf_ruleset_destroy(npf_ruleset_t *); 258void npf_ruleset_destroy(npf_ruleset_t *);
257void npf_ruleset_insert(npf_ruleset_t *, npf_rule_t *); 259void npf_ruleset_insert(npf_ruleset_t *, npf_rule_t *);
258void npf_ruleset_reload(npf_ruleset_t *, npf_ruleset_t *); 260void npf_ruleset_reload(npf_ruleset_t *, npf_ruleset_t *);
259void npf_ruleset_natreload(npf_ruleset_t *, npf_ruleset_t *); 261void npf_ruleset_natreload(npf_ruleset_t *, npf_ruleset_t *);
260npf_rule_t * npf_ruleset_matchnat(npf_ruleset_t *, npf_natpolicy_t *); 262npf_rule_t * npf_ruleset_matchnat(npf_ruleset_t *, npf_natpolicy_t *);
261npf_rule_t * npf_ruleset_sharepm(npf_ruleset_t *, npf_natpolicy_t *); 263npf_rule_t * npf_ruleset_sharepm(npf_ruleset_t *, npf_natpolicy_t *);
262void npf_ruleset_freealg(npf_ruleset_t *, npf_alg_t *); 264void npf_ruleset_freealg(npf_ruleset_t *, npf_alg_t *);
263 265
264int npf_ruleset_add(npf_ruleset_t *, const char *, npf_rule_t *); 266int npf_ruleset_add(npf_ruleset_t *, const char *, npf_rule_t *);
265int npf_ruleset_remove(npf_ruleset_t *, const char *, uint64_t); 267int npf_ruleset_remove(npf_ruleset_t *, const char *, uint64_t);
266int npf_ruleset_remkey(npf_ruleset_t *, const char *, 268int npf_ruleset_remkey(npf_ruleset_t *, const char *,
267 const void *, size_t); 269 const void *, size_t);
268prop_dictionary_t npf_ruleset_list(npf_ruleset_t *, const char *); 270prop_dictionary_t npf_ruleset_list(npf_ruleset_t *, const char *);
269int npf_ruleset_flush(npf_ruleset_t *, const char *); 271int npf_ruleset_flush(npf_ruleset_t *, const char *);
270void npf_ruleset_gc(npf_ruleset_t *); 272void npf_ruleset_gc(npf_ruleset_t *);
271 273
272npf_rule_t * npf_ruleset_inspect(npf_cache_t *, nbuf_t *, 274npf_rule_t * npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
273 const npf_ruleset_t *, const int, const int); 275 const npf_ruleset_t *, const int, const int);
274int npf_rule_conclude(const npf_rule_t *, int *); 276int npf_rule_conclude(const npf_rule_t *, int *);
275 277
276/* Rule interface. */ 278/* Rule interface. */
277npf_rule_t * npf_rule_alloc(prop_dictionary_t); 279npf_rule_t * npf_rule_alloc(prop_dictionary_t);
278void npf_rule_setcode(npf_rule_t *, int, void *, size_t); 280void npf_rule_setcode(npf_rule_t *, int, void *, size_t);
279void npf_rule_setrproc(npf_rule_t *, npf_rproc_t *); 281void npf_rule_setrproc(npf_rule_t *, npf_rproc_t *);
280void npf_rule_free(npf_rule_t *); 282void npf_rule_free(npf_rule_t *);
281uint64_t npf_rule_getid(const npf_rule_t *); 283uint64_t npf_rule_getid(const npf_rule_t *);
282npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *); 284npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
283void npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *); 285void npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *);
284npf_rproc_t * npf_rule_getrproc(const npf_rule_t *); 286npf_rproc_t * npf_rule_getrproc(const npf_rule_t *);
285 287
286void npf_ext_sysinit(void); 288void npf_ext_sysinit(void);
287void npf_ext_sysfini(void); 289void npf_ext_sysfini(void);
288int npf_ext_construct(const char *, 290int npf_ext_construct(const char *,
289 npf_rproc_t *, prop_dictionary_t); 291 npf_rproc_t *, prop_dictionary_t);
290 292
291npf_rprocset_t *npf_rprocset_create(void); 293npf_rprocset_t *npf_rprocset_create(void);
292void npf_rprocset_destroy(npf_rprocset_t *); 294void npf_rprocset_destroy(npf_rprocset_t *);
293npf_rproc_t * npf_rprocset_lookup(npf_rprocset_t *, const char *); 295npf_rproc_t * npf_rprocset_lookup(npf_rprocset_t *, const char *);
294void npf_rprocset_insert(npf_rprocset_t *, npf_rproc_t *); 296void npf_rprocset_insert(npf_rprocset_t *, npf_rproc_t *);
295 297
296npf_rproc_t * npf_rproc_create(prop_dictionary_t); 298npf_rproc_t * npf_rproc_create(prop_dictionary_t);
297void npf_rproc_acquire(npf_rproc_t *); 299void npf_rproc_acquire(npf_rproc_t *);
298void npf_rproc_release(npf_rproc_t *); 300void npf_rproc_release(npf_rproc_t *);
299void npf_rproc_run(npf_cache_t *, nbuf_t *, npf_rproc_t *, int *); 301void npf_rproc_run(npf_cache_t *, nbuf_t *, npf_rproc_t *, int *);
300 302
301/* Session handling interface. */ 303/* Session handling interface. */
302void npf_session_sysinit(void); 304void npf_session_sysinit(void);
303void npf_session_sysfini(void); 305void npf_session_sysfini(void);
304void npf_session_tracking(bool); 306void npf_session_tracking(bool);
305 307
306npf_sehash_t * sess_htable_create(void); 308npf_sehash_t * sess_htable_create(void);
307void sess_htable_destroy(npf_sehash_t *); 309void sess_htable_destroy(npf_sehash_t *);
308 310
309npf_session_t * npf_session_lookup(const npf_cache_t *, const nbuf_t *, 311npf_session_t * npf_session_lookup(const npf_cache_t *, const nbuf_t *,
310 const int, bool *); 312 const int, bool *);
311npf_session_t * npf_session_inspect(npf_cache_t *, nbuf_t *, const int, int *); 313npf_session_t * npf_session_inspect(npf_cache_t *, nbuf_t *, const int, int *);
312npf_session_t * npf_session_establish(npf_cache_t *, nbuf_t *, const int); 314npf_session_t * npf_session_establish(npf_cache_t *, nbuf_t *, const int);
313void npf_session_release(npf_session_t *); 315void npf_session_release(npf_session_t *);
314void npf_session_expire(npf_session_t *); 316void npf_session_expire(npf_session_t *);
315bool npf_session_pass(const npf_session_t *, npf_rproc_t **); 317bool npf_session_pass(const npf_session_t *, npf_rproc_t **);
316void npf_session_setpass(npf_session_t *, npf_rproc_t *); 318void npf_session_setpass(npf_session_t *, npf_rproc_t *);
317int npf_session_setnat(npf_session_t *, npf_nat_t *, u_int); 319int npf_session_setnat(npf_session_t *, npf_nat_t *, u_int);
318npf_nat_t * npf_session_retnat(npf_session_t *, const int, bool *); 320npf_nat_t * npf_session_retnat(npf_session_t *, const int, bool *);
319 321
320void npf_session_load(npf_sehash_t *); 322void npf_session_load(npf_sehash_t *);
321int npf_session_save(prop_array_t, prop_array_t); 323int npf_session_save(prop_array_t, prop_array_t);
322int npf_session_restore(npf_sehash_t *, prop_dictionary_t); 324int npf_session_restore(npf_sehash_t *, prop_dictionary_t);
323 325
324/* State handling. */ 326/* State handling. */
325bool npf_state_init(npf_cache_t *, nbuf_t *, npf_state_t *); 327bool npf_state_init(npf_cache_t *, nbuf_t *, npf_state_t *);
326bool npf_state_inspect(npf_cache_t *, nbuf_t *, npf_state_t *, 328bool npf_state_inspect(npf_cache_t *, nbuf_t *, npf_state_t *,
327 const bool); 329 const bool);
328int npf_state_etime(const npf_state_t *, const int); 330int npf_state_etime(const npf_state_t *, const int);
329void npf_state_destroy(npf_state_t *); 331void npf_state_destroy(npf_state_t *);
330 332
331bool npf_state_tcp(npf_cache_t *, nbuf_t *, npf_state_t *, int); 333bool npf_state_tcp(npf_cache_t *, nbuf_t *, npf_state_t *, int);
332int npf_state_tcp_timeout(const npf_state_t *); 334int npf_state_tcp_timeout(const npf_state_t *);
333 335
334/* NAT. */ 336/* NAT. */
335void npf_nat_sysinit(void); 337void npf_nat_sysinit(void);
336void npf_nat_sysfini(void); 338void npf_nat_sysfini(void);
337npf_natpolicy_t *npf_nat_newpolicy(prop_dictionary_t, npf_ruleset_t *); 339npf_natpolicy_t *npf_nat_newpolicy(prop_dictionary_t, npf_ruleset_t *);
338void npf_nat_freepolicy(npf_natpolicy_t *); 340void npf_nat_freepolicy(npf_natpolicy_t *);
339bool npf_nat_matchpolicy(npf_natpolicy_t *, npf_natpolicy_t *); 341bool npf_nat_matchpolicy(npf_natpolicy_t *, npf_natpolicy_t *);
340bool npf_nat_sharepm(npf_natpolicy_t *, npf_natpolicy_t *); 342bool npf_nat_sharepm(npf_natpolicy_t *, npf_natpolicy_t *);
341void npf_nat_freealg(npf_natpolicy_t *, npf_alg_t *); 343void npf_nat_freealg(npf_natpolicy_t *, npf_alg_t *);
342 344
343int npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int); 345int npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
344int npf_nat_translate(npf_cache_t *, nbuf_t *, npf_nat_t *, bool); 
345void npf_nat_destroy(npf_nat_t *); 346void npf_nat_destroy(npf_nat_t *);
346void npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *); 347void npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
347void npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *); 348void npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
348void npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t); 349void npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
349 350
350int npf_nat_save(prop_dictionary_t, prop_array_t, npf_nat_t *); 351int npf_nat_save(prop_dictionary_t, prop_array_t, npf_nat_t *);
351npf_nat_t * npf_nat_restore(prop_dictionary_t, npf_session_t *); 352npf_nat_t * npf_nat_restore(prop_dictionary_t, npf_session_t *);
352 353
353/* ALG interface. */ 354/* ALG interface. */
354void npf_alg_sysinit(void); 355void npf_alg_sysinit(void);
355void npf_alg_sysfini(void); 356void npf_alg_sysfini(void);
356npf_alg_t * npf_alg_register(const char *, const npfa_funcs_t *); 357npf_alg_t * npf_alg_register(const char *, const npfa_funcs_t *);
357int npf_alg_unregister(npf_alg_t *); 358int npf_alg_unregister(npf_alg_t *);
358npf_alg_t * npf_alg_construct(const char *); 359npf_alg_t * npf_alg_construct(const char *);
359bool npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *, int); 360bool npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
360void npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, bool); 361void npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, bool);
361npf_session_t * npf_alg_session(npf_cache_t *, nbuf_t *, int); 362npf_session_t * npf_alg_session(npf_cache_t *, nbuf_t *, int);
362 363
363/* Debugging routines. */ 364/* Debugging routines. */
364void npf_addr_dump(const npf_addr_t *); 365void npf_addr_dump(const npf_addr_t *);
365void npf_sessions_dump(void); 366void npf_sessions_dump(void);
366void npf_state_dump(const npf_state_t *); 367void npf_state_dump(const npf_state_t *);
367void npf_nat_dump(const npf_nat_t *); 368void npf_nat_dump(const npf_nat_t *);
368void npf_state_setsampler(void (*)(npf_state_t *, bool)); 369void npf_state_setsampler(void (*)(npf_state_t *, bool));
369 370
370#endif /* _NPF_IMPL_H_ */ 371#endif /* _NPF_IMPL_H_ */

cvs diff -r1.29 -r1.30 src/sys/net/npf/npf_inet.c (switch to unified diff)

--- src/sys/net/npf/npf_inet.c 2014/02/13 03:34:40 1.29
+++ src/sys/net/npf/npf_inet.c 2014/02/19 03:51:31 1.30
@@ -1,702 +1,746 @@ @@ -1,702 +1,746 @@
1/* $NetBSD: npf_inet.c,v 1.29 2014/02/13 03:34:40 rmind Exp $ */ 1/* $NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc. 4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This material is based upon work partially supported by The 7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Various protocol related helper routines. 33 * Various protocol related helper routines.
34 * 34 *
35 * This layer manipulates npf_cache_t structure i.e. caches requested headers 35 * This layer manipulates npf_cache_t structure i.e. caches requested headers
36 * and stores which information was cached in the information bit field. 36 * and stores which information was cached in the information bit field.
37 * It is also the responsibility of this layer to update or invalidate the cache 37 * It is also the responsibility of this layer to update or invalidate the cache
38 * on rewrites (e.g. by translation routines). 38 * on rewrites (e.g. by translation routines).
39 */ 39 */
40 40
41#include <sys/cdefs.h> 41#include <sys/cdefs.h>
42__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.29 2014/02/13 03:34:40 rmind Exp $"); 42__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $");
43 43
44#include <sys/param.h> 44#include <sys/param.h>
45#include <sys/types.h> 45#include <sys/types.h>
46 46
47#include <net/pfil.h> 47#include <net/pfil.h>
48#include <net/if.h> 48#include <net/if.h>
49#include <net/ethertypes.h> 49#include <net/ethertypes.h>
50#include <net/if_ether.h> 50#include <net/if_ether.h>
51 51
52#include <netinet/in_systm.h> 52#include <netinet/in_systm.h>
53#include <netinet/in.h> 53#include <netinet/in.h>
54#include <netinet/ip.h> 54#include <netinet/ip.h>
55#include <netinet/ip6.h> 55#include <netinet/ip6.h>
56#include <netinet/tcp.h> 56#include <netinet/tcp.h>
57#include <netinet/udp.h> 57#include <netinet/udp.h>
58#include <netinet/ip_icmp.h> 58#include <netinet/ip_icmp.h>
59 59
60#include "npf_impl.h" 60#include "npf_impl.h"
61 61
62/* 62/*
63 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum. 63 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum.
64 */ 64 */
65 65
66uint16_t 66uint16_t
67npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 67npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
68{ 68{
69 uint32_t sum; 69 uint32_t sum;
70 70
71 /* 71 /*
72 * RFC 1624: 72 * RFC 1624:
73 * HC' = ~(~HC + ~m + m') 73 * HC' = ~(~HC + ~m + m')
74 * 74 *
75 * Note: 1's complement sum is endian-independent (RFC 1071, page 2). 75 * Note: 1's complement sum is endian-independent (RFC 1071, page 2).
76 */ 76 */
77 sum = ~cksum & 0xffff; 77 sum = ~cksum & 0xffff;
78 sum += (~odatum & 0xffff) + ndatum; 78 sum += (~odatum & 0xffff) + ndatum;
79 sum = (sum >> 16) + (sum & 0xffff); 79 sum = (sum >> 16) + (sum & 0xffff);
80 sum += (sum >> 16); 80 sum += (sum >> 16);
81 81
82 return ~sum & 0xffff; 82 return ~sum & 0xffff;
83} 83}
84 84
85uint16_t 85uint16_t
86npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 86npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum)
87{ 87{
88 uint32_t sum; 88 uint32_t sum;
89 89
90 /* 90 /*
91 * Checksum 32-bit datum as two 16-bit. Note, the first 91 * Checksum 32-bit datum as two 16-bit. Note, the first
92 * 32->16 bit reduction is not necessary. 92 * 32->16 bit reduction is not necessary.
93 */ 93 */
94 sum = ~cksum & 0xffff; 94 sum = ~cksum & 0xffff;
95 sum += (~odatum & 0xffff) + (ndatum & 0xffff); 95 sum += (~odatum & 0xffff) + (ndatum & 0xffff);
96 96
97 sum += (~odatum >> 16) + (ndatum >> 16); 97 sum += (~odatum >> 16) + (ndatum >> 16);
98 sum = (sum >> 16) + (sum & 0xffff); 98 sum = (sum >> 16) + (sum & 0xffff);
99 sum += (sum >> 16); 99 sum += (sum >> 16);
100 return ~sum & 0xffff; 100 return ~sum & 0xffff;
101} 101}
102 102
103/* 103/*
104 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 104 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
105 */ 105 */
106uint16_t 106uint16_t
107npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr, 107npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr,
108 const npf_addr_t *naddr) 108 const npf_addr_t *naddr)
109{ 109{
110 const uint32_t *oip32 = (const uint32_t *)oaddr; 110 const uint32_t *oip32 = (const uint32_t *)oaddr;
111 const uint32_t *nip32 = (const uint32_t *)naddr; 111 const uint32_t *nip32 = (const uint32_t *)naddr;
112 112
113 KASSERT(sz % sizeof(uint32_t) == 0); 113 KASSERT(sz % sizeof(uint32_t) == 0);
114 do { 114 do {
115 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 115 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
116 sz -= sizeof(uint32_t); 116 sz -= sizeof(uint32_t);
117 } while (sz); 117 } while (sz);
118 118
119 return cksum; 119 return cksum;
120} 120}
121 121
122/* 122/*
123 * npf_addr_mix: provide IP addresses as an XORed 32-bit integer. 123 * npf_addr_mix: provide IP addresses as an XORed 32-bit integer.
124 * Note: used for hash function. 124 * Note: used for hash function.
125 */ 125 */
126uint32_t 126uint32_t
127npf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2) 127npf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
128{ 128{
129 uint32_t mix = 0; 129 uint32_t mix = 0;
130 130
131 KASSERT(sz > 0 && a1 != NULL && a2 != NULL); 131 KASSERT(sz > 0 && a1 != NULL && a2 != NULL);
132 132
133 for (int i = 0; i < (sz >> 2); i++) { 133 for (int i = 0; i < (sz >> 2); i++) {
134 mix ^= a1->s6_addr32[i]; 134 mix ^= a1->s6_addr32[i];
135 mix ^= a2->s6_addr32[i]; 135 mix ^= a2->s6_addr32[i];
136 } 136 }
137 return mix; 137 return mix;
138} 138}
139 139
140/* 140/*
141 * npf_addr_mask: apply the mask to a given address and store the result. 141 * npf_addr_mask: apply the mask to a given address and store the result.
142 */ 142 */
143void 143void
144npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask, 144npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask,
145 const int alen, npf_addr_t *out) 145 const int alen, npf_addr_t *out)
146{ 146{
147 const int nwords = alen >> 2; 147 const int nwords = alen >> 2;
148 uint_fast8_t length = mask; 148 uint_fast8_t length = mask;
149 149
150 /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 150 /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */
151 KASSERT(length <= NPF_MAX_NETMASK); 151 KASSERT(length <= NPF_MAX_NETMASK);
152 152
153 for (int i = 0; i < nwords; i++) { 153 for (int i = 0; i < nwords; i++) {
154 uint32_t wordmask; 154 uint32_t wordmask;
155 155
156 if (length >= 32) { 156 if (length >= 32) {
157 wordmask = htonl(0xffffffff); 157 wordmask = htonl(0xffffffff);
158 length -= 32; 158 length -= 32;
159 } else if (length) { 159 } else if (length) {
160 wordmask = htonl(0xffffffff << (32 - length)); 160 wordmask = htonl(0xffffffff << (32 - length));
161 length = 0; 161 length = 0;
162 } else { 162 } else {
163 wordmask = 0; 163 wordmask = 0;
164 } 164 }
165 out->s6_addr32[i] = addr->s6_addr32[i] & wordmask; 165 out->s6_addr32[i] = addr->s6_addr32[i] & wordmask;
166 } 166 }
167} 167}
168 168
169/* 169/*
170 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6. 170 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6.
171 * 171 *
172 * => Return 0 if equal and negative/positive if less/greater accordingly. 172 * => Return 0 if equal and negative/positive if less/greater accordingly.
173 * => Ignore the mask, if NPF_NO_NETMASK is specified. 173 * => Ignore the mask, if NPF_NO_NETMASK is specified.
174 */ 174 */
175int 175int
176npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1, 176npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1,
177 const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen) 177 const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen)
178{ 178{
179 npf_addr_t realaddr1, realaddr2; 179 npf_addr_t realaddr1, realaddr2;
180 180
181 if (mask1 != NPF_NO_NETMASK) { 181 if (mask1 != NPF_NO_NETMASK) {
182 npf_addr_mask(addr1, mask1, alen, &realaddr1); 182 npf_addr_mask(addr1, mask1, alen, &realaddr1);
183 addr1 = &realaddr1; 183 addr1 = &realaddr1;
184 } 184 }
185 if (mask2 != NPF_NO_NETMASK) { 185 if (mask2 != NPF_NO_NETMASK) {
186 npf_addr_mask(addr2, mask2, alen, &realaddr2); 186 npf_addr_mask(addr2, mask2, alen, &realaddr2);
187 addr2 = &realaddr2; 187 addr2 = &realaddr2;
188 } 188 }
189 return memcmp(addr1, addr2, alen); 189 return memcmp(addr1, addr2, alen);
190} 190}
191 191
192/* 192/*
193 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length. 193 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
194 * 194 *
195 * => Returns all values in host byte-order. 195 * => Returns all values in host byte-order.
196 */ 196 */
197int 197int
198npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) 198npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
199{ 199{
200 const struct tcphdr *th = npc->npc_l4.tcp; 200 const struct tcphdr *th = npc->npc_l4.tcp;
201 u_int thlen; 201 u_int thlen;
202 202
203 KASSERT(npf_iscached(npc, NPC_TCP)); 203 KASSERT(npf_iscached(npc, NPC_TCP));
204 204
205 *seq = ntohl(th->th_seq); 205 *seq = ntohl(th->th_seq);
206 *ack = ntohl(th->th_ack); 206 *ack = ntohl(th->th_ack);
207 *win = (uint32_t)ntohs(th->th_win); 207 *win = (uint32_t)ntohs(th->th_win);
208 thlen = th->th_off << 2; 208 thlen = th->th_off << 2;
209 209
210 if (npf_iscached(npc, NPC_IP4)) { 210 if (npf_iscached(npc, NPC_IP4)) {
211 const struct ip *ip = npc->npc_ip.v4; 211 const struct ip *ip = npc->npc_ip.v4;
212 return ntohs(ip->ip_len) - npc->npc_hlen - thlen; 212 return ntohs(ip->ip_len) - npc->npc_hlen - thlen;
213 } else if (npf_iscached(npc, NPC_IP6)) { 213 } else if (npf_iscached(npc, NPC_IP6)) {
214 const struct ip6_hdr *ip6 = npc->npc_ip.v6; 214 const struct ip6_hdr *ip6 = npc->npc_ip.v6;
215 return ntohs(ip6->ip6_plen) - thlen; 215 return ntohs(ip6->ip6_plen) - thlen;
216 } 216 }
217 return 0; 217 return 0;
218} 218}
219 219
220/* 220/*
221 * npf_fetch_tcpopts: parse and return TCP options. 221 * npf_fetch_tcpopts: parse and return TCP options.
222 */ 222 */
223bool 223bool
224npf_fetch_tcpopts(npf_cache_t *npc, nbuf_t *nbuf, uint16_t *mss, int *wscale) 224npf_fetch_tcpopts(npf_cache_t *npc, nbuf_t *nbuf, uint16_t *mss, int *wscale)
225{ 225{
226 const struct tcphdr *th = npc->npc_l4.tcp; 226 const struct tcphdr *th = npc->npc_l4.tcp;
227 int topts_len, step; 227 int topts_len, step;
228 void *nptr; 228 void *nptr;
229 uint8_t val; 229 uint8_t val;
230 bool ok; 230 bool ok;
231 231
232 KASSERT(npf_iscached(npc, NPC_IP46)); 232 KASSERT(npf_iscached(npc, NPC_IP46));
233 KASSERT(npf_iscached(npc, NPC_TCP)); 233 KASSERT(npf_iscached(npc, NPC_TCP));
234 234
235 /* Determine if there are any TCP options, get their length. */ 235 /* Determine if there are any TCP options, get their length. */
236 topts_len = (th->th_off << 2) - sizeof(struct tcphdr); 236 topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
237 if (topts_len <= 0) { 237 if (topts_len <= 0) {
238 /* No options. */ 238 /* No options. */
239 return false; 239 return false;
240 } 240 }
241 KASSERT(topts_len <= MAX_TCPOPTLEN); 241 KASSERT(topts_len <= MAX_TCPOPTLEN);
242 242
243 /* First step: IP and TCP header up to options. */ 243 /* First step: IP and TCP header up to options. */
244 step = npc->npc_hlen + sizeof(struct tcphdr); 244 step = npc->npc_hlen + sizeof(struct tcphdr);
245 nbuf_reset(nbuf); 245 nbuf_reset(nbuf);
246next: 246next:
247 if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) { 247 if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) {
248 ok = false; 248 ok = false;
249 goto done; 249 goto done;
250 } 250 }
251 val = *(uint8_t *)nptr; 251 val = *(uint8_t *)nptr;
252 252
253 switch (val) { 253 switch (val) {
254 case TCPOPT_EOL: 254 case TCPOPT_EOL:
255 /* Done. */ 255 /* Done. */
256 ok = true; 256 ok = true;
257 goto done; 257 goto done;
258 case TCPOPT_NOP: 258 case TCPOPT_NOP:
259 topts_len--; 259 topts_len--;
260 step = 1; 260 step = 1;
261 break; 261 break;
262 case TCPOPT_MAXSEG: 262 case TCPOPT_MAXSEG:
263 if ((nptr = nbuf_advance(nbuf, 2, 2)) == NULL) { 263 if ((nptr = nbuf_advance(nbuf, 2, 2)) == NULL) {
264 ok = false; 264 ok = false;
265 goto done; 265 goto done;
266 } 266 }
267 if (mss) { 267 if (mss) {
268 if (*mss) { 268 if (*mss) {
269 memcpy(nptr, mss, sizeof(uint16_t)); 269 memcpy(nptr, mss, sizeof(uint16_t));
270 } else { 270 } else {
271 memcpy(mss, nptr, sizeof(uint16_t)); 271 memcpy(mss, nptr, sizeof(uint16_t));
272 } 272 }
273 } 273 }
274 topts_len -= TCPOLEN_MAXSEG; 274 topts_len -= TCPOLEN_MAXSEG;
275 step = 2; 275 step = 2;
276 break; 276 break;
277 case TCPOPT_WINDOW: 277 case TCPOPT_WINDOW:
278 /* TCP Window Scaling (RFC 1323). */ 278 /* TCP Window Scaling (RFC 1323). */
279 if ((nptr = nbuf_advance(nbuf, 2, 1)) == NULL) { 279 if ((nptr = nbuf_advance(nbuf, 2, 1)) == NULL) {
280 ok = false; 280 ok = false;
281 goto done; 281 goto done;
282 } 282 }
283 val = *(uint8_t *)nptr; 283 val = *(uint8_t *)nptr;
284 *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val; 284 *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
285 topts_len -= TCPOLEN_WINDOW; 285 topts_len -= TCPOLEN_WINDOW;
286 step = 1; 286 step = 1;
287 break; 287 break;
288 default: 288 default:
289 if ((nptr = nbuf_advance(nbuf, 1, 1)) == NULL) { 289 if ((nptr = nbuf_advance(nbuf, 1, 1)) == NULL) {
290 ok = false; 290 ok = false;
291 goto done; 291 goto done;
292 } 292 }
293 val = *(uint8_t *)nptr; 293 val = *(uint8_t *)nptr;
294 if (val < 2 || val > topts_len) { 294 if (val < 2 || val > topts_len) {
295 ok = false; 295 ok = false;
296 goto done; 296 goto done;
297 } 297 }
298 topts_len -= val; 298 topts_len -= val;
299 step = val - 1; 299 step = val - 1;
300 } 300 }
301 301
302 /* Any options left? */ 302 /* Any options left? */
303 if (__predict_true(topts_len > 0)) { 303 if (__predict_true(topts_len > 0)) {
304 goto next; 304 goto next;
305 } 305 }
306 ok = true; 306 ok = true;
307done: 307done:
308 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 308 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
309 npf_recache(npc, nbuf); 309 npf_recache(npc, nbuf);
310 } 310 }
311 return ok; 311 return ok;
312} 312}
313 313
314static int 314static int
315npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) 315npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf)
316{ 316{
317 const void *nptr = nbuf_dataptr(nbuf); 317 const void *nptr = nbuf_dataptr(nbuf);
318 const uint8_t ver = *(const uint8_t *)nptr; 318 const uint8_t ver = *(const uint8_t *)nptr;
319 int flags = 0; 319 int flags = 0;
320 320
321 switch (ver >> 4) { 321 switch (ver >> 4) {
322 case IPVERSION: { 322 case IPVERSION: {
323 struct ip *ip; 323 struct ip *ip;
324 324
325 ip = nbuf_ensure_contig(nbuf, sizeof(struct ip)); 325 ip = nbuf_ensure_contig(nbuf, sizeof(struct ip));
326 if (ip == NULL) { 326 if (ip == NULL) {
327 return 0; 327 return 0;
328 } 328 }
329 329
330 /* Check header length and fragment offset. */ 330 /* Check header length and fragment offset. */
331 if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) { 331 if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) {
332 return 0; 332 return 0;
333 } 333 }
334 if (ip->ip_off & ~htons(IP_DF | IP_RF)) { 334 if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
335 /* Note fragmentation. */ 335 /* Note fragmentation. */
336 flags |= NPC_IPFRAG; 336 flags |= NPC_IPFRAG;
337 } 337 }
338 338
339 /* Cache: layer 3 - IPv4. */ 339 /* Cache: layer 3 - IPv4. */
340 npc->npc_alen = sizeof(struct in_addr); 340 npc->npc_alen = sizeof(struct in_addr);
341 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src; 341 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src;
342 npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst; 342 npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst;
343 npc->npc_hlen = ip->ip_hl << 2; 343 npc->npc_hlen = ip->ip_hl << 2;
344 npc->npc_proto = ip->ip_p; 344 npc->npc_proto = ip->ip_p;
345 345
346 npc->npc_ip.v4 = ip; 346 npc->npc_ip.v4 = ip;
347 flags |= NPC_IP4; 347 flags |= NPC_IP4;
348 break; 348 break;
349 } 349 }
350 350
351 case (IPV6_VERSION >> 4): { 351 case (IPV6_VERSION >> 4): {
352 struct ip6_hdr *ip6; 352 struct ip6_hdr *ip6;
353 struct ip6_ext *ip6e; 353 struct ip6_ext *ip6e;
354 size_t off, hlen; 354 size_t off, hlen;
355 355
356 ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr)); 356 ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr));
357 if (ip6 == NULL) { 357 if (ip6 == NULL) {
358 return 0; 358 return 0;
359 } 359 }
360 360
361 /* Set initial next-protocol value. */ 361 /* Set initial next-protocol value. */
362 hlen = sizeof(struct ip6_hdr); 362 hlen = sizeof(struct ip6_hdr);
363 npc->npc_proto = ip6->ip6_nxt; 363 npc->npc_proto = ip6->ip6_nxt;
364 npc->npc_hlen = hlen; 364 npc->npc_hlen = hlen;
365 365
366 /* 366 /*
367 * Advance by the length of the current header. 367 * Advance by the length of the current header.
368 */ 368 */
369 off = nbuf_offset(nbuf); 369 off = nbuf_offset(nbuf);
370 while (nbuf_advance(nbuf, hlen, 0) != NULL) { 370 while (nbuf_advance(nbuf, hlen, 0) != NULL) {
371 ip6e = nbuf_ensure_contig(nbuf, sizeof(*ip6e)); 371 ip6e = nbuf_ensure_contig(nbuf, sizeof(*ip6e));
372 if (ip6e == NULL) { 372 if (ip6e == NULL) {
373 return 0; 373 return 0;
374 } 374 }
375 375
376 /* 376 /*
377 * Determine whether we are going to continue. 377 * Determine whether we are going to continue.
378 */ 378 */
379 switch (npc->npc_proto) { 379 switch (npc->npc_proto) {
380 case IPPROTO_HOPOPTS: 380 case IPPROTO_HOPOPTS:
381 case IPPROTO_DSTOPTS: 381 case IPPROTO_DSTOPTS:
382 case IPPROTO_ROUTING: 382 case IPPROTO_ROUTING:
383 hlen = (ip6e->ip6e_len + 1) << 3; 383 hlen = (ip6e->ip6e_len + 1) << 3;
384 break; 384 break;
385 case IPPROTO_FRAGMENT: 385 case IPPROTO_FRAGMENT:
386 hlen = sizeof(struct ip6_frag); 386 hlen = sizeof(struct ip6_frag);
387 flags |= NPC_IPFRAG; 387 flags |= NPC_IPFRAG;
388 break; 388 break;
389 case IPPROTO_AH: 389 case IPPROTO_AH:
390 hlen = (ip6e->ip6e_len + 2) << 2; 390 hlen = (ip6e->ip6e_len + 2) << 2;
391 break; 391 break;
392 default: 392 default:
393 hlen = 0; 393 hlen = 0;
394 break; 394 break;
395 } 395 }
396 396
397 if (!hlen) { 397 if (!hlen) {
398 break; 398 break;
399 } 399 }
400 npc->npc_proto = ip6e->ip6e_nxt; 400 npc->npc_proto = ip6e->ip6e_nxt;
401 npc->npc_hlen += hlen; 401 npc->npc_hlen += hlen;
402 } 402 }
403 403
404 /* 404 /*
405 * Re-fetch the header pointers (nbufs might have been 405 * Re-fetch the header pointers (nbufs might have been
406 * reallocated). Restore the original offset (if any). 406 * reallocated). Restore the original offset (if any).
407 */ 407 */
408 nbuf_reset(nbuf); 408 nbuf_reset(nbuf);
409 ip6 = nbuf_dataptr(nbuf); 409 ip6 = nbuf_dataptr(nbuf);
410 if (off) { 410 if (off) {
411 nbuf_advance(nbuf, off, 0); 411 nbuf_advance(nbuf, off, 0);
412 } 412 }
413 413
414 /* Cache: layer 3 - IPv6. */ 414 /* Cache: layer 3 - IPv6. */
415 npc->npc_alen = sizeof(struct in6_addr); 415 npc->npc_alen = sizeof(struct in6_addr);
416 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src; 416 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src;
417 npc->npc_ips[NPF_DST]= (npf_addr_t *)&ip6->ip6_dst; 417 npc->npc_ips[NPF_DST]= (npf_addr_t *)&ip6->ip6_dst;
418 418
419 npc->npc_ip.v6 = ip6; 419 npc->npc_ip.v6 = ip6;
420 flags |= NPC_IP6; 420 flags |= NPC_IP6;
421 break; 421 break;
422 } 422 }
423 default: 423 default:
424 break; 424 break;
425 } 425 }
426 return flags; 426 return flags;
427} 427}
428 428
429/* 429/*
430 * npf_cache_all: general routine to cache all relevant IP (v4 or v6) 430 * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
431 * and TCP, UDP or ICMP headers. 431 * and TCP, UDP or ICMP headers.
432 * 432 *
433 * => nbuf offset shall be set accordingly. 433 * => nbuf offset shall be set accordingly.
434 */ 434 */
435int 435int
436npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf) 436npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
437{ 437{
438 int flags, l4flags; 438 int flags, l4flags;
439 u_int hlen; 439 u_int hlen;
440 440
441 /* 441 /*
442 * This routine is a main point where the references are cached, 442 * This routine is a main point where the references are cached,
443 * therefore clear the flag as we reset. 443 * therefore clear the flag as we reset.
444 */ 444 */
445again: 445again:
446 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 446 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
447 447
448 /* 448 /*
449 * First, cache the L3 header (IPv4 or IPv6). If IP packet is 449 * First, cache the L3 header (IPv4 or IPv6). If IP packet is
450 * fragmented, then we cannot look into L4. 450 * fragmented, then we cannot look into L4.
451 */ 451 */
452 flags = npf_cache_ip(npc, nbuf); 452 flags = npf_cache_ip(npc, nbuf);
453 if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0) { 453 if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0) {
454 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 454 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
455 npc->npc_info |= flags; 455 npc->npc_info |= flags;
456 return flags; 456 return flags;
457 } 457 }
458 hlen = npc->npc_hlen; 458 hlen = npc->npc_hlen;
459 459
460 switch (npc->npc_proto) { 460 switch (npc->npc_proto) {
461 case IPPROTO_TCP: 461 case IPPROTO_TCP:
462 /* Cache: layer 4 - TCP. */ 462 /* Cache: layer 4 - TCP. */
463 npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, 463 npc->npc_l4.tcp = nbuf_advance(nbuf, hlen,
464 sizeof(struct tcphdr)); 464 sizeof(struct tcphdr));
465 l4flags = NPC_LAYER4 | NPC_TCP; 465 l4flags = NPC_LAYER4 | NPC_TCP;
466 break; 466 break;
467 case IPPROTO_UDP: 467 case IPPROTO_UDP:
468 /* Cache: layer 4 - UDP. */ 468 /* Cache: layer 4 - UDP. */
469 npc->npc_l4.udp = nbuf_advance(nbuf, hlen, 469 npc->npc_l4.udp = nbuf_advance(nbuf, hlen,
470 sizeof(struct udphdr)); 470 sizeof(struct udphdr));
471 l4flags = NPC_LAYER4 | NPC_UDP; 471 l4flags = NPC_LAYER4 | NPC_UDP;
472 break; 472 break;
473 case IPPROTO_ICMP: 473 case IPPROTO_ICMP:
474 /* Cache: layer 4 - ICMPv4. */ 474 /* Cache: layer 4 - ICMPv4. */
475 npc->npc_l4.icmp = nbuf_advance(nbuf, hlen, 475 npc->npc_l4.icmp = nbuf_advance(nbuf, hlen,
476 offsetof(struct icmp, icmp_void)); 476 offsetof(struct icmp, icmp_void));
477 l4flags = NPC_LAYER4 | NPC_ICMP; 477 l4flags = NPC_LAYER4 | NPC_ICMP;
478 break; 478 break;
479 case IPPROTO_ICMPV6: 479 case IPPROTO_ICMPV6:
480 /* Cache: layer 4 - ICMPv6. */ 480 /* Cache: layer 4 - ICMPv6. */
481 npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen, 481 npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen,
482 offsetof(struct icmp6_hdr, icmp6_data32)); 482 offsetof(struct icmp6_hdr, icmp6_data32));
483 l4flags = NPC_LAYER4 | NPC_ICMP; 483 l4flags = NPC_LAYER4 | NPC_ICMP;
484 break; 484 break;
485 default: 485 default:
486 l4flags = 0; 486 l4flags = 0;
487 break; 487 break;
488 } 488 }
489 489
490 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 490 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
491 goto again; 491 goto again;
492 } 492 }
493 493
494 /* Add the L4 flags if nbuf_advance() succeeded. */ 494 /* Add the L4 flags if nbuf_advance() succeeded. */
495 if (l4flags && npc->npc_l4.hdr) { 495 if (l4flags && npc->npc_l4.hdr) {
496 flags |= l4flags; 496 flags |= l4flags;
497 } 497 }
498 npc->npc_info |= flags; 498 npc->npc_info |= flags;
499 return flags; 499 return flags;
500} 500}
501 501
502void 502void
503npf_recache(npf_cache_t *npc, nbuf_t *nbuf) 503npf_recache(npf_cache_t *npc, nbuf_t *nbuf)
504{ 504{
505 const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4); 505 const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4);
506 int flags __diagused; 506 int flags __diagused;
507 507
508 nbuf_reset(nbuf); 508 nbuf_reset(nbuf);
509 npc->npc_info = 0; 509 npc->npc_info = 0;
510 flags = npf_cache_all(npc, nbuf); 510 flags = npf_cache_all(npc, nbuf);
511 KASSERT((flags & mflags) == mflags); 511 KASSERT((flags & mflags) == mflags);
512 KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0); 512 KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0);
513} 513}
514 514
515/* 515/*
516 * npf_rwrip: rewrite required IP address. 516 * npf_rwrip: rewrite required IP address.
517 */ 517 */
518bool 518bool
519npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr) 519npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr)
520{ 520{
521 KASSERT(npf_iscached(npc, NPC_IP46)); 521 KASSERT(npf_iscached(npc, NPC_IP46));
522 KASSERT(which == NPF_SRC || which == NPF_DST); 522 KASSERT(which == NPF_SRC || which == NPF_DST);
523 523
524 memcpy(npc->npc_ips[which], addr, npc->npc_alen); 524 memcpy(npc->npc_ips[which], addr, npc->npc_alen);
525 return true; 525 return true;
526} 526}
527 527
528/* 528/*
529 * npf_rwrport: rewrite required TCP/UDP port. 529 * npf_rwrport: rewrite required TCP/UDP port.
530 */ 530 */
531bool 531bool
532npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port) 532npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port)
533{ 533{
534 const int proto = npc->npc_proto; 534 const int proto = npc->npc_proto;
535 in_port_t *oport; 535 in_port_t *oport;
536 536
537 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 537 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
538 KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP); 538 KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
539 KASSERT(which == NPF_SRC || which == NPF_DST); 539 KASSERT(which == NPF_SRC || which == NPF_DST);
540 540
541 /* Get the offset and store the port in it. */ 541 /* Get the offset and store the port in it. */
542 if (proto == IPPROTO_TCP) { 542 if (proto == IPPROTO_TCP) {
543 struct tcphdr *th = npc->npc_l4.tcp; 543 struct tcphdr *th = npc->npc_l4.tcp;
544 oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport; 544 oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport;
545 } else { 545 } else {
546 struct udphdr *uh = npc->npc_l4.udp; 546 struct udphdr *uh = npc->npc_l4.udp;
547 oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport; 547 oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport;
548 } 548 }
549 memcpy(oport, &port, sizeof(in_port_t)); 549 memcpy(oport, &port, sizeof(in_port_t));
550 return true; 550 return true;
551} 551}
552 552
553/* 553/*
554 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum. 554 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum.
555 */ 555 */
556bool 556bool
557npf_rwrcksum(const npf_cache_t *npc, u_int which, 557npf_rwrcksum(const npf_cache_t *npc, u_int which,
558 const npf_addr_t *addr, const in_port_t port) 558 const npf_addr_t *addr, const in_port_t port)
559{ 559{
560 const npf_addr_t *oaddr = npc->npc_ips[which]; 560 const npf_addr_t *oaddr = npc->npc_ips[which];
561 const int proto = npc->npc_proto; 561 const int proto = npc->npc_proto;
562 const int alen = npc->npc_alen; 562 const int alen = npc->npc_alen;
563 uint16_t *ocksum; 563 uint16_t *ocksum;
564 in_port_t oport; 564 in_port_t oport;
565 565
566 KASSERT(npf_iscached(npc, NPC_LAYER4)); 566 KASSERT(npf_iscached(npc, NPC_LAYER4));
567 KASSERT(which == NPF_SRC || which == NPF_DST); 567 KASSERT(which == NPF_SRC || which == NPF_DST);
568 568
569 if (npf_iscached(npc, NPC_IP4)) { 569 if (npf_iscached(npc, NPC_IP4)) {
570 struct ip *ip = npc->npc_ip.v4; 570 struct ip *ip = npc->npc_ip.v4;
571 uint16_t ipsum = ip->ip_sum; 571 uint16_t ipsum = ip->ip_sum;
572 572
573 /* Recalculate IPv4 checksum and rewrite. */ 573 /* Recalculate IPv4 checksum and rewrite. */
574 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 574 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr);
575 } else { 575 } else {
576 /* No checksum for IPv6. */ 576 /* No checksum for IPv6. */
577 KASSERT(npf_iscached(npc, NPC_IP6)); 577 KASSERT(npf_iscached(npc, NPC_IP6));
578 } 578 }
579 579
580 /* Nothing else to do for ICMP. */ 580 /* Nothing else to do for ICMP. */
581 if (proto == IPPROTO_ICMP) { 581 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
582 return true; 582 return true;
583 } 583 }
584 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 584 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
585 585
586 /* 586 /*
587 * Calculate TCP/UDP checksum: 587 * Calculate TCP/UDP checksum:
588 * - Skip if UDP and the current checksum is zero. 588 * - Skip if UDP and the current checksum is zero.
589 * - Fixup the IP address change. 589 * - Fixup the IP address change.
590 * - Fixup the port change, if required (non-zero). 590 * - Fixup the port change, if required (non-zero).
591 */ 591 */
592 if (proto == IPPROTO_TCP) { 592 if (proto == IPPROTO_TCP) {
593 struct tcphdr *th = npc->npc_l4.tcp; 593 struct tcphdr *th = npc->npc_l4.tcp;
594 594
595 ocksum = &th->th_sum; 595 ocksum = &th->th_sum;
596 oport = (which == NPF_SRC) ? th->th_sport : th->th_dport; 596 oport = (which == NPF_SRC) ? th->th_sport : th->th_dport;
597 } else { 597 } else {
598 struct udphdr *uh = npc->npc_l4.udp; 598 struct udphdr *uh = npc->npc_l4.udp;
599 599
600 KASSERT(proto == IPPROTO_UDP); 600 KASSERT(proto == IPPROTO_UDP);
601 ocksum = &uh->uh_sum; 601 ocksum = &uh->uh_sum;
602 if (*ocksum == 0) { 602 if (*ocksum == 0) {
603 /* No need to update. */ 603 /* No need to update. */
604 return true; 604 return true;
605 } 605 }
606 oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport; 606 oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport;
607 } 607 }
608 608
609 uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 609 uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr);
610 if (port) { 610 if (port) {
611 cksum = npf_fixup16_cksum(cksum, oport, port); 611 cksum = npf_fixup16_cksum(cksum, oport, port);
612 } 612 }
613 613
614 /* Rewrite TCP/UDP checksum. */ 614 /* Rewrite TCP/UDP checksum. */
615 memcpy(ocksum, &cksum, sizeof(uint16_t)); 615 memcpy(ocksum, &cksum, sizeof(uint16_t));
616 return true; 616 return true;
617} 617}
618 618
619/* 619/*
 620 * npf_napt_rwr: perform address and/or port translation.
 621 */
 622int
 623npf_napt_rwr(const npf_cache_t *npc, u_int which,
 624 const npf_addr_t *addr, const in_addr_t port)
 625{
 626 const unsigned proto = npc->npc_proto;
 627
 628 /*
 629 * Rewrite IP and/or TCP/UDP checksums first, since we need the
 630 * current (old) address/port for the calculations. Then perform
 631 * the address translation i.e. rewrite source or destination.
 632 */
 633 if (!npf_rwrcksum(npc, which, addr, port)) {
 634 return EINVAL;
 635 }
 636 if (!npf_rwrip(npc, which, addr)) {
 637 return EINVAL;
 638 }
 639 if (port == 0) {
 640 /* Done. */
 641 return 0;
 642 }
 643
 644 switch (proto) {
 645 case IPPROTO_TCP:
 646 case IPPROTO_UDP:
 647 /* Rewrite source/destination port. */
 648 if (!npf_rwrport(npc, which, port)) {
 649 return EINVAL;
 650 }
 651 break;
 652 case IPPROTO_ICMP:
 653 case IPPROTO_ICMPV6:
 654 KASSERT(npf_iscached(npc, NPC_ICMP));
 655 /* Nothing. */
 656 break;
 657 default:
 658 return ENOTSUP;
 659 }
 660 return 0;
 661}
 662
 663/*
620 * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296. 664 * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296.
621 */ 665 */
622 666
623int 667int
624npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref, 668npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref,
625 npf_netmask_t len, uint16_t adj) 669 npf_netmask_t len, uint16_t adj)
626{ 670{
627 npf_addr_t *addr = npc->npc_ips[which]; 671 npf_addr_t *addr = npc->npc_ips[which];
628 unsigned remnant, word, preflen = len >> 4; 672 unsigned remnant, word, preflen = len >> 4;
629 uint32_t sum; 673 uint32_t sum;
630 674
631 KASSERT(which == NPF_SRC || which == NPF_DST); 675 KASSERT(which == NPF_SRC || which == NPF_DST);
632 676
633 if (!npf_iscached(npc, NPC_IP6)) { 677 if (!npf_iscached(npc, NPC_IP6)) {
634 return EINVAL; 678 return EINVAL;
635 } 679 }
636 if (len <= 48) { 680 if (len <= 48) {
637 /* 681 /*
638 * The word to adjust. Cannot translate the 0xffff 682 * The word to adjust. Cannot translate the 0xffff
639 * subnet if /48 or shorter. 683 * subnet if /48 or shorter.
640 */ 684 */
641 word = 3; 685 word = 3;
642 if (addr->s6_addr16[word] == 0xffff) { 686 if (addr->s6_addr16[word] == 0xffff) {
643 return EINVAL; 687 return EINVAL;
644 } 688 }
645 } else { 689 } else {
646 /* 690 /*
647 * Also, all 0s or 1s in the host part are disallowed for 691 * Also, all 0s or 1s in the host part are disallowed for
648 * longer than /48 prefixes. 692 * longer than /48 prefixes.
649 */ 693 */
650 if ((addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0) || 694 if ((addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0) ||
651 (addr->s6_addr32[2] == ~0U && addr->s6_addr32[3] == ~0U)) 695 (addr->s6_addr32[2] == ~0U && addr->s6_addr32[3] == ~0U))
652 return EINVAL; 696 return EINVAL;
653 697
654 /* Determine the 16-bit word to adjust. */ 698 /* Determine the 16-bit word to adjust. */
655 for (word = 4; word < 8; word++) 699 for (word = 4; word < 8; word++)
656 if (addr->s6_addr16[word] != 0xffff) 700 if (addr->s6_addr16[word] != 0xffff)
657 break; 701 break;
658 } 702 }
659 703
660 /* Rewrite the prefix. */ 704 /* Rewrite the prefix. */
661 for (unsigned i = 0; i < preflen; i++) { 705 for (unsigned i = 0; i < preflen; i++) {
662 addr->s6_addr16[i] = pref->s6_addr16[i]; 706 addr->s6_addr16[i] = pref->s6_addr16[i];
663 } 707 }
664 708
665 /* 709 /*
666 * If prefix length is within a 16-bit word (not dividable by 16), 710 * If prefix length is within a 16-bit word (not dividable by 16),
667 * then prepare a mask, determine the word and adjust it. 711 * then prepare a mask, determine the word and adjust it.
668 */ 712 */
669 if ((remnant = len - (preflen << 4)) != 0) { 713 if ((remnant = len - (preflen << 4)) != 0) {
670 const uint16_t wordmask = (1U << remnant) - 1; 714 const uint16_t wordmask = (1U << remnant) - 1;
671 const unsigned i = preflen; 715 const unsigned i = preflen;
672 716
673 addr->s6_addr16[i] = (pref->s6_addr16[i] & wordmask) | 717 addr->s6_addr16[i] = (pref->s6_addr16[i] & wordmask) |
674 (addr->s6_addr16[i] & ~wordmask); 718 (addr->s6_addr16[i] & ~wordmask);
675 } 719 }
676 720
677 /* 721 /*
678 * Performing 1's complement sum/difference. 722 * Performing 1's complement sum/difference.
679 */ 723 */
680 sum = addr->s6_addr16[word] + adj; 724 sum = addr->s6_addr16[word] + adj;
681 while (sum >> 16) { 725 while (sum >> 16) {
682 sum = (sum >> 16) + (sum & 0xffff); 726 sum = (sum >> 16) + (sum & 0xffff);
683 } 727 }
684 if (sum == 0xffff) { 728 if (sum == 0xffff) {
685 /* RFC 1071. */ 729 /* RFC 1071. */
686 sum = 0x0000; 730 sum = 0x0000;
687 } 731 }
688 addr->s6_addr16[word] = sum; 732 addr->s6_addr16[word] = sum;
689 return 0; 733 return 0;
690} 734}
691 735
692#if defined(DDB) || defined(_NPF_TESTING) 736#if defined(DDB) || defined(_NPF_TESTING)
693 737
694void 738void
695npf_addr_dump(const npf_addr_t *addr) 739npf_addr_dump(const npf_addr_t *addr)
696{ 740{
697 printf("IP[%x:%x:%x:%x]\n", 741 printf("IP[%x:%x:%x:%x]\n",
698 addr->s6_addr32[0], addr->s6_addr32[1], 742 addr->s6_addr32[0], addr->s6_addr32[1],
699 addr->s6_addr32[2], addr->s6_addr32[3]); 743 addr->s6_addr32[2], addr->s6_addr32[3]);
700} 744}
701 745
702#endif 746#endif

cvs diff -r1.25 -r1.26 src/sys/net/npf/npf_nat.c (switch to unified diff)

--- src/sys/net/npf/npf_nat.c 2014/02/13 03:34:40 1.25
+++ src/sys/net/npf/npf_nat.c 2014/02/19 03:51:31 1.26
@@ -1,946 +1,903 @@ @@ -1,946 +1,903 @@
1/* $NetBSD: npf_nat.c,v 1.25 2014/02/13 03:34:40 rmind Exp $ */ 1/* $NetBSD: npf_nat.c,v 1.26 2014/02/19 03:51:31 rmind Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org> 4 * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org>
5 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc. 5 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This material is based upon work partially supported by The 8 * This material is based upon work partially supported by The
9 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * NPF network address port translation (NAPT) and other forms of NAT. 34 * NPF network address port translation (NAPT) and other forms of NAT.
35 * Described in RFC 2663, RFC 3022, etc. 35 * Described in RFC 2663, RFC 3022, etc.
36 * 36 *
37 * Overview 37 * Overview
38 * 38 *
39 * There are few mechanisms: NAT policy, port map and translation. 39 * There are few mechanisms: NAT policy, port map and translation.
40 * NAT module has a separate ruleset, where rules contain associated 40 * NAT module has a separate ruleset, where rules contain associated
41 * NAT policy, thus flexible filter criteria can be used. 41 * NAT policy, thus flexible filter criteria can be used.
42 * 42 *
43 * Translation types 43 * Translation types
44 * 44 *
45 * There are two types of translation: outbound (NPF_NATOUT) and 45 * There are two types of translation: outbound (NPF_NATOUT) and
46 * inbound (NPF_NATIN). It should not be confused with connection 46 * inbound (NPF_NATIN). It should not be confused with connection
47 * direction. See npf_nat_which() for the description of how the 47 * direction. See npf_nat_which() for the description of how the
48 * addresses are rewritten. 48 * addresses are rewritten.
49 * 49 *
50 * It should be noted that bi-directional NAT is a combined outbound 50 * It should be noted that bi-directional NAT is a combined outbound
51 * and inbound translation, therefore constructed as two policies. 51 * and inbound translation, therefore constructed as two policies.
52 * 52 *
53 * NAT policies and port maps 53 * NAT policies and port maps
54 * 54 *
55 * NAT (translation) policy is applied when a packet matches the rule. 55 * NAT (translation) policy is applied when a packet matches the rule.
56 * Apart from filter criteria, NAT policy has a translation IP address 56 * Apart from filter criteria, NAT policy has a translation IP address
57 * and associated port map. Port map is a bitmap used to reserve and 57 * and associated port map. Port map is a bitmap used to reserve and
58 * use unique TCP/UDP ports for translation. Port maps are unique to 58 * use unique TCP/UDP ports for translation. Port maps are unique to
59 * the IP addresses, therefore multiple NAT policies with the same IP 59 * the IP addresses, therefore multiple NAT policies with the same IP
60 * will share the same port map. 60 * will share the same port map.
61 * 61 *
62 * Sessions, translation entries and their life-cycle 62 * Sessions, translation entries and their life-cycle
63 * 63 *
64 * NAT module relies on session management module. Each translated 64 * NAT module relies on session management module. Each translated
65 * session has an associated translation entry (npf_nat_t), which 65 * session has an associated translation entry (npf_nat_t), which
66 * contains information used for backwards stream translation, i.e. 66 * contains information used for backwards stream translation, i.e.
67 * original IP address with port and translation port, allocated from 67 * original IP address with port and translation port, allocated from
68 * the port map. Each NAT entry is associated with the policy, which 68 * the port map. Each NAT entry is associated with the policy, which
69 * contains translation IP address. Allocated port is returned to the 69 * contains translation IP address. Allocated port is returned to the
70 * port map and NAT entry is destroyed when session expires. 70 * port map and NAT entry is destroyed when session expires.
71 */ 71 */
72 72
73#include <sys/cdefs.h> 73#include <sys/cdefs.h>
74__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.25 2014/02/13 03:34:40 rmind Exp $"); 74__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.26 2014/02/19 03:51:31 rmind Exp $");
75 75
76#include <sys/param.h> 76#include <sys/param.h>
77#include <sys/types.h> 77#include <sys/types.h>
78 78
79#include <sys/atomic.h> 79#include <sys/atomic.h>
80#include <sys/bitops.h> 80#include <sys/bitops.h>
81#include <sys/condvar.h> 81#include <sys/condvar.h>
82#include <sys/kmem.h> 82#include <sys/kmem.h>
83#include <sys/mutex.h> 83#include <sys/mutex.h>
84#include <sys/pool.h> 84#include <sys/pool.h>
85#include <sys/proc.h> 85#include <sys/proc.h>
86#include <sys/cprng.h> 86#include <sys/cprng.h>
87 87
88#include <net/pfil.h> 88#include <net/pfil.h>
89#include <netinet/in.h> 89#include <netinet/in.h>
90 90
91#include "npf_impl.h" 91#include "npf_impl.h"
92 92
93/* 93/*
94 * NPF portmap structure. 94 * NPF portmap structure.
95 */ 95 */
96typedef struct { 96typedef struct {
97 u_int p_refcnt; 97 u_int p_refcnt;
98 uint32_t p_bitmap[0]; 98 uint32_t p_bitmap[0];
99} npf_portmap_t; 99} npf_portmap_t;
100 100
101/* Portmap range: [ 1024 .. 65535 ] */ 101/* Portmap range: [ 1024 .. 65535 ] */
102#define PORTMAP_FIRST (1024) 102#define PORTMAP_FIRST (1024)
103#define PORTMAP_SIZE ((65536 - PORTMAP_FIRST) / 32) 103#define PORTMAP_SIZE ((65536 - PORTMAP_FIRST) / 32)
104#define PORTMAP_FILLED ((uint32_t)~0U) 104#define PORTMAP_FILLED ((uint32_t)~0U)
105#define PORTMAP_MASK (31) 105#define PORTMAP_MASK (31)
106#define PORTMAP_SHIFT (5) 106#define PORTMAP_SHIFT (5)
107 107
108#define PORTMAP_MEM_SIZE \ 108#define PORTMAP_MEM_SIZE \
109 (sizeof(npf_portmap_t) + (PORTMAP_SIZE * sizeof(uint32_t))) 109 (sizeof(npf_portmap_t) + (PORTMAP_SIZE * sizeof(uint32_t)))
110 110
111/* 111/*
112 * NAT policy structure. 112 * NAT policy structure.
113 */ 113 */
114struct npf_natpolicy { 114struct npf_natpolicy {
115 LIST_HEAD(, npf_nat) n_nat_list; 115 LIST_HEAD(, npf_nat) n_nat_list;
116 volatile u_int n_refcnt; 116 volatile u_int n_refcnt;
117 kmutex_t n_lock; 117 kmutex_t n_lock;
118 kcondvar_t n_cv; 118 kcondvar_t n_cv;
119 npf_portmap_t * n_portmap; 119 npf_portmap_t * n_portmap;
120 /* NPF_NP_CMP_START */ 120 /* NPF_NP_CMP_START */
121 int n_type; 121 int n_type;
122 u_int n_flags; 122 u_int n_flags;
123 size_t n_addr_sz; 123 size_t n_addr_sz;
124 npf_addr_t n_taddr; 124 npf_addr_t n_taddr;
125 npf_netmask_t n_tmask; 125 npf_netmask_t n_tmask;
126 in_port_t n_tport; 126 in_port_t n_tport;
127 u_int n_algo; 127 u_int n_algo;
128 union { 128 union {
129 uint16_t n_npt66_adj; 129 uint16_t n_npt66_adj;
130 }; 130 };
131}; 131};
132 132
133#define NPF_NP_CMP_START offsetof(npf_natpolicy_t, n_type) 133#define NPF_NP_CMP_START offsetof(npf_natpolicy_t, n_type)
134#define NPF_NP_CMP_SIZE (sizeof(npf_natpolicy_t) - NPF_NP_CMP_START) 134#define NPF_NP_CMP_SIZE (sizeof(npf_natpolicy_t) - NPF_NP_CMP_START)
135 135
136/* 136/*
137 * NAT translation entry for a session. 137 * NAT translation entry for a session.
138 */ 138 */
139struct npf_nat { 139struct npf_nat {
140 /* Association (list entry and a link pointer) with NAT policy. */ 140 /* Association (list entry and a link pointer) with NAT policy. */
141 LIST_ENTRY(npf_nat) nt_entry; 141 LIST_ENTRY(npf_nat) nt_entry;
142 npf_natpolicy_t * nt_natpolicy; 142 npf_natpolicy_t * nt_natpolicy;
143 npf_session_t * nt_session; 143 npf_session_t * nt_session;
144 /* Original address and port (for backwards translation). */ 144 /* Original address and port (for backwards translation). */
145 npf_addr_t nt_oaddr; 145 npf_addr_t nt_oaddr;
146 in_port_t nt_oport; 146 in_port_t nt_oport;
147 /* Translation port (for redirects). */ 147 /* Translation port (for redirects). */
148 in_port_t nt_tport; 148 in_port_t nt_tport;
149 /* ALG (if any) associated with this NAT entry. */ 149 /* ALG (if any) associated with this NAT entry. */
150 npf_alg_t * nt_alg; 150 npf_alg_t * nt_alg;
151 uintptr_t nt_alg_arg; 151 uintptr_t nt_alg_arg;
152}; 152};
153 153
154static pool_cache_t nat_cache __read_mostly; 154static pool_cache_t nat_cache __read_mostly;
155 155
156/* 156/*
157 * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures. 157 * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
158 */ 158 */
159 159
160void 160void
161npf_nat_sysinit(void) 161npf_nat_sysinit(void)
162{ 162{
163 nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit, 163 nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
164 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL); 164 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
165 KASSERT(nat_cache != NULL); 165 KASSERT(nat_cache != NULL);
166} 166}
167 167
168void 168void
169npf_nat_sysfini(void) 169npf_nat_sysfini(void)
170{ 170{
171 /* All NAT policies should already be destroyed. */ 171 /* All NAT policies should already be destroyed. */
172 pool_cache_destroy(nat_cache); 172 pool_cache_destroy(nat_cache);
173} 173}
174 174
175/* 175/*
176 * npf_nat_newpolicy: create a new NAT policy. 176 * npf_nat_newpolicy: create a new NAT policy.
177 * 177 *
178 * => Shares portmap if policy is on existing translation address. 178 * => Shares portmap if policy is on existing translation address.
179 */ 179 */
180npf_natpolicy_t * 180npf_natpolicy_t *
181npf_nat_newpolicy(prop_dictionary_t natdict, npf_ruleset_t *nrlset) 181npf_nat_newpolicy(prop_dictionary_t natdict, npf_ruleset_t *nrlset)
182{ 182{
183 npf_natpolicy_t *np; 183 npf_natpolicy_t *np;
184 prop_object_t obj; 184 prop_object_t obj;
185 npf_portmap_t *pm; 185 npf_portmap_t *pm;
186 186
187 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); 187 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
188 188
189 /* Translation type and flags. */ 189 /* Translation type and flags. */
190 prop_dictionary_get_int32(natdict, "type", &np->n_type); 190 prop_dictionary_get_int32(natdict, "type", &np->n_type);
191 prop_dictionary_get_uint32(natdict, "flags", &np->n_flags); 191 prop_dictionary_get_uint32(natdict, "flags", &np->n_flags);
192 192
193 /* Should be exclusively either inbound or outbound NAT. */ 193 /* Should be exclusively either inbound or outbound NAT. */
194 if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) { 194 if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) {
195 goto err; 195 goto err;
196 } 196 }
197 mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET); 197 mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET);
198 cv_init(&np->n_cv, "npfnatcv"); 198 cv_init(&np->n_cv, "npfnatcv");
199 LIST_INIT(&np->n_nat_list); 199 LIST_INIT(&np->n_nat_list);
200 200
201 /* Translation IP, mask and port (if applicable). */ 201 /* Translation IP, mask and port (if applicable). */
202 obj = prop_dictionary_get(natdict, "translation-ip"); 202 obj = prop_dictionary_get(natdict, "translation-ip");
203 np->n_addr_sz = prop_data_size(obj); 203 np->n_addr_sz = prop_data_size(obj);
204 if (np->n_addr_sz == 0 || np->n_addr_sz > sizeof(npf_addr_t)) { 204 if (np->n_addr_sz == 0 || np->n_addr_sz > sizeof(npf_addr_t)) {
205 goto err; 205 goto err;
206 } 206 }
207 memcpy(&np->n_taddr, prop_data_data_nocopy(obj), np->n_addr_sz); 207 memcpy(&np->n_taddr, prop_data_data_nocopy(obj), np->n_addr_sz);
208 prop_dictionary_get_uint8(natdict, "translation-mask", &np->n_tmask); 208 prop_dictionary_get_uint8(natdict, "translation-mask", &np->n_tmask);
209 prop_dictionary_get_uint16(natdict, "translation-port", &np->n_tport); 209 prop_dictionary_get_uint16(natdict, "translation-port", &np->n_tport);
210 210
211 prop_dictionary_get_uint32(natdict, "translation-algo", &np->n_algo); 211 prop_dictionary_get_uint32(natdict, "translation-algo", &np->n_algo);
212 switch (np->n_algo) { 212 switch (np->n_algo) {
213 case NPF_ALGO_NPT66: 213 case NPF_ALGO_NPT66:
214 prop_dictionary_get_uint16(natdict, "npt66-adjustment", 214 prop_dictionary_get_uint16(natdict, "npt66-adjustment",
215 &np->n_npt66_adj); 215 &np->n_npt66_adj);
216 break; 216 break;
217 default: 217 default:
218 if (np->n_tmask != NPF_NO_NETMASK) 218 if (np->n_tmask != NPF_NO_NETMASK)
219 goto err; 219 goto err;
220 break; 220 break;
221 } 221 }
222 222
223 /* Determine if port map is needed. */ 223 /* Determine if port map is needed. */
224 np->n_portmap = NULL; 224 np->n_portmap = NULL;
225 if ((np->n_flags & NPF_NAT_PORTMAP) == 0) { 225 if ((np->n_flags & NPF_NAT_PORTMAP) == 0) {
226 /* No port map. */ 226 /* No port map. */
227 return np; 227 return np;
228 } 228 }
229 229
230 /* 230 /*
231 * Inspect NAT policies in the ruleset for port map sharing. 231 * Inspect NAT policies in the ruleset for port map sharing.
232 * Note that npf_ruleset_sharepm() will increase the reference count. 232 * Note that npf_ruleset_sharepm() will increase the reference count.
233 */ 233 */
234 if (!npf_ruleset_sharepm(nrlset, np)) { 234 if (!npf_ruleset_sharepm(nrlset, np)) {
235 /* Allocate a new port map for the NAT policy. */ 235 /* Allocate a new port map for the NAT policy. */
236 pm = kmem_zalloc(PORTMAP_MEM_SIZE, KM_SLEEP); 236 pm = kmem_zalloc(PORTMAP_MEM_SIZE, KM_SLEEP);
237 pm->p_refcnt = 1; 237 pm->p_refcnt = 1;
238 KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm)); 238 KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
239 np->n_portmap = pm; 239 np->n_portmap = pm;
240 } else { 240 } else {
241 KASSERT(np->n_portmap != NULL); 241 KASSERT(np->n_portmap != NULL);
242 } 242 }
243 return np; 243 return np;
244err: 244err:
245 kmem_free(np, sizeof(npf_natpolicy_t)); 245 kmem_free(np, sizeof(npf_natpolicy_t));
246 return NULL; 246 return NULL;
247} 247}
248 248
249/* 249/*
250 * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap. 250 * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap.
251 * 251 *
252 * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). 252 * => Called from npf_rule_free() during the reload via npf_ruleset_destroy().
253 */ 253 */
254void 254void
255npf_nat_freepolicy(npf_natpolicy_t *np) 255npf_nat_freepolicy(npf_natpolicy_t *np)
256{ 256{
257 npf_portmap_t *pm = np->n_portmap; 257 npf_portmap_t *pm = np->n_portmap;
258 npf_session_t *se; 258 npf_session_t *se;
259 npf_nat_t *nt; 259 npf_nat_t *nt;
260 260
261 /* 261 /*
262 * Disassociate all entries from the policy. At this point, 262 * Disassociate all entries from the policy. At this point,
263 * new entries can no longer be created for this policy. 263 * new entries can no longer be created for this policy.
264 */ 264 */
265 mutex_enter(&np->n_lock); 265 mutex_enter(&np->n_lock);
266 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 266 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
267 se = nt->nt_session; 267 se = nt->nt_session;
268 KASSERT(se != NULL); 268 KASSERT(se != NULL);
269 npf_session_expire(se); 269 npf_session_expire(se);
270 } 270 }
271 while (!LIST_EMPTY(&np->n_nat_list)) { 271 while (!LIST_EMPTY(&np->n_nat_list)) {
272 cv_wait(&np->n_cv, &np->n_lock); 272 cv_wait(&np->n_cv, &np->n_lock);
273 } 273 }
274 mutex_exit(&np->n_lock); 274 mutex_exit(&np->n_lock);
275 275
276 /* Kick the worker - all references should be going away. */ 276 /* Kick the worker - all references should be going away. */
277 npf_worker_signal(); 277 npf_worker_signal();
278 while (np->n_refcnt) { 278 while (np->n_refcnt) {
279 kpause("npfgcnat", false, 1, NULL); 279 kpause("npfgcnat", false, 1, NULL);
280 } 280 }
281 KASSERT(LIST_EMPTY(&np->n_nat_list)); 281 KASSERT(LIST_EMPTY(&np->n_nat_list));
282 282
283 /* Destroy the port map, on last reference. */ 283 /* Destroy the port map, on last reference. */
284 if (pm && --pm->p_refcnt == 0) { 284 if (pm && --pm->p_refcnt == 0) {
285 KASSERT((np->n_flags & NPF_NAT_PORTMAP) != 0); 285 KASSERT((np->n_flags & NPF_NAT_PORTMAP) != 0);
286 kmem_free(pm, PORTMAP_MEM_SIZE); 286 kmem_free(pm, PORTMAP_MEM_SIZE);
287 } 287 }
288 cv_destroy(&np->n_cv); 288 cv_destroy(&np->n_cv);
289 mutex_destroy(&np->n_lock); 289 mutex_destroy(&np->n_lock);
290 kmem_free(np, sizeof(npf_natpolicy_t)); 290 kmem_free(np, sizeof(npf_natpolicy_t));
291} 291}
292 292
293void 293void
294npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg) 294npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg)
295{ 295{
296 npf_nat_t *nt; 296 npf_nat_t *nt;
297 297
298 mutex_enter(&np->n_lock); 298 mutex_enter(&np->n_lock);
299 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 299 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
300 if (nt->nt_alg != alg) { 300 if (nt->nt_alg != alg) {
301 continue; 301 continue;
302 } 302 }
303 nt->nt_alg = NULL; 303 nt->nt_alg = NULL;
304 } 304 }
305 mutex_exit(&np->n_lock); 305 mutex_exit(&np->n_lock);
306} 306}
307 307
308/* 308/*
309 * npf_nat_matchpolicy: compare two NAT policies. 309 * npf_nat_matchpolicy: compare two NAT policies.
310 * 310 *
311 * => Return 0 on match, and non-zero otherwise. 311 * => Return 0 on match, and non-zero otherwise.
312 */ 312 */
313bool 313bool
314npf_nat_matchpolicy(npf_natpolicy_t *np, npf_natpolicy_t *mnp) 314npf_nat_matchpolicy(npf_natpolicy_t *np, npf_natpolicy_t *mnp)
315{ 315{
316 void *np_raw, *mnp_raw; 316 void *np_raw, *mnp_raw;
317 /* 317 /*
318 * Compare the relevant NAT policy information (in raw form), 318 * Compare the relevant NAT policy information (in raw form),
319 * which is enough for matching criterion. 319 * which is enough for matching criterion.
320 */ 320 */
321 KASSERT(np && mnp && np != mnp); 321 KASSERT(np && mnp && np != mnp);
322 np_raw = (uint8_t *)np + NPF_NP_CMP_START; 322 np_raw = (uint8_t *)np + NPF_NP_CMP_START;
323 mnp_raw = (uint8_t *)mnp + NPF_NP_CMP_START; 323 mnp_raw = (uint8_t *)mnp + NPF_NP_CMP_START;
324 return (memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0); 324 return (memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0);
325} 325}
326 326
327bool 327bool
328npf_nat_sharepm(npf_natpolicy_t *np, npf_natpolicy_t *mnp) 328npf_nat_sharepm(npf_natpolicy_t *np, npf_natpolicy_t *mnp)
329{ 329{
330 npf_portmap_t *pm, *mpm; 330 npf_portmap_t *pm, *mpm;
331 331
332 KASSERT(np && mnp && np != mnp); 332 KASSERT(np && mnp && np != mnp);
333 333
334 /* Using port map and having equal translation address? */ 334 /* Using port map and having equal translation address? */
335 if ((np->n_flags & mnp->n_flags & NPF_NAT_PORTMAP) == 0) { 335 if ((np->n_flags & mnp->n_flags & NPF_NAT_PORTMAP) == 0) {
336 return false; 336 return false;
337 } 337 }
338 if (np->n_addr_sz != mnp->n_addr_sz) { 338 if (np->n_addr_sz != mnp->n_addr_sz) {
339 return false; 339 return false;
340 } 340 }
341 if (memcmp(&np->n_taddr, &mnp->n_taddr, np->n_addr_sz) != 0) { 341 if (memcmp(&np->n_taddr, &mnp->n_taddr, np->n_addr_sz) != 0) {
342 return false; 342 return false;
343 } 343 }
344 /* If NAT policy has an old port map - drop the reference. */ 344 /* If NAT policy has an old port map - drop the reference. */
345 mpm = mnp->n_portmap; 345 mpm = mnp->n_portmap;
346 if (mpm) { 346 if (mpm) {
347 /* Note: at this point we cannot hold a last reference. */ 347 /* Note: at this point we cannot hold a last reference. */
348 KASSERT(mpm->p_refcnt > 1); 348 KASSERT(mpm->p_refcnt > 1);
349 mpm->p_refcnt--; 349 mpm->p_refcnt--;
350 } 350 }
351 /* Share the port map. */ 351 /* Share the port map. */
352 pm = np->n_portmap; 352 pm = np->n_portmap;
353 mnp->n_portmap = pm; 353 mnp->n_portmap = pm;
354 pm->p_refcnt++; 354 pm->p_refcnt++;
355 return true; 355 return true;
356} 356}
357 357
358/* 358/*
359 * npf_nat_getport: allocate and return a port in the NAT policy portmap. 359 * npf_nat_getport: allocate and return a port in the NAT policy portmap.
360 * 360 *
361 * => Returns in network byte-order. 361 * => Returns in network byte-order.
362 * => Zero indicates failure. 362 * => Zero indicates failure.
363 */ 363 */
364static in_port_t 364static in_port_t
365npf_nat_getport(npf_natpolicy_t *np) 365npf_nat_getport(npf_natpolicy_t *np)
366{ 366{
367 npf_portmap_t *pm = np->n_portmap; 367 npf_portmap_t *pm = np->n_portmap;
368 u_int n = PORTMAP_SIZE, idx, bit; 368 u_int n = PORTMAP_SIZE, idx, bit;
369 uint32_t map, nmap; 369 uint32_t map, nmap;
370 370
371 idx = cprng_fast32() % PORTMAP_SIZE; 371 idx = cprng_fast32() % PORTMAP_SIZE;
372 for (;;) { 372 for (;;) {
373 KASSERT(idx < PORTMAP_SIZE); 373 KASSERT(idx < PORTMAP_SIZE);
374 map = pm->p_bitmap[idx]; 374 map = pm->p_bitmap[idx];
375 if (__predict_false(map == PORTMAP_FILLED)) { 375 if (__predict_false(map == PORTMAP_FILLED)) {
376 if (n-- == 0) { 376 if (n-- == 0) {
377 /* No space. */ 377 /* No space. */
378 return 0; 378 return 0;
379 } 379 }
380 /* This bitmap is filled, next. */ 380 /* This bitmap is filled, next. */
381 idx = (idx ? idx : PORTMAP_SIZE) - 1; 381 idx = (idx ? idx : PORTMAP_SIZE) - 1;
382 continue; 382 continue;
383 } 383 }
384 bit = ffs32(~map) - 1; 384 bit = ffs32(~map) - 1;
385 nmap = map | (1 << bit); 385 nmap = map | (1 << bit);
386 if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) { 386 if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
387 /* Success. */ 387 /* Success. */
388 break; 388 break;
389 } 389 }
390 } 390 }
391 return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit); 391 return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
392} 392}
393 393
394/* 394/*
395 * npf_nat_takeport: allocate specific port in the NAT policy portmap. 395 * npf_nat_takeport: allocate specific port in the NAT policy portmap.
396 */ 396 */
397static bool 397static bool
398npf_nat_takeport(npf_natpolicy_t *np, in_port_t port) 398npf_nat_takeport(npf_natpolicy_t *np, in_port_t port)
399{ 399{
400 npf_portmap_t *pm = np->n_portmap; 400 npf_portmap_t *pm = np->n_portmap;
401 uint32_t map, nmap; 401 uint32_t map, nmap;
402 u_int idx, bit; 402 u_int idx, bit;
403 403
404 port = ntohs(port) - PORTMAP_FIRST; 404 port = ntohs(port) - PORTMAP_FIRST;
405 idx = port >> PORTMAP_SHIFT; 405 idx = port >> PORTMAP_SHIFT;
406 bit = port & PORTMAP_MASK; 406 bit = port & PORTMAP_MASK;
407 map = pm->p_bitmap[idx]; 407 map = pm->p_bitmap[idx];
408 nmap = map | (1 << bit); 408 nmap = map | (1 << bit);
409 if (map == nmap) { 409 if (map == nmap) {
410 /* Already taken. */ 410 /* Already taken. */
411 return false; 411 return false;
412 } 412 }
413 return atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map; 413 return atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map;
414} 414}
415 415
416/* 416/*
417 * npf_nat_putport: return port as available in the NAT policy portmap. 417 * npf_nat_putport: return port as available in the NAT policy portmap.
418 * 418 *
419 * => Port should be in network byte-order. 419 * => Port should be in network byte-order.
420 */ 420 */
421static void 421static void
422npf_nat_putport(npf_natpolicy_t *np, in_port_t port) 422npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
423{ 423{
424 npf_portmap_t *pm = np->n_portmap; 424 npf_portmap_t *pm = np->n_portmap;
425 uint32_t map, nmap; 425 uint32_t map, nmap;
426 u_int idx, bit; 426 u_int idx, bit;
427 427
428 port = ntohs(port) - PORTMAP_FIRST; 428 port = ntohs(port) - PORTMAP_FIRST;
429 idx = port >> PORTMAP_SHIFT; 429 idx = port >> PORTMAP_SHIFT;
430 bit = port & PORTMAP_MASK; 430 bit = port & PORTMAP_MASK;
431 do { 431 do {
432 map = pm->p_bitmap[idx]; 432 map = pm->p_bitmap[idx];
433 KASSERT(map | (1 << bit)); 433 KASSERT(map | (1 << bit));
434 nmap = map & ~(1 << bit); 434 nmap = map & ~(1 << bit);
435 } while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map); 435 } while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
436} 436}
437 437
438/* 438/*
439 * npf_nat_which: tell which address (source or destination) should be 439 * npf_nat_which: tell which address (source or destination) should be
440 * rewritten given the combination of the NAT type and flow direction. 440 * rewritten given the combination of the NAT type and flow direction.
441 */ 441 */
442static inline u_int 442static inline u_int
443npf_nat_which(const int type, bool forw) 443npf_nat_which(const int type, bool forw)
444{ 444{
445 /* 445 /*
446 * Outbound NAT rewrites: 446 * Outbound NAT rewrites:
447 * - Source (NPF_SRC) on "forwards" stream. 447 * - Source (NPF_SRC) on "forwards" stream.
448 * - Destination (NPF_DST) on "backwards" stream. 448 * - Destination (NPF_DST) on "backwards" stream.
449 * Inbound NAT is other way round. 449 * Inbound NAT is other way round.
450 */ 450 */
451 if (type == NPF_NATOUT) { 451 if (type == NPF_NATOUT) {
452 forw = !forw; 452 forw = !forw;
453 } else { 453 } else {
454 KASSERT(type == NPF_NATIN); 454 KASSERT(type == NPF_NATIN);
455 } 455 }
456 CTASSERT(NPF_SRC == 0 && NPF_DST == 1); 456 CTASSERT(NPF_SRC == 0 && NPF_DST == 1);
457 KASSERT(forw == NPF_SRC || forw == NPF_DST); 457 KASSERT(forw == NPF_SRC || forw == NPF_DST);
458 return (u_int)forw; 458 return (u_int)forw;
459} 459}
460 460
461/* 461/*
462 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy. 462 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
463 * 463 *
464 * => Acquire a reference on the policy, if found. 464 * => Acquire a reference on the policy, if found.
465 */ 465 */
466static npf_natpolicy_t * 466static npf_natpolicy_t *
467npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di) 467npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di)
468{ 468{
469 int slock = npf_config_read_enter(); 469 int slock = npf_config_read_enter();
470 npf_ruleset_t *rlset = npf_config_natset(); 470 npf_ruleset_t *rlset = npf_config_natset();
471 npf_natpolicy_t *np; 471 npf_natpolicy_t *np;
472 npf_rule_t *rl; 472 npf_rule_t *rl;
473 473
474 rl = npf_ruleset_inspect(npc, nbuf, rlset, di, NPF_LAYER_3); 474 rl = npf_ruleset_inspect(npc, nbuf, rlset, di, NPF_LAYER_3);
475 if (rl == NULL) { 475 if (rl == NULL) {
476 npf_config_read_exit(slock); 476 npf_config_read_exit(slock);
477 return NULL; 477 return NULL;
478 } 478 }
479 np = npf_rule_getnat(rl); 479 np = npf_rule_getnat(rl);
480 atomic_inc_uint(&np->n_refcnt); 480 atomic_inc_uint(&np->n_refcnt);
481 npf_config_read_exit(slock); 481 npf_config_read_exit(slock);
482 return np; 482 return np;
483} 483}
484 484
485/* 485/*
486 * npf_nat_create: create a new NAT translation entry. 486 * npf_nat_create: create a new NAT translation entry.
487 */ 487 */
488static npf_nat_t * 488static npf_nat_t *
489npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_session_t *se) 489npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_session_t *se)
490{ 490{
491 const int proto = npc->npc_proto; 491 const int proto = npc->npc_proto;
492 npf_nat_t *nt; 492 npf_nat_t *nt;
493 493
494 KASSERT(npf_iscached(npc, NPC_IP46)); 494 KASSERT(npf_iscached(npc, NPC_IP46));
495 KASSERT(npf_iscached(npc, NPC_LAYER4)); 495 KASSERT(npf_iscached(npc, NPC_LAYER4));
496 496
497 /* Construct a new NAT entry and associate it with the session. */ 497 /* Construct a new NAT entry and associate it with the session. */
498 nt = pool_cache_get(nat_cache, PR_NOWAIT); 498 nt = pool_cache_get(nat_cache, PR_NOWAIT);
499 if (nt == NULL){ 499 if (nt == NULL){
500 return NULL; 500 return NULL;
501 } 501 }
502 npf_stats_inc(NPF_STAT_NAT_CREATE); 502 npf_stats_inc(NPF_STAT_NAT_CREATE);
503 nt->nt_natpolicy = np; 503 nt->nt_natpolicy = np;
504 nt->nt_session = se; 504 nt->nt_session = se;
505 nt->nt_alg = NULL; 505 nt->nt_alg = NULL;
506 506
507 /* Save the original address which may be rewritten. */ 507 /* Save the original address which may be rewritten. */
508 if (np->n_type == NPF_NATOUT) { 508 if (np->n_type == NPF_NATOUT) {
509 /* Outbound NAT: source (think internal) address. */ 509 /* Outbound NAT: source (think internal) address. */
510 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], npc->npc_alen); 510 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], npc->npc_alen);
511 } else { 511 } else {
512 /* Inbound NAT: destination (think external) address. */ 512 /* Inbound NAT: destination (think external) address. */
513 KASSERT(np->n_type == NPF_NATIN); 513 KASSERT(np->n_type == NPF_NATIN);
514 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], npc->npc_alen); 514 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], npc->npc_alen);
515 } 515 }
516 516
517 /* 517 /*
518 * Port translation, if required, and if it is TCP/UDP. 518 * Port translation, if required, and if it is TCP/UDP.
519 */ 519 */
520 if ((np->n_flags & NPF_NAT_PORTS) == 0 || 520 if ((np->n_flags & NPF_NAT_PORTS) == 0 ||
521 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) { 521 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) {
522 nt->nt_oport = 0; 522 nt->nt_oport = 0;
523 nt->nt_tport = 0; 523 nt->nt_tport = 0;
524 goto out; 524 goto out;
525 } 525 }
526 526
527 /* Save the relevant TCP/UDP port. */ 527 /* Save the relevant TCP/UDP port. */
528 if (proto == IPPROTO_TCP) { 528 if (proto == IPPROTO_TCP) {
529 const struct tcphdr *th = npc->npc_l4.tcp; 529 const struct tcphdr *th = npc->npc_l4.tcp;
530 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 530 nt->nt_oport = (np->n_type == NPF_NATOUT) ?
531 th->th_sport : th->th_dport; 531 th->th_sport : th->th_dport;
532 } else { 532 } else {
533 const struct udphdr *uh = npc->npc_l4.udp; 533 const struct udphdr *uh = npc->npc_l4.udp;
534 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 534 nt->nt_oport = (np->n_type == NPF_NATOUT) ?
535 uh->uh_sport : uh->uh_dport; 535 uh->uh_sport : uh->uh_dport;
536 } 536 }
537 537
538 /* Get a new port for translation. */ 538 /* Get a new port for translation. */
539 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) { 539 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
540 nt->nt_tport = npf_nat_getport(np); 540 nt->nt_tport = npf_nat_getport(np);
541 } else { 541 } else {
542 nt->nt_tport = np->n_tport; 542 nt->nt_tport = np->n_tport;
543 } 543 }
544out: 544out:
545 mutex_enter(&np->n_lock); 545 mutex_enter(&np->n_lock);
546 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 546 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
547 mutex_exit(&np->n_lock); 547 mutex_exit(&np->n_lock);
548 return nt; 548 return nt;
549} 549}
550 550
551/* 551/*
552 * npf_nat_rwr: perform address and/or port translation. 
553 */ 
554static int 
555npf_nat_rwr(npf_cache_t *npc, const npf_natpolicy_t *np, 
556 const npf_addr_t *addr, const in_addr_t port, bool forw) 
557{ 
558 const unsigned proto = npc->npc_proto; 
559 const u_int which = npf_nat_which(np->n_type, forw); 
560 
561 /* 
562 * Rewrite IP and/or TCP/UDP checksums first, since we need the 
563 * current (old) address/port for the calculations. Then perform 
564 * the address translation i.e. rewrite source or destination. 
565 */ 
566 if (!npf_rwrcksum(npc, which, addr, port)) { 
567 return EINVAL; 
568 } 
569 if (!npf_rwrip(npc, which, addr)) { 
570 return EINVAL; 
571 } 
572 if ((np->n_flags & NPF_NAT_PORTS) == 0) { 
573 /* Done. */ 
574 return 0; 
575 } 
576 
577 switch (proto) { 
578 case IPPROTO_TCP: 
579 case IPPROTO_UDP: 
580 /* Rewrite source/destination port. */ 
581 if (!npf_rwrport(npc, which, port)) { 
582 return EINVAL; 
583 } 
584 break; 
585 case IPPROTO_ICMP: 
586 KASSERT(npf_iscached(npc, NPC_ICMP)); 
587 /* Nothing. */ 
588 break; 
589 default: 
590 return ENOTSUP; 
591 } 
592 return 0; 
593} 
594 
595/* 
596 * npf_nat_translate: perform translation given the state data. 552 * npf_nat_translate: perform translation given the state data.
597 */ 553 */
598int 554static inline int
599npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw) 555npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw)
600{ 556{
601 const npf_natpolicy_t *np = nt->nt_natpolicy; 557 const npf_natpolicy_t *np = nt->nt_natpolicy;
 558 const u_int which = npf_nat_which(np->n_type, forw);
602 const npf_addr_t *addr; 559 const npf_addr_t *addr;
603 in_port_t port; 560 in_port_t port;
604 561
605 KASSERT(npf_iscached(npc, NPC_IP46)); 562 KASSERT(npf_iscached(npc, NPC_IP46));
606 KASSERT(npf_iscached(npc, NPC_LAYER4)); 563 KASSERT(npf_iscached(npc, NPC_LAYER4));
607 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 
608 564
609 if (forw) { 565 if (forw) {
610 /* "Forwards" stream: use translation address/port. */ 566 /* "Forwards" stream: use translation address/port. */
611 addr = &np->n_taddr; 567 addr = &np->n_taddr;
612 port = nt->nt_tport; 568 port = nt->nt_tport;
613 } else { 569 } else {
614 /* "Backwards" stream: use original address/port. */ 570 /* "Backwards" stream: use original address/port. */
615 addr = &nt->nt_oaddr; 571 addr = &nt->nt_oaddr;
616 port = nt->nt_oport; 572 port = nt->nt_oport;
617 } 573 }
618 KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0); 574 KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0);
619 575
620 /* Execute ALG hook first. */ 576 /* Execute ALG translation first. */
621 if ((npc->npc_info & NPC_ALG_EXEC) == 0) { 577 if ((npc->npc_info & NPC_ALG_EXEC) == 0) {
622 npc->npc_info |= NPC_ALG_EXEC; 578 npc->npc_info |= NPC_ALG_EXEC;
623 npf_alg_exec(npc, nbuf, nt, forw); 579 npf_alg_exec(npc, nbuf, nt, forw);
 580 npf_recache(npc, nbuf);
624 } 581 }
 582 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
625 583
626 /* Finally, perform the translation. */ 584 /* Finally, perform the translation. */
627 return npf_nat_rwr(npc, np, addr, port, forw); 585 return npf_napt_rwr(npc, which, addr, port);
628} 586}
629 587
630/* 588/*
631 * npf_nat_algo: perform the translation given the algorithm. 589 * npf_nat_algo: perform the translation given the algorithm.
632 */ 590 */
633static inline int  591static inline int
634npf_nat_algo(npf_cache_t *npc, const npf_natpolicy_t *np, bool forw) 592npf_nat_algo(npf_cache_t *npc, const npf_natpolicy_t *np, bool forw)
635{ 593{
636 u_int which; 594 const u_int which = npf_nat_which(np->n_type, forw);
637 int error; 595 int error;
638 596
639 switch (np->n_algo) { 597 switch (np->n_algo) {
640 case NPF_ALGO_NPT66: 598 case NPF_ALGO_NPT66:
641 which = npf_nat_which(np->n_type, forw); 
642 error = npf_npt66_rwr(npc, which, &np->n_taddr, 599 error = npf_npt66_rwr(npc, which, &np->n_taddr,
643 np->n_tmask, np->n_npt66_adj); 600 np->n_tmask, np->n_npt66_adj);
644 break; 601 break;
645 default: 602 default:
646 error = npf_nat_rwr(npc, np, &np->n_taddr, np->n_tport, forw); 603 error = npf_napt_rwr(npc, which, &np->n_taddr, np->n_tport);
647 break; 604 break;
648 } 605 }
649 606
650 return error; 607 return error;
651}  608}
652 609
653/* 610/*
654 * npf_do_nat: 611 * npf_do_nat:
655 * - Inspect packet for a NAT policy, unless a session with a NAT 612 * - Inspect packet for a NAT policy, unless a session with a NAT
656 * association already exists. In such case, determine whether it 613 * association already exists. In such case, determine whether it
657 * is a "forwards" or "backwards" stream. 614 * is a "forwards" or "backwards" stream.
658 * - Perform translation: rewrite source or destination fields, 615 * - Perform translation: rewrite source or destination fields,
659 * depending on translation type and direction. 616 * depending on translation type and direction.
660 * - Associate a NAT policy with a session (may establish a new). 617 * - Associate a NAT policy with a session (may establish a new).
661 */ 618 */
662int 619int
663npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int di) 620npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int di)
664{ 621{
665 npf_session_t *nse = NULL; 622 npf_session_t *nse = NULL;
666 npf_natpolicy_t *np; 623 npf_natpolicy_t *np;
667 npf_nat_t *nt; 624 npf_nat_t *nt;
668 int error; 625 int error;
669 bool forw; 626 bool forw;
670 627
671 /* All relevant IPv4 data should be already cached. */ 628 /* All relevant IPv4 data should be already cached. */
672 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 629 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
673 return 0; 630 return 0;
674 } 631 }
675 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 632 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
676 633
677 /* 634 /*
678 * Return the NAT entry associated with the session, if any. 635 * Return the NAT entry associated with the session, if any.
679 * Determines whether the stream is "forwards" or "backwards". 636 * Determines whether the stream is "forwards" or "backwards".
680 * Note: no need to lock, since reference on session is held. 637 * Note: no need to lock, since reference on session is held.
681 */ 638 */
682 if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) { 639 if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
683 np = nt->nt_natpolicy; 640 np = nt->nt_natpolicy;
684 goto translate; 641 goto translate;
685 } 642 }
686 643
687 /* 644 /*
688 * Inspect the packet for a NAT policy, if there is no session. 645 * Inspect the packet for a NAT policy, if there is no session.
689 * Note: acquires a reference if found. 646 * Note: acquires a reference if found.
690 */ 647 */
691 np = npf_nat_inspect(npc, nbuf, di); 648 np = npf_nat_inspect(npc, nbuf, di);
692 if (np == NULL) { 649 if (np == NULL) {
693 /* If packet does not match - done. */ 650 /* If packet does not match - done. */
694 return 0; 651 return 0;
695 } 652 }
696 forw = true; 653 forw = true;
697 654
698 /* Static NAT - just perform the translation. */ 655 /* Static NAT - just perform the translation. */
699 if (np->n_flags & NPF_NAT_STATIC) { 656 if (np->n_flags & NPF_NAT_STATIC) {
700 if (nbuf_cksum_barrier(nbuf, di)) { 657 if (nbuf_cksum_barrier(nbuf, di)) {
701 npf_recache(npc, nbuf); 658 npf_recache(npc, nbuf);
702 } 659 }
703 error = npf_nat_algo(npc, np, forw); 660 error = npf_nat_algo(npc, np, forw);
704 atomic_dec_uint(&np->n_refcnt); 661 atomic_dec_uint(&np->n_refcnt);
705 return error; 662 return error;
706 } 663 }
707 664
708 /* 665 /*
709 * If there is no local session (no "stateful" rule - unusual, but 666 * If there is no local session (no "stateful" rule - unusual, but
710 * possible configuration), establish one before translation. Note 667 * possible configuration), establish one before translation. Note
711 * that it is not a "pass" session, therefore passing of "backwards" 668 * that it is not a "pass" session, therefore passing of "backwards"
712 * stream depends on other, stateless filtering rules. 669 * stream depends on other, stateless filtering rules.
713 */ 670 */
714 if (se == NULL) { 671 if (se == NULL) {
715 nse = npf_session_establish(npc, nbuf, di); 672 nse = npf_session_establish(npc, nbuf, di);
716 if (nse == NULL) { 673 if (nse == NULL) {
717 atomic_dec_uint(&np->n_refcnt); 674 atomic_dec_uint(&np->n_refcnt);
718 return ENOMEM; 675 return ENOMEM;
719 } 676 }
720 se = nse; 677 se = nse;
721 } 678 }
722 679
723 /* 680 /*
724 * Create a new NAT entry and associate with the session. 681 * Create a new NAT entry and associate with the session.
725 * We will consume the reference on success (release on error). 682 * We will consume the reference on success (release on error).
726 */ 683 */
727 nt = npf_nat_create(npc, np, se); 684 nt = npf_nat_create(npc, np, se);
728 if (nt == NULL) { 685 if (nt == NULL) {
729 atomic_dec_uint(&np->n_refcnt); 686 atomic_dec_uint(&np->n_refcnt);
730 error = ENOMEM; 687 error = ENOMEM;
731 goto out; 688 goto out;
732 } 689 }
733 690
734 /* Associate the NAT translation entry with the session. */ 691 /* Associate the NAT translation entry with the session. */
735 error = npf_session_setnat(se, nt, np->n_type); 692 error = npf_session_setnat(se, nt, np->n_type);
736 if (error) { 693 if (error) {
737 /* Will release the reference. */ 694 /* Will release the reference. */
738 npf_nat_destroy(nt); 695 npf_nat_destroy(nt);
739 goto out; 696 goto out;
740 } 697 }
741 698
742 /* Determine whether any ALG matches. */ 699 /* Determine whether any ALG matches. */
743 if (npf_alg_match(npc, nbuf, nt, di)) { 700 if (npf_alg_match(npc, nbuf, nt, di)) {
744 KASSERT(nt->nt_alg != NULL); 701 KASSERT(nt->nt_alg != NULL);
745 } 702 }
746 703
747translate: 704translate:
748 /* May need to process the delayed checksums first (XXX: NetBSD). */ 705 /* May need to process the delayed checksums first (XXX: NetBSD). */
749 if (nbuf_cksum_barrier(nbuf, di)) { 706 if (nbuf_cksum_barrier(nbuf, di)) {
750 npf_recache(npc, nbuf); 707 npf_recache(npc, nbuf);
751 } 708 }
752 709
753 /* Perform the translation. */ 710 /* Perform the translation. */
754 error = npf_nat_translate(npc, nbuf, nt, forw); 711 error = npf_nat_translate(npc, nbuf, nt, forw);
755out: 712out:
756 if (__predict_false(nse)) { 713 if (__predict_false(nse)) {
757 if (error) { 714 if (error) {
758 /* It created for NAT - just expire. */ 715 /* It created for NAT - just expire. */
759 npf_session_expire(nse); 716 npf_session_expire(nse);
760 } 717 }
761 npf_session_release(nse); 718 npf_session_release(nse);
762 } 719 }
763 return error; 720 return error;
764} 721}
765 722
766/* 723/*
767 * npf_nat_gettrans: return translation IP address and port. 724 * npf_nat_gettrans: return translation IP address and port.
768 */ 725 */
769void 726void
770npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 727npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
771{ 728{
772 npf_natpolicy_t *np = nt->nt_natpolicy; 729 npf_natpolicy_t *np = nt->nt_natpolicy;
773 730
774 *addr = &np->n_taddr; 731 *addr = &np->n_taddr;
775 *port = nt->nt_tport; 732 *port = nt->nt_tport;
776} 733}
777 734
778/* 735/*
779 * npf_nat_getorig: return original IP address and port from translation entry. 736 * npf_nat_getorig: return original IP address and port from translation entry.
780 */ 737 */
781void 738void
782npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 739npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
783{ 740{
784 *addr = &nt->nt_oaddr; 741 *addr = &nt->nt_oaddr;
785 *port = nt->nt_oport; 742 *port = nt->nt_oport;
786} 743}
787 744
788/* 745/*
789 * npf_nat_setalg: associate an ALG with the NAT entry. 746 * npf_nat_setalg: associate an ALG with the NAT entry.
790 */ 747 */
791void 748void
792npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg) 749npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
793{ 750{
794 nt->nt_alg = alg; 751 nt->nt_alg = alg;
795 nt->nt_alg_arg = arg; 752 nt->nt_alg_arg = arg;
796} 753}
797 754
798/* 755/*
799 * npf_nat_destroy: destroy NAT structure (performed on session expiration). 756 * npf_nat_destroy: destroy NAT structure (performed on session expiration).
800 */ 757 */
801void 758void
802npf_nat_destroy(npf_nat_t *nt) 759npf_nat_destroy(npf_nat_t *nt)
803{ 760{
804 npf_natpolicy_t *np = nt->nt_natpolicy; 761 npf_natpolicy_t *np = nt->nt_natpolicy;
805 762
806 /* Return any taken port to the portmap. */ 763 /* Return any taken port to the portmap. */
807 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 764 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) {
808 npf_nat_putport(np, nt->nt_tport); 765 npf_nat_putport(np, nt->nt_tport);
809 } 766 }
810 767
811 mutex_enter(&np->n_lock); 768 mutex_enter(&np->n_lock);
812 LIST_REMOVE(nt, nt_entry); 769 LIST_REMOVE(nt, nt_entry);
813 if (LIST_EMPTY(&np->n_nat_list)) { 770 if (LIST_EMPTY(&np->n_nat_list)) {
814 /* Notify any waiters if empty. */ 771 /* Notify any waiters if empty. */
815 cv_broadcast(&np->n_cv); 772 cv_broadcast(&np->n_cv);
816 } 773 }
817 atomic_dec_uint(&np->n_refcnt); 774 atomic_dec_uint(&np->n_refcnt);
818 mutex_exit(&np->n_lock); 775 mutex_exit(&np->n_lock);
819 776
820 pool_cache_put(nat_cache, nt); 777 pool_cache_put(nat_cache, nt);
821 npf_stats_inc(NPF_STAT_NAT_DESTROY); 778 npf_stats_inc(NPF_STAT_NAT_DESTROY);
822} 779}
823 780
824/* 781/*
825 * npf_nat_save: construct NAT entry and reference to the NAT policy. 782 * npf_nat_save: construct NAT entry and reference to the NAT policy.
826 */ 783 */
827int 784int
828npf_nat_save(prop_dictionary_t sedict, prop_array_t natlist, npf_nat_t *nt) 785npf_nat_save(prop_dictionary_t sedict, prop_array_t natlist, npf_nat_t *nt)
829{ 786{
830 npf_natpolicy_t *np = nt->nt_natpolicy; 787 npf_natpolicy_t *np = nt->nt_natpolicy;
831 prop_object_iterator_t it; 788 prop_object_iterator_t it;
832 prop_dictionary_t npdict; 789 prop_dictionary_t npdict;
833 prop_data_t nd, npd; 790 prop_data_t nd, npd;
834 uint64_t itnp; 791 uint64_t itnp;
835 792
836 /* Set NAT entry data. */ 793 /* Set NAT entry data. */
837 nd = prop_data_create_data(nt, sizeof(npf_nat_t)); 794 nd = prop_data_create_data(nt, sizeof(npf_nat_t));
838 prop_dictionary_set(sedict, "nat-data", nd); 795 prop_dictionary_set(sedict, "nat-data", nd);
839 prop_object_release(nd); 796 prop_object_release(nd);
840 797
841 /* Find or create a NAT policy. */ 798 /* Find or create a NAT policy. */
842 it = prop_array_iterator(natlist); 799 it = prop_array_iterator(natlist);
843 while ((npdict = prop_object_iterator_next(it)) != NULL) { 800 while ((npdict = prop_object_iterator_next(it)) != NULL) {
844 CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t)); 801 CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
845 prop_dictionary_get_uint64(npdict, "id-ptr", &itnp); 802 prop_dictionary_get_uint64(npdict, "id-ptr", &itnp);
846 if ((uintptr_t)itnp == (uintptr_t)np) { 803 if ((uintptr_t)itnp == (uintptr_t)np) {
847 break; 804 break;
848 } 805 }
849 } 806 }
850 if (npdict == NULL) { 807 if (npdict == NULL) {
851 /* Create NAT policy dictionary and copy the data. */ 808 /* Create NAT policy dictionary and copy the data. */
852 npdict = prop_dictionary_create(); 809 npdict = prop_dictionary_create();
853 npd = prop_data_create_data(np, sizeof(npf_natpolicy_t)); 810 npd = prop_data_create_data(np, sizeof(npf_natpolicy_t));
854 prop_dictionary_set(npdict, "nat-policy-data", npd); 811 prop_dictionary_set(npdict, "nat-policy-data", npd);
855 prop_object_release(npd); 812 prop_object_release(npd);
856 813
857 CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t)); 814 CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
858 prop_dictionary_set_uint64(npdict, "id-ptr", (uintptr_t)np); 815 prop_dictionary_set_uint64(npdict, "id-ptr", (uintptr_t)np);
859 prop_array_add(natlist, npdict); 816 prop_array_add(natlist, npdict);
860 prop_object_release(npdict); 817 prop_object_release(npdict);
861 } 818 }
862 prop_dictionary_set(sedict, "nat-policy", npdict); 819 prop_dictionary_set(sedict, "nat-policy", npdict);
863 prop_object_release(npdict); 820 prop_object_release(npdict);
864 return 0; 821 return 0;
865} 822}
866 823
867/* 824/*
868 * npf_nat_restore: find a matching NAT policy and restore NAT entry. 825 * npf_nat_restore: find a matching NAT policy and restore NAT entry.
869 * 826 *
870 * => Caller should lock the active NAT ruleset. 827 * => Caller should lock the active NAT ruleset.
871 */ 828 */
872npf_nat_t * 829npf_nat_t *
873npf_nat_restore(prop_dictionary_t sedict, npf_session_t *se) 830npf_nat_restore(prop_dictionary_t sedict, npf_session_t *se)
874{ 831{
875 const npf_natpolicy_t *onp; 832 const npf_natpolicy_t *onp;
876 const npf_nat_t *ntraw; 833 const npf_nat_t *ntraw;
877 prop_object_t obj; 834 prop_object_t obj;
878 npf_natpolicy_t *np; 835 npf_natpolicy_t *np;
879 npf_rule_t *rl; 836 npf_rule_t *rl;
880 npf_nat_t *nt; 837 npf_nat_t *nt;
881 838
882 /* Get raw NAT entry. */ 839 /* Get raw NAT entry. */
883 obj = prop_dictionary_get(sedict, "nat-data"); 840 obj = prop_dictionary_get(sedict, "nat-data");
884 ntraw = prop_data_data_nocopy(obj); 841 ntraw = prop_data_data_nocopy(obj);
885 if (ntraw == NULL || prop_data_size(obj) != sizeof(npf_nat_t)) { 842 if (ntraw == NULL || prop_data_size(obj) != sizeof(npf_nat_t)) {
886 return NULL; 843 return NULL;
887 } 844 }
888 845
889 /* Find a stored NAT policy information. */ 846 /* Find a stored NAT policy information. */
890 obj = prop_dictionary_get( 847 obj = prop_dictionary_get(
891 prop_dictionary_get(sedict, "nat-policy"), "nat-policy-data"); 848 prop_dictionary_get(sedict, "nat-policy"), "nat-policy-data");
892 onp = prop_data_data_nocopy(obj); 849 onp = prop_data_data_nocopy(obj);
893 if (onp == NULL || prop_data_size(obj) != sizeof(npf_natpolicy_t)) { 850 if (onp == NULL || prop_data_size(obj) != sizeof(npf_natpolicy_t)) {
894 return NULL; 851 return NULL;
895 } 852 }
896 853
897 /* 854 /*
898 * Match if there is an existing NAT policy. Will acquire the 855 * Match if there is an existing NAT policy. Will acquire the
899 * reference on it if further operations are successful. 856 * reference on it if further operations are successful.
900 */ 857 */
901 KASSERT(npf_config_locked_p()); 858 KASSERT(npf_config_locked_p());
902 rl = npf_ruleset_matchnat(npf_config_natset(), __UNCONST(onp)); 859 rl = npf_ruleset_matchnat(npf_config_natset(), __UNCONST(onp));
903 if (rl == NULL) { 860 if (rl == NULL) {
904 return NULL; 861 return NULL;
905 } 862 }
906 np = npf_rule_getnat(rl); 863 np = npf_rule_getnat(rl);
907 KASSERT(np != NULL); 864 KASSERT(np != NULL);
908 865
909 /* Take a specific port from port-map. */ 866 /* Take a specific port from port-map. */
910 if (!npf_nat_takeport(np, ntraw->nt_tport)) { 867 if (!npf_nat_takeport(np, ntraw->nt_tport)) {
911 return NULL; 868 return NULL;
912 } 869 }
913 atomic_inc_uint(&np->n_refcnt); 870 atomic_inc_uint(&np->n_refcnt);
914 871
915 /* Create and return NAT entry for association. */ 872 /* Create and return NAT entry for association. */
916 nt = pool_cache_get(nat_cache, PR_WAITOK); 873 nt = pool_cache_get(nat_cache, PR_WAITOK);
917 memcpy(nt, ntraw, sizeof(npf_nat_t)); 874 memcpy(nt, ntraw, sizeof(npf_nat_t));
918 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 875 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
919 nt->nt_natpolicy = np; 876 nt->nt_natpolicy = np;
920 nt->nt_session = se; 877 nt->nt_session = se;
921 nt->nt_alg = NULL; 878 nt->nt_alg = NULL;
922 return nt; 879 return nt;
923} 880}
924 881
925#if defined(DDB) || defined(_NPF_TESTING) 882#if defined(DDB) || defined(_NPF_TESTING)
926 883
927void 884void
928npf_nat_dump(const npf_nat_t *nt) 885npf_nat_dump(const npf_nat_t *nt)
929{ 886{
930 const npf_natpolicy_t *np; 887 const npf_natpolicy_t *np;
931 struct in_addr ip; 888 struct in_addr ip;
932 889
933 np = nt->nt_natpolicy; 890 np = nt->nt_natpolicy;
934 memcpy(&ip, &np->n_taddr, sizeof(ip)); 891 memcpy(&ip, &np->n_taddr, sizeof(ip));
935 printf("\tNATP(%p): type %d flags 0x%x taddr %s tport %d\n", 892 printf("\tNATP(%p): type %d flags 0x%x taddr %s tport %d\n",
936 np, np->n_type, np->n_flags, inet_ntoa(ip), np->n_tport); 893 np, np->n_type, np->n_flags, inet_ntoa(ip), np->n_tport);
937 memcpy(&ip, &nt->nt_oaddr, sizeof(ip)); 894 memcpy(&ip, &nt->nt_oaddr, sizeof(ip));
938 printf("\tNAT: original address %s oport %d tport %d\n", 895 printf("\tNAT: original address %s oport %d tport %d\n",
939 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport)); 896 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport));
940 if (nt->nt_alg) { 897 if (nt->nt_alg) {
941 printf("\tNAT ALG = %p, ARG = %p\n", 898 printf("\tNAT ALG = %p, ARG = %p\n",
942 nt->nt_alg, (void *)nt->nt_alg_arg); 899 nt->nt_alg, (void *)nt->nt_alg_arg);
943 } 900 }
944} 901}
945 902
946#endif 903#endif