Tue Mar 13 09:04:03 2018 UTC ()
Fix two consecutive mistakes.

The first mistake was npf_inet.c rev1.37:

	"Don't reassemble ipv6 fragments, instead treat the first fragment
	as a regular packet (subject to filtering rules), and pass
	subsequent fragments in the same group unconditionally."

Doing this was entirely wrong, because then a packet just had to push
the L4 payload in a secondary fragment, and NPF wouldn't apply rules on
it - meaning any IPv6 packet could bypass >=L4 filtering. This mistake
was supposed to be a fix for the second mistake.

The second mistake was that ip6_reass_packet (in npf_reassembly) was
getting called with npc->npc_hlen. But npc_hlen pointed to the last
encountered header in the IPv6 chain, which was not necessarily the
fragment header. So ip6_reass_packet was given garbage, and would fail,
resulting in the packet getting kicked. So basically IPv6 was broken by
NPF.

The first mistake is reverted, and the second one is fixed by doing:

-			hlen = sizeof(struct ip6_frag);
+			hlen = 0;

Now the iteration stops on the fragment header, and the call to
ip6_reass_packet is valid.

My npf_inet.c rev1.38 is partially reverted: we don't need to worry
about failing to advance properly; once the packet is reassembled
npf_cache_ip gets called again, and this time the whole chain should be
there.

Tested with a simple UDPv6 server - sent a 3000-byte buffer, and the
packet now gets correctly reassembled by NPF.


(maxv)
diff -r1.38 -r1.39 src/sys/net/npf/npf_handler.c
diff -r1.39 -r1.40 src/sys/net/npf/npf_inet.c

cvs diff -r1.38 -r1.39 src/sys/net/npf/npf_handler.c (switch to unified diff)

--- src/sys/net/npf/npf_handler.c 2018/03/08 07:06:13 1.38
+++ src/sys/net/npf/npf_handler.c 2018/03/13 09:04:02 1.39
@@ -1,326 +1,319 @@ @@ -1,326 +1,319 @@
1/* $NetBSD: npf_handler.c,v 1.38 2018/03/08 07:06:13 maxv Exp $ */ 1/* $NetBSD: npf_handler.c,v 1.39 2018/03/13 09:04:02 maxv Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc. 4 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This material is based upon work partially supported by The 7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * NPF packet handler. 33 * NPF packet handler.
34 * 34 *
35 * Note: pfil(9) hooks are currently locked by softnet_lock and kernel-lock. 35 * Note: pfil(9) hooks are currently locked by softnet_lock and kernel-lock.
36 */ 36 */
37 37
38#ifdef _KERNEL 38#ifdef _KERNEL
39#include <sys/cdefs.h> 39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.38 2018/03/08 07:06:13 maxv Exp $"); 40__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.39 2018/03/13 09:04:02 maxv Exp $");
41 41
42#include <sys/types.h> 42#include <sys/types.h>
43#include <sys/param.h> 43#include <sys/param.h>
44 44
45#include <sys/mbuf.h> 45#include <sys/mbuf.h>
46#include <sys/mutex.h> 46#include <sys/mutex.h>
47#include <net/if.h> 47#include <net/if.h>
48#include <net/pfil.h> 48#include <net/pfil.h>
49#include <sys/socketvar.h> 49#include <sys/socketvar.h>
50 50
51#include <netinet/in_systm.h> 51#include <netinet/in_systm.h>
52#include <netinet/in.h> 52#include <netinet/in.h>
53#include <netinet/ip_var.h> 53#include <netinet/ip_var.h>
54#include <netinet/ip6.h> 54#include <netinet/ip6.h>
55#include <netinet6/ip6_var.h> 55#include <netinet6/ip6_var.h>
56#endif 56#endif
57 57
58#include "npf_impl.h" 58#include "npf_impl.h"
59#include "npf_conn.h" 59#include "npf_conn.h"
60 60
61#if defined(_NPF_STANDALONE) 61#if defined(_NPF_STANDALONE)
62#define m_freem(m) npf->mbufops->free(m) 62#define m_freem(m) npf->mbufops->free(m)
63#define m_clear_flag(m,f) 63#define m_clear_flag(m,f)
64#else 64#else
65#define m_clear_flag(m,f) (m)->m_flags &= ~(f) 65#define m_clear_flag(m,f) (m)->m_flags &= ~(f)
66#endif 66#endif
67 67
68#ifndef INET6 68#ifndef INET6
69#define ip6_reass_packet(x, y) ENOTSUP 69#define ip6_reass_packet(x, y) ENOTSUP
70#endif 70#endif
71 71
72static int 72static int
73npf_reassembly(npf_t *npf, npf_cache_t *npc, struct mbuf **mp) 73npf_reassembly(npf_t *npf, npf_cache_t *npc, struct mbuf **mp)
74{ 74{
75 nbuf_t *nbuf = npc->npc_nbuf; 75 nbuf_t *nbuf = npc->npc_nbuf;
76 int error = EINVAL; 76 int error = EINVAL;
77 77
78 /* Reset the mbuf as it may have changed. */ 78 /* Reset the mbuf as it may have changed. */
79 *mp = nbuf_head_mbuf(nbuf); 79 *mp = nbuf_head_mbuf(nbuf);
80 nbuf_reset(nbuf); 80 nbuf_reset(nbuf);
81 81
82 if (npf_iscached(npc, NPC_IP4)) { 82 if (npf_iscached(npc, NPC_IP4)) {
83 struct ip *ip = nbuf_dataptr(nbuf); 83 struct ip *ip = nbuf_dataptr(nbuf);
84 error = ip_reass_packet(mp, ip); 84 error = ip_reass_packet(mp, ip);
85 } else if (npf_iscached(npc, NPC_IP6)) { 85 } else if (npf_iscached(npc, NPC_IP6)) {
86 /* 86 /*
87 * Note: ip6_reass_packet() offset is the start of 87 * Note: ip6_reass_packet() offset is the start of
88 * the fragment header. 88 * the fragment header.
89 */ 89 */
90 error = ip6_reass_packet(mp, npc->npc_hlen); 90 error = ip6_reass_packet(mp, npc->npc_hlen);
91 if (error && *mp == NULL) { 91 if (error && *mp == NULL) {
92 memset(nbuf, 0, sizeof(nbuf_t)); 92 memset(nbuf, 0, sizeof(nbuf_t));
93 } 93 }
94 } 94 }
95 if (error) { 95 if (error) {
96 npf_stats_inc(npf, NPF_STAT_REASSFAIL); 96 npf_stats_inc(npf, NPF_STAT_REASSFAIL);
97 return error; 97 return error;
98 } 98 }
99 if (*mp == NULL) { 99 if (*mp == NULL) {
100 /* More fragments should come. */ 100 /* More fragments should come. */
101 npf_stats_inc(npf, NPF_STAT_FRAGMENTS); 101 npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
102 return 0; 102 return 0;
103 } 103 }
104 104
105 /* 105 /*
106 * Reassembly is complete, we have the final packet. 106 * Reassembly is complete, we have the final packet.
107 * Cache again, since layer 4 data is accessible now. 107 * Cache again, since layer 4 data is accessible now.
108 */ 108 */
109 nbuf_init(npf, nbuf, *mp, nbuf->nb_ifp); 109 nbuf_init(npf, nbuf, *mp, nbuf->nb_ifp);
110 npc->npc_info = 0; 110 npc->npc_info = 0;
111 111
112 if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) { 112 if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
113 return EINVAL; 113 return EINVAL;
114 } 114 }
115 npf_stats_inc(npf, NPF_STAT_REASSEMBLY); 115 npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
116 return 0; 116 return 0;
117} 117}
118 118
119/* 119/*
120 * npf_packet_handler: main packet handling routine for layer 3. 120 * npf_packet_handler: main packet handling routine for layer 3.
121 * 121 *
122 * Note: packet flow and inspection logic is in strict order. 122 * Note: packet flow and inspection logic is in strict order.
123 */ 123 */
124__dso_public int 124__dso_public int
125npf_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di) 125npf_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
126{ 126{
127 nbuf_t nbuf; 127 nbuf_t nbuf;
128 npf_cache_t npc; 128 npf_cache_t npc;
129 npf_conn_t *con; 129 npf_conn_t *con;
130 npf_rule_t *rl; 130 npf_rule_t *rl;
131 npf_rproc_t *rp; 131 npf_rproc_t *rp;
132 int error, decision, flags; 132 int error, decision, flags;
133 uint32_t ntag; 133 uint32_t ntag;
134 npf_match_info_t mi; 134 npf_match_info_t mi;
135 135
136 /* QSBR checkpoint. */ 136 /* QSBR checkpoint. */
137 pserialize_checkpoint(npf->qsbr); 137 pserialize_checkpoint(npf->qsbr);
138 KASSERT(ifp != NULL); 138 KASSERT(ifp != NULL);
139 139
140 /* 140 /*
141 * Initialise packet information cache. 141 * Initialise packet information cache.
142 * Note: it is enough to clear the info bits. 142 * Note: it is enough to clear the info bits.
143 */ 143 */
144 npc.npc_ctx = npf; 144 npc.npc_ctx = npf;
145 nbuf_init(npf, &nbuf, *mp, ifp); 145 nbuf_init(npf, &nbuf, *mp, ifp);
146 npc.npc_nbuf = &nbuf; 146 npc.npc_nbuf = &nbuf;
147 npc.npc_info = 0; 147 npc.npc_info = 0;
148 148
149 mi.mi_di = di; 149 mi.mi_di = di;
150 mi.mi_rid = 0; 150 mi.mi_rid = 0;
151 mi.mi_retfl = 0; 151 mi.mi_retfl = 0;
152 152
153 decision = NPF_DECISION_BLOCK; 153 decision = NPF_DECISION_BLOCK;
154 error = 0; 154 error = 0;
155 rp = NULL; 155 rp = NULL;
156 156
157 /* Cache everything. */ 157 /* Cache everything. */
158 flags = npf_cache_all(&npc); 158 flags = npf_cache_all(&npc);
159 159
160 /* If error on the format, leave quickly. */ 160 /* If error on the format, leave quickly. */
161 if (flags & NPC_FMTERR) { 161 if (flags & NPC_FMTERR) {
162 error = EINVAL; 162 error = EINVAL;
163 goto fastout; 163 goto fastout;
164 } 164 }
165 165
166 /* Determine whether it is an IP fragment. */ 166 /* Determine whether it is an IP fragment. */
167 if (__predict_false(flags & NPC_IPFRAG)) { 167 if (__predict_false(flags & NPC_IPFRAG)) {
168 /* 168 /*
169 * We pass IPv6 fragments unconditionally 169 * Pass to IPv4/IPv6 reassembly mechanism.
170 * The first IPv6 fragment is not marked as such 
171 * and passes through the filter 
172 */ 
173 if (flags & NPC_IP6) 
174 return 0; 
175 /* 
176 * Pass to IPv4 reassembly mechanism. 
177 */ 170 */
178 error = npf_reassembly(npf, &npc, mp); 171 error = npf_reassembly(npf, &npc, mp);
179 if (error) { 172 if (error) {
180 con = NULL; 173 con = NULL;
181 goto out; 174 goto out;
182 } 175 }
183 if (*mp == NULL) { 176 if (*mp == NULL) {
184 /* More fragments should come; return. */ 177 /* More fragments should come; return. */
185 return 0; 178 return 0;
186 } 179 }
187 } 180 }
188 181
189 /* Just pass-through if specially tagged. */ 182 /* Just pass-through if specially tagged. */
190 if (nbuf_find_tag(&nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0) { 183 if (nbuf_find_tag(&nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0) {
191 con = NULL; 184 con = NULL;
192 goto pass; 185 goto pass;
193 } 186 }
194 187
195 /* Inspect the list of connections (if found, acquires a reference). */ 188 /* Inspect the list of connections (if found, acquires a reference). */
196 con = npf_conn_inspect(&npc, di, &error); 189 con = npf_conn_inspect(&npc, di, &error);
197 190
198 /* If "passing" connection found - skip the ruleset inspection. */ 191 /* If "passing" connection found - skip the ruleset inspection. */
199 if (con && npf_conn_pass(con, &mi, &rp)) { 192 if (con && npf_conn_pass(con, &mi, &rp)) {
200 npf_stats_inc(npf, NPF_STAT_PASS_CONN); 193 npf_stats_inc(npf, NPF_STAT_PASS_CONN);
201 KASSERT(error == 0); 194 KASSERT(error == 0);
202 goto pass; 195 goto pass;
203 } 196 }
204 if (__predict_false(error)) { 197 if (__predict_false(error)) {
205 if (error == ENETUNREACH) 198 if (error == ENETUNREACH)
206 goto block; 199 goto block;
207 goto out; 200 goto out;
208 } 201 }
209 202
210 /* Acquire the lock, inspect the ruleset using this packet. */ 203 /* Acquire the lock, inspect the ruleset using this packet. */
211 int slock = npf_config_read_enter(); 204 int slock = npf_config_read_enter();
212 npf_ruleset_t *rlset = npf_config_ruleset(npf); 205 npf_ruleset_t *rlset = npf_config_ruleset(npf);
213 206
214 rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3); 207 rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3);
215 if (__predict_false(rl == NULL)) { 208 if (__predict_false(rl == NULL)) {
216 const bool pass = npf_default_pass(npf); 209 const bool pass = npf_default_pass(npf);
217 npf_config_read_exit(slock); 210 npf_config_read_exit(slock);
218 211
219 if (pass) { 212 if (pass) {
220 npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT); 213 npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
221 goto pass; 214 goto pass;
222 } 215 }
223 npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT); 216 npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
224 goto block; 217 goto block;
225 } 218 }
226 219
227 /* 220 /*
228 * Get the rule procedure (acquires a reference) for association 221 * Get the rule procedure (acquires a reference) for association
229 * with a connection (if any) and execution. 222 * with a connection (if any) and execution.
230 */ 223 */
231 KASSERT(rp == NULL); 224 KASSERT(rp == NULL);
232 rp = npf_rule_getrproc(rl); 225 rp = npf_rule_getrproc(rl);
233 226
234 /* Conclude with the rule and release the lock. */ 227 /* Conclude with the rule and release the lock. */
235 error = npf_rule_conclude(rl, &mi); 228 error = npf_rule_conclude(rl, &mi);
236 npf_config_read_exit(slock); 229 npf_config_read_exit(slock);
237 230
238 if (error) { 231 if (error) {
239 npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET); 232 npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
240 goto block; 233 goto block;
241 } 234 }
242 npf_stats_inc(npf, NPF_STAT_PASS_RULESET); 235 npf_stats_inc(npf, NPF_STAT_PASS_RULESET);
243 236
244 /* 237 /*
245 * Establish a "pass" connection, if required. Just proceed if 238 * Establish a "pass" connection, if required. Just proceed if
246 * connection creation fails (e.g. due to unsupported protocol). 239 * connection creation fails (e.g. due to unsupported protocol).
247 */ 240 */
248 if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) { 241 if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
249 con = npf_conn_establish(&npc, di, 242 con = npf_conn_establish(&npc, di,
250 (mi.mi_retfl & NPF_RULE_MULTIENDS) == 0); 243 (mi.mi_retfl & NPF_RULE_MULTIENDS) == 0);
251 if (con) { 244 if (con) {
252 /* 245 /*
253 * Note: the reference on the rule procedure is 246 * Note: the reference on the rule procedure is
254 * transfered to the connection. It will be 247 * transfered to the connection. It will be
255 * released on connection destruction. 248 * released on connection destruction.
256 */ 249 */
257 npf_conn_setpass(con, &mi, rp); 250 npf_conn_setpass(con, &mi, rp);
258 } 251 }
259 } 252 }
260pass: 253pass:
261 decision = NPF_DECISION_PASS; 254 decision = NPF_DECISION_PASS;
262 KASSERT(error == 0); 255 KASSERT(error == 0);
263 /* 256 /*
264 * Perform NAT. 257 * Perform NAT.
265 */ 258 */
266 error = npf_do_nat(&npc, con, di); 259 error = npf_do_nat(&npc, con, di);
267block: 260block:
268 /* 261 /*
269 * Execute the rule procedure, if any is associated. 262 * Execute the rule procedure, if any is associated.
270 * It may reverse the decision from pass to block. 263 * It may reverse the decision from pass to block.
271 */ 264 */
272 if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) { 265 if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
273 if (con) { 266 if (con) {
274 npf_conn_release(con); 267 npf_conn_release(con);
275 } 268 }
276 npf_rproc_release(rp); 269 npf_rproc_release(rp);
277 *mp = NULL; 270 *mp = NULL;
278 return 0; 271 return 0;
279 } 272 }
280out: 273out:
281 /* 274 /*
282 * Release the reference on a connection. Release the reference 275 * Release the reference on a connection. Release the reference
283 * on a rule procedure only if there was no association. 276 * on a rule procedure only if there was no association.
284 */ 277 */
285 if (con) { 278 if (con) {
286 npf_conn_release(con); 279 npf_conn_release(con);
287 } else if (rp) { 280 } else if (rp) {
288 npf_rproc_release(rp); 281 npf_rproc_release(rp);
289 } 282 }
290 283
291 /* Reset mbuf pointer before returning to the caller. */ 284 /* Reset mbuf pointer before returning to the caller. */
292 if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) { 285 if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
293 return error ? error : ENOMEM; 286 return error ? error : ENOMEM;
294 } 287 }
295 288
296 /* Pass the packet if decided and there is no error. */ 289 /* Pass the packet if decided and there is no error. */
297 if (decision == NPF_DECISION_PASS && !error) { 290 if (decision == NPF_DECISION_PASS && !error) {
298 /* 291 /*
299 * XXX: Disable for now, it will be set accordingly later, 292 * XXX: Disable for now, it will be set accordingly later,
300 * for optimisations (to reduce inspection). 293 * for optimisations (to reduce inspection).
301 */ 294 */
302 m_clear_flag(*mp, M_CANFASTFWD); 295 m_clear_flag(*mp, M_CANFASTFWD);
303 return 0; 296 return 0;
304 } 297 }
305 298
306 /* 299 /*
307 * Block the packet. ENETUNREACH is used to indicate blocking. 300 * Block the packet. ENETUNREACH is used to indicate blocking.
308 * Depending on the flags and protocol, return TCP reset (RST) or 301 * Depending on the flags and protocol, return TCP reset (RST) or
309 * ICMP destination unreachable. 302 * ICMP destination unreachable.
310 */ 303 */
311 if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) { 304 if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
312 *mp = NULL; 305 *mp = NULL;
313 } 306 }
314 307
315 if (!error) { 308 if (!error) {
316 error = ENETUNREACH; 309 error = ENETUNREACH;
317 } 310 }
318 311
319fastout: 312fastout:
320 if (*mp) { 313 if (*mp) {
321 /* Free the mbuf chain. */ 314 /* Free the mbuf chain. */
322 m_freem(*mp); 315 m_freem(*mp);
323 *mp = NULL; 316 *mp = NULL;
324 } 317 }
325 return error; 318 return error;
326} 319}

cvs diff -r1.39 -r1.40 src/sys/net/npf/npf_inet.c (switch to unified diff)

--- src/sys/net/npf/npf_inet.c 2018/03/08 07:54:14 1.39
+++ src/sys/net/npf/npf_inet.c 2018/03/13 09:04:02 1.40
@@ -1,799 +1,763 @@ @@ -1,799 +1,763 @@
1/* $NetBSD: npf_inet.c,v 1.39 2018/03/08 07:54:14 maxv Exp $ */ 1/* $NetBSD: npf_inet.c,v 1.40 2018/03/13 09:04:02 maxv Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc. 4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This material is based upon work partially supported by The 7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Various protocol related helper routines. 33 * Various protocol related helper routines.
34 * 34 *
35 * This layer manipulates npf_cache_t structure i.e. caches requested headers 35 * This layer manipulates npf_cache_t structure i.e. caches requested headers
36 * and stores which information was cached in the information bit field. 36 * and stores which information was cached in the information bit field.
37 * It is also responsibility of this layer to update or invalidate the cache 37 * It is also responsibility of this layer to update or invalidate the cache
38 * on rewrites (e.g. by translation routines). 38 * on rewrites (e.g. by translation routines).
39 */ 39 */
40 40
41#ifdef _KERNEL 41#ifdef _KERNEL
42#include <sys/cdefs.h> 42#include <sys/cdefs.h>
43__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.39 2018/03/08 07:54:14 maxv Exp $"); 43__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.40 2018/03/13 09:04:02 maxv Exp $");
44 44
45#include <sys/param.h> 45#include <sys/param.h>
46#include <sys/types.h> 46#include <sys/types.h>
47 47
48#include <net/pfil.h> 48#include <net/pfil.h>
49#include <net/if.h> 49#include <net/if.h>
50#include <net/ethertypes.h> 50#include <net/ethertypes.h>
51#include <net/if_ether.h> 51#include <net/if_ether.h>
52 52
53#include <netinet/in_systm.h> 53#include <netinet/in_systm.h>
54#include <netinet/in.h> 54#include <netinet/in.h>
55#include <netinet6/in6_var.h> 55#include <netinet6/in6_var.h>
56#include <netinet/ip.h> 56#include <netinet/ip.h>
57#include <netinet/ip6.h> 57#include <netinet/ip6.h>
58#include <netinet/tcp.h> 58#include <netinet/tcp.h>
59#include <netinet/udp.h> 59#include <netinet/udp.h>
60#include <netinet/ip_icmp.h> 60#include <netinet/ip_icmp.h>
61#endif 61#endif
62 62
63#include "npf_impl.h" 63#include "npf_impl.h"
64 64
65/* 65/*
66 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum. 66 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum.
67 */ 67 */
68 68
69uint16_t 69uint16_t
70npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 70npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
71{ 71{
72 uint32_t sum; 72 uint32_t sum;
73 73
74 /* 74 /*
75 * RFC 1624: 75 * RFC 1624:
76 * HC' = ~(~HC + ~m + m') 76 * HC' = ~(~HC + ~m + m')
77 * 77 *
78 * Note: 1's complement sum is endian-independent (RFC 1071, page 2). 78 * Note: 1's complement sum is endian-independent (RFC 1071, page 2).
79 */ 79 */
80 sum = ~cksum & 0xffff; 80 sum = ~cksum & 0xffff;
81 sum += (~odatum & 0xffff) + ndatum; 81 sum += (~odatum & 0xffff) + ndatum;
82 sum = (sum >> 16) + (sum & 0xffff); 82 sum = (sum >> 16) + (sum & 0xffff);
83 sum += (sum >> 16); 83 sum += (sum >> 16);
84 84
85 return ~sum & 0xffff; 85 return ~sum & 0xffff;
86} 86}
87 87
88uint16_t 88uint16_t
89npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 89npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum)
90{ 90{
91 uint32_t sum; 91 uint32_t sum;
92 92
93 /* 93 /*
94 * Checksum 32-bit datum as as two 16-bit. Note, the first 94 * Checksum 32-bit datum as as two 16-bit. Note, the first
95 * 32->16 bit reduction is not necessary. 95 * 32->16 bit reduction is not necessary.
96 */ 96 */
97 sum = ~cksum & 0xffff; 97 sum = ~cksum & 0xffff;
98 sum += (~odatum & 0xffff) + (ndatum & 0xffff); 98 sum += (~odatum & 0xffff) + (ndatum & 0xffff);
99 99
100 sum += (~odatum >> 16) + (ndatum >> 16); 100 sum += (~odatum >> 16) + (ndatum >> 16);
101 sum = (sum >> 16) + (sum & 0xffff); 101 sum = (sum >> 16) + (sum & 0xffff);
102 sum += (sum >> 16); 102 sum += (sum >> 16);
103 return ~sum & 0xffff; 103 return ~sum & 0xffff;
104} 104}
105 105
106/* 106/*
107 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 107 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
108 */ 108 */
109uint16_t 109uint16_t
110npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr, 110npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr,
111 const npf_addr_t *naddr) 111 const npf_addr_t *naddr)
112{ 112{
113 const uint32_t *oip32 = (const uint32_t *)oaddr; 113 const uint32_t *oip32 = (const uint32_t *)oaddr;
114 const uint32_t *nip32 = (const uint32_t *)naddr; 114 const uint32_t *nip32 = (const uint32_t *)naddr;
115 115
116 KASSERT(sz % sizeof(uint32_t) == 0); 116 KASSERT(sz % sizeof(uint32_t) == 0);
117 do { 117 do {
118 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 118 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
119 sz -= sizeof(uint32_t); 119 sz -= sizeof(uint32_t);
120 } while (sz); 120 } while (sz);
121 121
122 return cksum; 122 return cksum;
123} 123}
124 124
125/* 125/*
126 * npf_addr_sum: provide IP addresses as a XORed 32-bit integer. 126 * npf_addr_sum: provide IP addresses as a XORed 32-bit integer.
127 * Note: used for hash function. 127 * Note: used for hash function.
128 */ 128 */
129uint32_t 129uint32_t
130npf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2) 130npf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
131{ 131{
132 uint32_t mix = 0; 132 uint32_t mix = 0;
133 133
134 KASSERT(sz > 0 && a1 != NULL && a2 != NULL); 134 KASSERT(sz > 0 && a1 != NULL && a2 != NULL);
135 135
136 for (int i = 0; i < (sz >> 2); i++) { 136 for (int i = 0; i < (sz >> 2); i++) {
137 mix ^= a1->word32[i]; 137 mix ^= a1->word32[i];
138 mix ^= a2->word32[i]; 138 mix ^= a2->word32[i];
139 } 139 }
140 return mix; 140 return mix;
141} 141}
142 142
143/* 143/*
144 * npf_addr_mask: apply the mask to a given address and store the result. 144 * npf_addr_mask: apply the mask to a given address and store the result.
145 */ 145 */
146void 146void
147npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask, 147npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask,
148 const int alen, npf_addr_t *out) 148 const int alen, npf_addr_t *out)
149{ 149{
150 const int nwords = alen >> 2; 150 const int nwords = alen >> 2;
151 uint_fast8_t length = mask; 151 uint_fast8_t length = mask;
152 152
153 /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 153 /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */
154 KASSERT(length <= NPF_MAX_NETMASK); 154 KASSERT(length <= NPF_MAX_NETMASK);
155 155
156 for (int i = 0; i < nwords; i++) { 156 for (int i = 0; i < nwords; i++) {
157 uint32_t wordmask; 157 uint32_t wordmask;
158 158
159 if (length >= 32) { 159 if (length >= 32) {
160 wordmask = htonl(0xffffffff); 160 wordmask = htonl(0xffffffff);
161 length -= 32; 161 length -= 32;
162 } else if (length) { 162 } else if (length) {
163 wordmask = htonl(0xffffffff << (32 - length)); 163 wordmask = htonl(0xffffffff << (32 - length));
164 length = 0; 164 length = 0;
165 } else { 165 } else {
166 wordmask = 0; 166 wordmask = 0;
167 } 167 }
168 out->word32[i] = addr->word32[i] & wordmask; 168 out->word32[i] = addr->word32[i] & wordmask;
169 } 169 }
170} 170}
171 171
172/* 172/*
173 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6. 173 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6.
174 * 174 *
175 * => Return 0 if equal and negative/positive if less/greater accordingly. 175 * => Return 0 if equal and negative/positive if less/greater accordingly.
176 * => Ignore the mask, if NPF_NO_NETMASK is specified. 176 * => Ignore the mask, if NPF_NO_NETMASK is specified.
177 */ 177 */
178int 178int
179npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1, 179npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1,
180 const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen) 180 const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen)
181{ 181{
182 npf_addr_t realaddr1, realaddr2; 182 npf_addr_t realaddr1, realaddr2;
183 183
184 if (mask1 != NPF_NO_NETMASK) { 184 if (mask1 != NPF_NO_NETMASK) {
185 npf_addr_mask(addr1, mask1, alen, &realaddr1); 185 npf_addr_mask(addr1, mask1, alen, &realaddr1);
186 addr1 = &realaddr1; 186 addr1 = &realaddr1;
187 } 187 }
188 if (mask2 != NPF_NO_NETMASK) { 188 if (mask2 != NPF_NO_NETMASK) {
189 npf_addr_mask(addr2, mask2, alen, &realaddr2); 189 npf_addr_mask(addr2, mask2, alen, &realaddr2);
190 addr2 = &realaddr2; 190 addr2 = &realaddr2;
191 } 191 }
192 return memcmp(addr1, addr2, alen); 192 return memcmp(addr1, addr2, alen);
193} 193}
194 194
195/* 195/*
196 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length. 196 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
197 * 197 *
198 * => Returns all values in host byte-order. 198 * => Returns all values in host byte-order.
199 */ 199 */
200int 200int
201npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) 201npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
202{ 202{
203 const struct tcphdr *th = npc->npc_l4.tcp; 203 const struct tcphdr *th = npc->npc_l4.tcp;
204 u_int thlen; 204 u_int thlen;
205 205
206 KASSERT(npf_iscached(npc, NPC_TCP)); 206 KASSERT(npf_iscached(npc, NPC_TCP));
207 207
208 *seq = ntohl(th->th_seq); 208 *seq = ntohl(th->th_seq);
209 *ack = ntohl(th->th_ack); 209 *ack = ntohl(th->th_ack);
210 *win = (uint32_t)ntohs(th->th_win); 210 *win = (uint32_t)ntohs(th->th_win);
211 thlen = th->th_off << 2; 211 thlen = th->th_off << 2;
212 212
213 if (npf_iscached(npc, NPC_IP4)) { 213 if (npf_iscached(npc, NPC_IP4)) {
214 const struct ip *ip = npc->npc_ip.v4; 214 const struct ip *ip = npc->npc_ip.v4;
215 return ntohs(ip->ip_len) - npc->npc_hlen - thlen; 215 return ntohs(ip->ip_len) - npc->npc_hlen - thlen;
216 } else if (npf_iscached(npc, NPC_IP6)) { 216 } else if (npf_iscached(npc, NPC_IP6)) {
217 const struct ip6_hdr *ip6 = npc->npc_ip.v6; 217 const struct ip6_hdr *ip6 = npc->npc_ip.v6;
218 return ntohs(ip6->ip6_plen) - thlen; 218 return ntohs(ip6->ip6_plen) - thlen;
219 } 219 }
220 return 0; 220 return 0;
221} 221}
222 222
223/* 223/*
224 * npf_fetch_tcpopts: parse and return TCP options. 224 * npf_fetch_tcpopts: parse and return TCP options.
225 */ 225 */
226bool 226bool
227npf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale) 227npf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale)
228{ 228{
229 nbuf_t *nbuf = npc->npc_nbuf; 229 nbuf_t *nbuf = npc->npc_nbuf;
230 const struct tcphdr *th = npc->npc_l4.tcp; 230 const struct tcphdr *th = npc->npc_l4.tcp;
231 int topts_len, step; 231 int topts_len, step;
232 uint8_t *nptr; 232 uint8_t *nptr;
233 uint8_t val; 233 uint8_t val;
234 bool ok; 234 bool ok;
235 235
236 KASSERT(npf_iscached(npc, NPC_IP46)); 236 KASSERT(npf_iscached(npc, NPC_IP46));
237 KASSERT(npf_iscached(npc, NPC_TCP)); 237 KASSERT(npf_iscached(npc, NPC_TCP));
238 238
239 /* Determine if there are any TCP options, get their length. */ 239 /* Determine if there are any TCP options, get their length. */
240 topts_len = (th->th_off << 2) - sizeof(struct tcphdr); 240 topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
241 if (topts_len <= 0) { 241 if (topts_len <= 0) {
242 /* No options. */ 242 /* No options. */
243 return false; 243 return false;
244 } 244 }
245 KASSERT(topts_len <= MAX_TCPOPTLEN); 245 KASSERT(topts_len <= MAX_TCPOPTLEN);
246 246
247 /* First step: IP and TCP header up to options. */ 247 /* First step: IP and TCP header up to options. */
248 step = npc->npc_hlen + sizeof(struct tcphdr); 248 step = npc->npc_hlen + sizeof(struct tcphdr);
249 nbuf_reset(nbuf); 249 nbuf_reset(nbuf);
250next: 250next:
251 if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) { 251 if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) {
252 ok = false; 252 ok = false;
253 goto done; 253 goto done;
254 } 254 }
255 val = *nptr; 255 val = *nptr;
256 256
257 switch (val) { 257 switch (val) {
258 case TCPOPT_EOL: 258 case TCPOPT_EOL:
259 /* Done. */ 259 /* Done. */
260 ok = true; 260 ok = true;
261 goto done; 261 goto done;
262 case TCPOPT_NOP: 262 case TCPOPT_NOP:
263 topts_len--; 263 topts_len--;
264 step = 1; 264 step = 1;
265 break; 265 break;
266 case TCPOPT_MAXSEG: 266 case TCPOPT_MAXSEG:
267 if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_MAXSEG)) == NULL) { 267 if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_MAXSEG)) == NULL) {
268 ok = false; 268 ok = false;
269 goto done; 269 goto done;
270 } 270 }
271 if (mss) { 271 if (mss) {
272 if (*mss) { 272 if (*mss) {
273 memcpy(nptr + 2, mss, sizeof(uint16_t)); 273 memcpy(nptr + 2, mss, sizeof(uint16_t));
274 } else { 274 } else {
275 memcpy(mss, nptr + 2, sizeof(uint16_t)); 275 memcpy(mss, nptr + 2, sizeof(uint16_t));
276 } 276 }
277 } 277 }
278 topts_len -= TCPOLEN_MAXSEG; 278 topts_len -= TCPOLEN_MAXSEG;
279 step = TCPOLEN_MAXSEG; 279 step = TCPOLEN_MAXSEG;
280 break; 280 break;
281 case TCPOPT_WINDOW: 281 case TCPOPT_WINDOW:
282 /* TCP Window Scaling (RFC 1323). */ 282 /* TCP Window Scaling (RFC 1323). */
283 if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_WINDOW)) == NULL) { 283 if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_WINDOW)) == NULL) {
284 ok = false; 284 ok = false;
285 goto done; 285 goto done;
286 } 286 }
287 val = *(nptr + 2); 287 val = *(nptr + 2);
288 *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val; 288 *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
289 topts_len -= TCPOLEN_WINDOW; 289 topts_len -= TCPOLEN_WINDOW;
290 step = TCPOLEN_WINDOW; 290 step = TCPOLEN_WINDOW;
291 break; 291 break;
292 default: 292 default:
293 if ((nptr = nbuf_ensure_contig(nbuf, 2)) == NULL) { 293 if ((nptr = nbuf_ensure_contig(nbuf, 2)) == NULL) {
294 ok = false; 294 ok = false;
295 goto done; 295 goto done;
296 } 296 }
297 val = *(nptr + 1); 297 val = *(nptr + 1);
298 if (val < 2 || val > topts_len) { 298 if (val < 2 || val > topts_len) {
299 ok = false; 299 ok = false;
300 goto done; 300 goto done;
301 } 301 }
302 topts_len -= val; 302 topts_len -= val;
303 step = val; 303 step = val;
304 } 304 }
305 305
306 /* Any options left? */ 306 /* Any options left? */
307 if (__predict_true(topts_len > 0)) { 307 if (__predict_true(topts_len > 0)) {
308 goto next; 308 goto next;
309 } 309 }
310 ok = true; 310 ok = true;
311done: 311done:
312 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 312 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
313 npf_recache(npc); 313 npf_recache(npc);
314 } 314 }
315 return ok; 315 return ok;
316} 316}
317 317
318static int 318static int
319npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) 319npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf)
320{ 320{
321 const void *nptr = nbuf_dataptr(nbuf); 321 const void *nptr = nbuf_dataptr(nbuf);
322 const uint8_t ver = *(const uint8_t *)nptr; 322 const uint8_t ver = *(const uint8_t *)nptr;
323 int flags = 0; 323 int flags = 0;
324 324
325 switch (ver >> 4) { 325 switch (ver >> 4) {
326 case IPVERSION: { 326 case IPVERSION: {
327 struct ip *ip; 327 struct ip *ip;
328 328
329 ip = nbuf_ensure_contig(nbuf, sizeof(struct ip)); 329 ip = nbuf_ensure_contig(nbuf, sizeof(struct ip));
330 if (ip == NULL) { 330 if (ip == NULL) {
331 return NPC_FMTERR; 331 return NPC_FMTERR;
332 } 332 }
333 333
334 /* Check header length and fragment offset. */ 334 /* Check header length and fragment offset. */
335 if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) { 335 if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) {
336 return NPC_FMTERR; 336 return NPC_FMTERR;
337 } 337 }
338 if (ip->ip_off & ~htons(IP_DF | IP_RF)) { 338 if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
339 /* Note fragmentation. */ 339 /* Note fragmentation. */
340 flags |= NPC_IPFRAG; 340 flags |= NPC_IPFRAG;
341 } 341 }
342 342
343 /* Cache: layer 3 - IPv4. */ 343 /* Cache: layer 3 - IPv4. */
344 npc->npc_alen = sizeof(struct in_addr); 344 npc->npc_alen = sizeof(struct in_addr);
345 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src; 345 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src;
346 npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst; 346 npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst;
347 npc->npc_hlen = ip->ip_hl << 2; 347 npc->npc_hlen = ip->ip_hl << 2;
348 npc->npc_proto = ip->ip_p; 348 npc->npc_proto = ip->ip_p;
349 349
350 npc->npc_ip.v4 = ip; 350 npc->npc_ip.v4 = ip;
351 flags |= NPC_IP4; 351 flags |= NPC_IP4;
352 break; 352 break;
353 } 353 }
354 354
355 case (IPV6_VERSION >> 4): { 355 case (IPV6_VERSION >> 4): {
356 struct ip6_hdr *ip6; 356 struct ip6_hdr *ip6;
357 struct ip6_ext *ip6e; 357 struct ip6_ext *ip6e;
358 struct ip6_frag *ip6f; 358 struct ip6_frag *ip6f;
359 size_t off, hlen; 359 size_t off, hlen;
360 int frag_present; 360 int frag_present;
361 bool is_frag; 
362 uint8_t onxt; 
363 int fragoff; 
364 361
365 ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr)); 362 ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr));
366 if (ip6 == NULL) { 363 if (ip6 == NULL) {
367 return NPC_FMTERR; 364 return NPC_FMTERR;
368 } 365 }
369 366
370 /* Set initial next-protocol value. */ 367 /* Set initial next-protocol value. */
371 hlen = sizeof(struct ip6_hdr); 368 hlen = sizeof(struct ip6_hdr);
372 npc->npc_proto = ip6->ip6_nxt; 369 npc->npc_proto = ip6->ip6_nxt;
373 npc->npc_hlen = hlen; 370 npc->npc_hlen = hlen;
374 371
375 frag_present = 0; 372 frag_present = 0;
376 is_frag = false; 
377 373
378 /* 374 /*
379 * Advance by the length of the current header. 375 * Advance by the length of the current header.
380 */ 376 */
381 off = nbuf_offset(nbuf); 377 off = nbuf_offset(nbuf);
382 while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) { 378 while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) {
383 /* 379 /*
384 * Determine whether we are going to continue. 380 * Determine whether we are going to continue.
385 */ 381 */
386 switch (npc->npc_proto) { 382 switch (npc->npc_proto) {
387 case IPPROTO_HOPOPTS: 383 case IPPROTO_HOPOPTS:
388 case IPPROTO_DSTOPTS: 384 case IPPROTO_DSTOPTS:
389 case IPPROTO_ROUTING: 385 case IPPROTO_ROUTING:
390 hlen = (ip6e->ip6e_len + 1) << 3; 386 hlen = (ip6e->ip6e_len + 1) << 3;
391 break; 387 break;
392 case IPPROTO_FRAGMENT: 388 case IPPROTO_FRAGMENT:
393 if (frag_present++) 389 if (frag_present++)
394 return NPC_FMTERR; 390 return NPC_FMTERR;
395 ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f)); 391 ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f));
396 if (ip6f == NULL) 392 if (ip6f == NULL)
397 return NPC_FMTERR; 393 return NPC_FMTERR;
398 394
399 hlen = sizeof(struct ip6_frag); 395 hlen = 0;
400 396 flags |= NPC_IPFRAG;
401 /* RFC6946: Skip dummy fragments. */ 
402 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); 
403 if (fragoff == 0 && 
404 !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) { 
405 break; 
406 } 
407 
408 is_frag = true; 
409 
410 /* 
411 * We treat the first fragment as a regular 
412 * packet and then we pass the rest of the 
413 * fragments unconditionally. This way if 
414 * the first packet passes the rest will 
415 * be able to reassembled, if not they will 
416 * be ignored. We can do better later. 
417 */ 
418 if (fragoff != 0) 
419 flags |= NPC_IPFRAG; 
420 397
421 break; 398 break;
422 case IPPROTO_AH: 399 case IPPROTO_AH:
423 hlen = (ip6e->ip6e_len + 2) << 2; 400 hlen = (ip6e->ip6e_len + 2) << 2;
424 break; 401 break;
425 default: 402 default:
426 hlen = 0; 403 hlen = 0;
427 break; 404 break;
428 } 405 }
429 406
430 if (!hlen) { 407 if (!hlen) {
431 break; 408 break;
432 } 409 }
433 onxt = npc->npc_proto; 
434 npc->npc_proto = ip6e->ip6e_nxt; 410 npc->npc_proto = ip6e->ip6e_nxt;
435 npc->npc_hlen += hlen; 411 npc->npc_hlen += hlen;
436 } 412 }
437 413
438 /* 414 /*
439 * We failed to advance. If we are not a fragment, that's 
440 * a format error and we leave. Otherwise, restore npc_hlen 
441 * and npc_proto to their previous (and correct) values. 
442 */ 
443 if (ip6e == NULL) { 
444 if (!is_frag) 
445 return NPC_FMTERR; 
446 npc->npc_proto = onxt; 
447 npc->npc_hlen -= hlen; 
448 } 
449 
450 /* 
451 * Re-fetch the header pointers (nbufs might have been 415 * Re-fetch the header pointers (nbufs might have been
452 * reallocated). Restore the original offset (if any). 416 * reallocated). Restore the original offset (if any).
453 */ 417 */
454 nbuf_reset(nbuf); 418 nbuf_reset(nbuf);
455 ip6 = nbuf_dataptr(nbuf); 419 ip6 = nbuf_dataptr(nbuf);
456 if (off) { 420 if (off) {
457 nbuf_advance(nbuf, off, 0); 421 nbuf_advance(nbuf, off, 0);
458 } 422 }
459 423
460 /* Cache: layer 3 - IPv6. */ 424 /* Cache: layer 3 - IPv6. */
461 npc->npc_alen = sizeof(struct in6_addr); 425 npc->npc_alen = sizeof(struct in6_addr);
462 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src; 426 npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src;
463 npc->npc_ips[NPF_DST]= (npf_addr_t *)&ip6->ip6_dst; 427 npc->npc_ips[NPF_DST]= (npf_addr_t *)&ip6->ip6_dst;
464 428
465 npc->npc_ip.v6 = ip6; 429 npc->npc_ip.v6 = ip6;
466 flags |= NPC_IP6; 430 flags |= NPC_IP6;
467 break; 431 break;
468 } 432 }
469 default: 433 default:
470 break; 434 break;
471 } 435 }
472 return flags; 436 return flags;
473} 437}
474 438
475/* 439/*
476 * npf_cache_all: general routine to cache all relevant IP (v4 or v6) 440 * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
477 * and TCP, UDP or ICMP headers. 441 * and TCP, UDP or ICMP headers.
478 * 442 *
479 * => nbuf offset shall be set accordingly. 443 * => nbuf offset shall be set accordingly.
480 */ 444 */
481int 445int
482npf_cache_all(npf_cache_t *npc) 446npf_cache_all(npf_cache_t *npc)
483{ 447{
484 nbuf_t *nbuf = npc->npc_nbuf; 448 nbuf_t *nbuf = npc->npc_nbuf;
485 int flags, l4flags; 449 int flags, l4flags;
486 u_int hlen; 450 u_int hlen;
487 451
488 /* 452 /*
489 * This routine is a main point where the references are cached, 453 * This routine is a main point where the references are cached,
490 * therefore clear the flag as we reset. 454 * therefore clear the flag as we reset.
491 */ 455 */
492again: 456again:
493 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 457 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
494 458
495 /* 459 /*
496 * First, cache the L3 header (IPv4 or IPv6). If IP packet is 460 * First, cache the L3 header (IPv4 or IPv6). If IP packet is
497 * fragmented, then we cannot look into L4. 461 * fragmented, then we cannot look into L4.
498 */ 462 */
499 flags = npf_cache_ip(npc, nbuf); 463 flags = npf_cache_ip(npc, nbuf);
500 if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 || 464 if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 ||
501 (flags & NPC_FMTERR) != 0) { 465 (flags & NPC_FMTERR) != 0) {
502 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 466 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
503 npc->npc_info |= flags; 467 npc->npc_info |= flags;
504 return flags; 468 return flags;
505 } 469 }
506 hlen = npc->npc_hlen; 470 hlen = npc->npc_hlen;
507 471
508 switch (npc->npc_proto) { 472 switch (npc->npc_proto) {
509 case IPPROTO_TCP: 473 case IPPROTO_TCP:
510 /* Cache: layer 4 - TCP. */ 474 /* Cache: layer 4 - TCP. */
511 npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, 475 npc->npc_l4.tcp = nbuf_advance(nbuf, hlen,
512 sizeof(struct tcphdr)); 476 sizeof(struct tcphdr));
513 l4flags = NPC_LAYER4 | NPC_TCP; 477 l4flags = NPC_LAYER4 | NPC_TCP;
514 break; 478 break;
515 case IPPROTO_UDP: 479 case IPPROTO_UDP:
516 /* Cache: layer 4 - UDP. */ 480 /* Cache: layer 4 - UDP. */
517 npc->npc_l4.udp = nbuf_advance(nbuf, hlen, 481 npc->npc_l4.udp = nbuf_advance(nbuf, hlen,
518 sizeof(struct udphdr)); 482 sizeof(struct udphdr));
519 l4flags = NPC_LAYER4 | NPC_UDP; 483 l4flags = NPC_LAYER4 | NPC_UDP;
520 break; 484 break;
521 case IPPROTO_ICMP: 485 case IPPROTO_ICMP:
522 /* Cache: layer 4 - ICMPv4. */ 486 /* Cache: layer 4 - ICMPv4. */
523 npc->npc_l4.icmp = nbuf_advance(nbuf, hlen, 487 npc->npc_l4.icmp = nbuf_advance(nbuf, hlen,
524 offsetof(struct icmp, icmp_void)); 488 offsetof(struct icmp, icmp_void));
525 l4flags = NPC_LAYER4 | NPC_ICMP; 489 l4flags = NPC_LAYER4 | NPC_ICMP;
526 break; 490 break;
527 case IPPROTO_ICMPV6: 491 case IPPROTO_ICMPV6:
528 /* Cache: layer 4 - ICMPv6. */ 492 /* Cache: layer 4 - ICMPv6. */
529 npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen, 493 npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen,
530 offsetof(struct icmp6_hdr, icmp6_data32)); 494 offsetof(struct icmp6_hdr, icmp6_data32));
531 l4flags = NPC_LAYER4 | NPC_ICMP; 495 l4flags = NPC_LAYER4 | NPC_ICMP;
532 break; 496 break;
533 default: 497 default:
534 l4flags = 0; 498 l4flags = 0;
535 break; 499 break;
536 } 500 }
537 501
538 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 502 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
539 goto again; 503 goto again;
540 } 504 }
541 505
542 /* Add the L4 flags if nbuf_advance() succeeded. */ 506 /* Add the L4 flags if nbuf_advance() succeeded. */
543 if (l4flags && npc->npc_l4.hdr) { 507 if (l4flags && npc->npc_l4.hdr) {
544 flags |= l4flags; 508 flags |= l4flags;
545 } 509 }
546 npc->npc_info |= flags; 510 npc->npc_info |= flags;
547 return flags; 511 return flags;
548} 512}
549 513
550void 514void
551npf_recache(npf_cache_t *npc) 515npf_recache(npf_cache_t *npc)
552{ 516{
553 nbuf_t *nbuf = npc->npc_nbuf; 517 nbuf_t *nbuf = npc->npc_nbuf;
554 const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4); 518 const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4);
555 int flags __diagused; 519 int flags __diagused;
556 520
557 nbuf_reset(nbuf); 521 nbuf_reset(nbuf);
558 npc->npc_info = 0; 522 npc->npc_info = 0;
559 flags = npf_cache_all(npc); 523 flags = npf_cache_all(npc);
560 524
561 KASSERT((flags & mflags) == mflags); 525 KASSERT((flags & mflags) == mflags);
562 KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0); 526 KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0);
563} 527}
564 528
565/* 529/*
566 * npf_rwrip: rewrite required IP address. 530 * npf_rwrip: rewrite required IP address.
567 */ 531 */
568bool 532bool
569npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr) 533npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr)
570{ 534{
571 KASSERT(npf_iscached(npc, NPC_IP46)); 535 KASSERT(npf_iscached(npc, NPC_IP46));
572 KASSERT(which == NPF_SRC || which == NPF_DST); 536 KASSERT(which == NPF_SRC || which == NPF_DST);
573 537
574 memcpy(npc->npc_ips[which], addr, npc->npc_alen); 538 memcpy(npc->npc_ips[which], addr, npc->npc_alen);
575 return true; 539 return true;
576} 540}
577 541
578/* 542/*
579 * npf_rwrport: rewrite required TCP/UDP port. 543 * npf_rwrport: rewrite required TCP/UDP port.
580 */ 544 */
581bool 545bool
582npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port) 546npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port)
583{ 547{
584 const int proto = npc->npc_proto; 548 const int proto = npc->npc_proto;
585 in_port_t *oport; 549 in_port_t *oport;
586 550
587 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 551 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
588 KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP); 552 KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
589 KASSERT(which == NPF_SRC || which == NPF_DST); 553 KASSERT(which == NPF_SRC || which == NPF_DST);
590 554
591 /* Get the offset and store the port in it. */ 555 /* Get the offset and store the port in it. */
592 if (proto == IPPROTO_TCP) { 556 if (proto == IPPROTO_TCP) {
593 struct tcphdr *th = npc->npc_l4.tcp; 557 struct tcphdr *th = npc->npc_l4.tcp;
594 oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport; 558 oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport;
595 } else { 559 } else {
596 struct udphdr *uh = npc->npc_l4.udp; 560 struct udphdr *uh = npc->npc_l4.udp;
597 oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport; 561 oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport;
598 } 562 }
599 memcpy(oport, &port, sizeof(in_port_t)); 563 memcpy(oport, &port, sizeof(in_port_t));
600 return true; 564 return true;
601} 565}
602 566
603/* 567/*
604 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum. 568 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum.
605 */ 569 */
606bool 570bool
607npf_rwrcksum(const npf_cache_t *npc, u_int which, 571npf_rwrcksum(const npf_cache_t *npc, u_int which,
608 const npf_addr_t *addr, const in_port_t port) 572 const npf_addr_t *addr, const in_port_t port)
609{ 573{
610 const npf_addr_t *oaddr = npc->npc_ips[which]; 574 const npf_addr_t *oaddr = npc->npc_ips[which];
611 const int proto = npc->npc_proto; 575 const int proto = npc->npc_proto;
612 const int alen = npc->npc_alen; 576 const int alen = npc->npc_alen;
613 uint16_t *ocksum; 577 uint16_t *ocksum;
614 in_port_t oport; 578 in_port_t oport;
615 579
616 KASSERT(npf_iscached(npc, NPC_LAYER4)); 580 KASSERT(npf_iscached(npc, NPC_LAYER4));
617 KASSERT(which == NPF_SRC || which == NPF_DST); 581 KASSERT(which == NPF_SRC || which == NPF_DST);
618 582
619 if (npf_iscached(npc, NPC_IP4)) { 583 if (npf_iscached(npc, NPC_IP4)) {
620 struct ip *ip = npc->npc_ip.v4; 584 struct ip *ip = npc->npc_ip.v4;
621 uint16_t ipsum = ip->ip_sum; 585 uint16_t ipsum = ip->ip_sum;
622 586
623 /* Recalculate IPv4 checksum and rewrite. */ 587 /* Recalculate IPv4 checksum and rewrite. */
624 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 588 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr);
625 } else { 589 } else {
626 /* No checksum for IPv6. */ 590 /* No checksum for IPv6. */
627 KASSERT(npf_iscached(npc, NPC_IP6)); 591 KASSERT(npf_iscached(npc, NPC_IP6));
628 } 592 }
629 593
630 /* Nothing else to do for ICMP. */ 594 /* Nothing else to do for ICMP. */
631 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { 595 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
632 return true; 596 return true;
633 } 597 }
634 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 598 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
635 599
636 /* 600 /*
637 * Calculate TCP/UDP checksum: 601 * Calculate TCP/UDP checksum:
638 * - Skip if UDP and the current checksum is zero. 602 * - Skip if UDP and the current checksum is zero.
639 * - Fixup the IP address change. 603 * - Fixup the IP address change.
640 * - Fixup the port change, if required (non-zero). 604 * - Fixup the port change, if required (non-zero).
641 */ 605 */
642 if (proto == IPPROTO_TCP) { 606 if (proto == IPPROTO_TCP) {
643 struct tcphdr *th = npc->npc_l4.tcp; 607 struct tcphdr *th = npc->npc_l4.tcp;
644 608
645 ocksum = &th->th_sum; 609 ocksum = &th->th_sum;
646 oport = (which == NPF_SRC) ? th->th_sport : th->th_dport; 610 oport = (which == NPF_SRC) ? th->th_sport : th->th_dport;
647 } else { 611 } else {
648 struct udphdr *uh = npc->npc_l4.udp; 612 struct udphdr *uh = npc->npc_l4.udp;
649 613
650 KASSERT(proto == IPPROTO_UDP); 614 KASSERT(proto == IPPROTO_UDP);
651 ocksum = &uh->uh_sum; 615 ocksum = &uh->uh_sum;
652 if (*ocksum == 0) { 616 if (*ocksum == 0) {
653 /* No need to update. */ 617 /* No need to update. */
654 return true; 618 return true;
655 } 619 }
656 oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport; 620 oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport;
657 } 621 }
658 622
659 uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 623 uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr);
660 if (port) { 624 if (port) {
661 cksum = npf_fixup16_cksum(cksum, oport, port); 625 cksum = npf_fixup16_cksum(cksum, oport, port);
662 } 626 }
663 627
664 /* Rewrite TCP/UDP checksum. */ 628 /* Rewrite TCP/UDP checksum. */
665 memcpy(ocksum, &cksum, sizeof(uint16_t)); 629 memcpy(ocksum, &cksum, sizeof(uint16_t));
666 return true; 630 return true;
667} 631}
668 632
669/* 633/*
670 * npf_napt_rwr: perform address and/or port translation. 634 * npf_napt_rwr: perform address and/or port translation.
671 */ 635 */
672int 636int
673npf_napt_rwr(const npf_cache_t *npc, u_int which, 637npf_napt_rwr(const npf_cache_t *npc, u_int which,
674 const npf_addr_t *addr, const in_addr_t port) 638 const npf_addr_t *addr, const in_addr_t port)
675{ 639{
676 const unsigned proto = npc->npc_proto; 640 const unsigned proto = npc->npc_proto;
677 641
678 /* 642 /*
679 * Rewrite IP and/or TCP/UDP checksums first, since we need the 643 * Rewrite IP and/or TCP/UDP checksums first, since we need the
680 * current (old) address/port for the calculations. Then perform 644 * current (old) address/port for the calculations. Then perform
681 * the address translation i.e. rewrite source or destination. 645 * the address translation i.e. rewrite source or destination.
682 */ 646 */
683 if (!npf_rwrcksum(npc, which, addr, port)) { 647 if (!npf_rwrcksum(npc, which, addr, port)) {
684 return EINVAL; 648 return EINVAL;
685 } 649 }
686 if (!npf_rwrip(npc, which, addr)) { 650 if (!npf_rwrip(npc, which, addr)) {
687 return EINVAL; 651 return EINVAL;
688 } 652 }
689 if (port == 0) { 653 if (port == 0) {
690 /* Done. */ 654 /* Done. */
691 return 0; 655 return 0;
692 } 656 }
693 657
694 switch (proto) { 658 switch (proto) {
695 case IPPROTO_TCP: 659 case IPPROTO_TCP:
696 case IPPROTO_UDP: 660 case IPPROTO_UDP:
697 /* Rewrite source/destination port. */ 661 /* Rewrite source/destination port. */
698 if (!npf_rwrport(npc, which, port)) { 662 if (!npf_rwrport(npc, which, port)) {
699 return EINVAL; 663 return EINVAL;
700 } 664 }
701 break; 665 break;
702 case IPPROTO_ICMP: 666 case IPPROTO_ICMP:
703 case IPPROTO_ICMPV6: 667 case IPPROTO_ICMPV6:
704 KASSERT(npf_iscached(npc, NPC_ICMP)); 668 KASSERT(npf_iscached(npc, NPC_ICMP));
705 /* Nothing. */ 669 /* Nothing. */
706 break; 670 break;
707 default: 671 default:
708 return ENOTSUP; 672 return ENOTSUP;
709 } 673 }
710 return 0; 674 return 0;
711} 675}
712 676
713/* 677/*
714 * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296. 678 * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296.
715 */ 679 */
716 680
717int 681int
718npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref, 682npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref,
719 npf_netmask_t len, uint16_t adj) 683 npf_netmask_t len, uint16_t adj)
720{ 684{
721 npf_addr_t *addr = npc->npc_ips[which]; 685 npf_addr_t *addr = npc->npc_ips[which];
722 unsigned remnant, word, preflen = len >> 4; 686 unsigned remnant, word, preflen = len >> 4;
723 uint32_t sum; 687 uint32_t sum;
724 688
725 KASSERT(which == NPF_SRC || which == NPF_DST); 689 KASSERT(which == NPF_SRC || which == NPF_DST);
726 690
727 if (!npf_iscached(npc, NPC_IP6)) { 691 if (!npf_iscached(npc, NPC_IP6)) {
728 return EINVAL; 692 return EINVAL;
729 } 693 }
730 if (len <= 48) { 694 if (len <= 48) {
731 /* 695 /*
732 * The word to adjust. Cannot translate the 0xffff 696 * The word to adjust. Cannot translate the 0xffff
733 * subnet if /48 or shorter. 697 * subnet if /48 or shorter.
734 */ 698 */
735 word = 3; 699 word = 3;
736 if (addr->word16[word] == 0xffff) { 700 if (addr->word16[word] == 0xffff) {
737 return EINVAL; 701 return EINVAL;
738 } 702 }
739 } else { 703 } else {
740 /* 704 /*
741 * Also, all 0s or 1s in the host part are disallowed for 705 * Also, all 0s or 1s in the host part are disallowed for
742 * longer than /48 prefixes. 706 * longer than /48 prefixes.
743 */ 707 */
744 if ((addr->word32[2] == 0 && addr->word32[3] == 0) || 708 if ((addr->word32[2] == 0 && addr->word32[3] == 0) ||
745 (addr->word32[2] == ~0U && addr->word32[3] == ~0U)) 709 (addr->word32[2] == ~0U && addr->word32[3] == ~0U))
746 return EINVAL; 710 return EINVAL;
747 711
748 /* Determine the 16-bit word to adjust. */ 712 /* Determine the 16-bit word to adjust. */
749 for (word = 4; word < 8; word++) 713 for (word = 4; word < 8; word++)
750 if (addr->word16[word] != 0xffff) 714 if (addr->word16[word] != 0xffff)
751 break; 715 break;
752 } 716 }
753 717
754 /* Rewrite the prefix. */ 718 /* Rewrite the prefix. */
755 for (unsigned i = 0; i < preflen; i++) { 719 for (unsigned i = 0; i < preflen; i++) {
756 addr->word16[i] = pref->word16[i]; 720 addr->word16[i] = pref->word16[i];
757 } 721 }
758 722
759 /* 723 /*
760 * If prefix length is within a 16-bit word (not dividable by 16), 724 * If prefix length is within a 16-bit word (not dividable by 16),
761 * then prepare a mask, determine the word and adjust it. 725 * then prepare a mask, determine the word and adjust it.
762 */ 726 */
763 if ((remnant = len - (preflen << 4)) != 0) { 727 if ((remnant = len - (preflen << 4)) != 0) {
764 const uint16_t wordmask = (1U << remnant) - 1; 728 const uint16_t wordmask = (1U << remnant) - 1;
765 const unsigned i = preflen; 729 const unsigned i = preflen;
766 730
767 addr->word16[i] = (pref->word16[i] & wordmask) | 731 addr->word16[i] = (pref->word16[i] & wordmask) |
768 (addr->word16[i] & ~wordmask); 732 (addr->word16[i] & ~wordmask);
769 } 733 }
770 734
771 /* 735 /*
772 * Performing 1's complement sum/difference. 736 * Performing 1's complement sum/difference.
773 */ 737 */
774 sum = addr->word16[word] + adj; 738 sum = addr->word16[word] + adj;
775 while (sum >> 16) { 739 while (sum >> 16) {
776 sum = (sum >> 16) + (sum & 0xffff); 740 sum = (sum >> 16) + (sum & 0xffff);
777 } 741 }
778 if (sum == 0xffff) { 742 if (sum == 0xffff) {
779 /* RFC 1071. */ 743 /* RFC 1071. */
780 sum = 0x0000; 744 sum = 0x0000;
781 } 745 }
782 addr->word16[word] = sum; 746 addr->word16[word] = sum;
783 return 0; 747 return 0;
784} 748}
785 749
786#if defined(DDB) || defined(_NPF_TESTING) 750#if defined(DDB) || defined(_NPF_TESTING)
787 751
788const char * 752const char *
789npf_addr_dump(const npf_addr_t *addr, int alen) 753npf_addr_dump(const npf_addr_t *addr, int alen)
790{ 754{
791 if (alen == sizeof(struct in_addr)) { 755 if (alen == sizeof(struct in_addr)) {
792 struct in_addr ip; 756 struct in_addr ip;
793 memcpy(&ip, addr, alen); 757 memcpy(&ip, addr, alen);
794 return inet_ntoa(ip); 758 return inet_ntoa(ip);
795 } 759 }
796 return "[IPv6]"; 760 return "[IPv6]";
797} 761}
798 762
799#endif 763#endif