Tue Jul 7 11:56:57 2020 UTC
Pull up following revision(s) (requested by christos in ticket #1566):

	sys/netinet/tcp_input.c: revision 1.418 (via patch)

- always set both ip and ip6, otherwise a kernel assertion can be triggered
- move alignment early so that we do less work


(martin)
diff -r1.357.4.3 -r1.357.4.4 src/sys/netinet/tcp_input.c

cvs diff -r1.357.4.3 -r1.357.4.4 src/sys/netinet/tcp_input.c

--- src/sys/netinet/tcp_input.c 2018/03/30 11:17:19 1.357.4.3
+++ src/sys/netinet/tcp_input.c 2020/07/07 11:56:57 1.357.4.4
@@ -1,2603 +1,2599 @@ @@ -1,2603 +1,2599 @@
1/* $NetBSD: tcp_input.c,v 1.357.4.3 2018/03/30 11:17:19 martin Exp $ */ 1/* $NetBSD: tcp_input.c,v 1.357.4.4 2020/07/07 11:56:57 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors 15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software 16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission. 17 * without specific prior written permission.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE. 29 * SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
34 * 34 *
35 * NRL grants permission for redistribution and use in source and binary 35 * NRL grants permission for redistribution and use in source and binary
36 * forms, with or without modification, of the software and documentation 36 * forms, with or without modification, of the software and documentation
37 * created at NRL provided that the following conditions are met: 37 * created at NRL provided that the following conditions are met:
38 * 38 *
39 * 1. Redistributions of source code must retain the above copyright 39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer. 40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright 41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the 42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution. 43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software 44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgements: 45 * must display the following acknowledgements:
46 * This product includes software developed by the University of 46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors. 47 * California, Berkeley and its contributors.
48 * This product includes software developed at the Information 48 * This product includes software developed at the Information
49 * Technology Division, US Naval Research Laboratory. 49 * Technology Division, US Naval Research Laboratory.
50 * 4. Neither the name of the NRL nor the names of its contributors 50 * 4. Neither the name of the NRL nor the names of its contributors
51 * may be used to endorse or promote products derived from this software 51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission. 52 * without specific prior written permission.
53 * 53 *
54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 * 65 *
66 * The views and conclusions contained in the software and documentation 66 * The views and conclusions contained in the software and documentation
67 * are those of the authors and should not be interpreted as representing 67 * are those of the authors and should not be interpreted as representing
68 * official policies, either expressed or implied, of the US Naval 68 * official policies, either expressed or implied, of the US Naval
69 * Research Laboratory (NRL). 69 * Research Laboratory (NRL).
70 */ 70 */
71 71
72/*- 72/*-
73 * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006, 73 * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006,
74 * 2011 The NetBSD Foundation, Inc. 74 * 2011 The NetBSD Foundation, Inc.
75 * All rights reserved. 75 * All rights reserved.
76 * 76 *
77 * This code is derived from software contributed to The NetBSD Foundation 77 * This code is derived from software contributed to The NetBSD Foundation
78 * by Coyote Point Systems, Inc. 78 * by Coyote Point Systems, Inc.
79 * This code is derived from software contributed to The NetBSD Foundation 79 * This code is derived from software contributed to The NetBSD Foundation
80 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 80 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
81 * Facility, NASA Ames Research Center. 81 * Facility, NASA Ames Research Center.
82 * This code is derived from software contributed to The NetBSD Foundation 82 * This code is derived from software contributed to The NetBSD Foundation
83 * by Charles M. Hannum. 83 * by Charles M. Hannum.
84 * This code is derived from software contributed to The NetBSD Foundation 84 * This code is derived from software contributed to The NetBSD Foundation
85 * by Rui Paulo. 85 * by Rui Paulo.
86 * 86 *
87 * Redistribution and use in source and binary forms, with or without 87 * Redistribution and use in source and binary forms, with or without
88 * modification, are permitted provided that the following conditions 88 * modification, are permitted provided that the following conditions
89 * are met: 89 * are met:
90 * 1. Redistributions of source code must retain the above copyright 90 * 1. Redistributions of source code must retain the above copyright
91 * notice, this list of conditions and the following disclaimer. 91 * notice, this list of conditions and the following disclaimer.
92 * 2. Redistributions in binary form must reproduce the above copyright 92 * 2. Redistributions in binary form must reproduce the above copyright
93 * notice, this list of conditions and the following disclaimer in the 93 * notice, this list of conditions and the following disclaimer in the
94 * documentation and/or other materials provided with the distribution. 94 * documentation and/or other materials provided with the distribution.
95 * 95 *
96 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 96 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
97 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 97 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
98 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 98 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
99 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 99 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
100 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 100 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
101 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 101 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
102 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 102 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
103 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 103 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
104 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 104 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
105 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 105 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
106 * POSSIBILITY OF SUCH DAMAGE. 106 * POSSIBILITY OF SUCH DAMAGE.
107 */ 107 */
108 108
109/* 109/*
110 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 110 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
111 * The Regents of the University of California. All rights reserved. 111 * The Regents of the University of California. All rights reserved.
112 * 112 *
113 * Redistribution and use in source and binary forms, with or without 113 * Redistribution and use in source and binary forms, with or without
114 * modification, are permitted provided that the following conditions 114 * modification, are permitted provided that the following conditions
115 * are met: 115 * are met:
116 * 1. Redistributions of source code must retain the above copyright 116 * 1. Redistributions of source code must retain the above copyright
117 * notice, this list of conditions and the following disclaimer. 117 * notice, this list of conditions and the following disclaimer.
118 * 2. Redistributions in binary form must reproduce the above copyright 118 * 2. Redistributions in binary form must reproduce the above copyright
119 * notice, this list of conditions and the following disclaimer in the 119 * notice, this list of conditions and the following disclaimer in the
120 * documentation and/or other materials provided with the distribution. 120 * documentation and/or other materials provided with the distribution.
121 * 3. Neither the name of the University nor the names of its contributors 121 * 3. Neither the name of the University nor the names of its contributors
122 * may be used to endorse or promote products derived from this software 122 * may be used to endorse or promote products derived from this software
123 * without specific prior written permission. 123 * without specific prior written permission.
124 * 124 *
125 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 125 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
126 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 126 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
127 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 127 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
128 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 128 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
129 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 129 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
130 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 130 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
131 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 131 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
132 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 132 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
133 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 133 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
134 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 134 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
135 * SUCH DAMAGE. 135 * SUCH DAMAGE.
136 * 136 *
137 * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 137 * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
138 */ 138 */
139 139
140/* 140/*
141 * TODO list for SYN cache stuff: 141 * TODO list for SYN cache stuff:
142 * 142 *
143 * Find room for a "state" field, which is needed to keep a 143 * Find room for a "state" field, which is needed to keep a
144 * compressed state for TIME_WAIT TCBs. It's been noted already 144 * compressed state for TIME_WAIT TCBs. It's been noted already
145 * that this is fairly important for very high-volume web and 145 * that this is fairly important for very high-volume web and
146 * mail servers, which use a large number of short-lived 146 * mail servers, which use a large number of short-lived
147 * connections. 147 * connections.
148 */ 148 */
149 149
150#include <sys/cdefs.h> 150#include <sys/cdefs.h>
151__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.357.4.3 2018/03/30 11:17:19 martin Exp $"); 151__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.357.4.4 2020/07/07 11:56:57 martin Exp $");
152 152
153#ifdef _KERNEL_OPT 153#ifdef _KERNEL_OPT
154#include "opt_inet.h" 154#include "opt_inet.h"
155#include "opt_ipsec.h" 155#include "opt_ipsec.h"
156#include "opt_inet_csum.h" 156#include "opt_inet_csum.h"
157#include "opt_tcp_debug.h" 157#include "opt_tcp_debug.h"
158#endif 158#endif
159 159
160#include <sys/param.h> 160#include <sys/param.h>
161#include <sys/systm.h> 161#include <sys/systm.h>
162#include <sys/malloc.h> 162#include <sys/malloc.h>
163#include <sys/mbuf.h> 163#include <sys/mbuf.h>
164#include <sys/protosw.h> 164#include <sys/protosw.h>
165#include <sys/socket.h> 165#include <sys/socket.h>
166#include <sys/socketvar.h> 166#include <sys/socketvar.h>
167#include <sys/errno.h> 167#include <sys/errno.h>
168#include <sys/syslog.h> 168#include <sys/syslog.h>
169#include <sys/pool.h> 169#include <sys/pool.h>
170#include <sys/domain.h> 170#include <sys/domain.h>
171#include <sys/kernel.h> 171#include <sys/kernel.h>
172#ifdef TCP_SIGNATURE 172#ifdef TCP_SIGNATURE
173#include <sys/md5.h> 173#include <sys/md5.h>
174#endif 174#endif
175#include <sys/lwp.h> /* for lwp0 */ 175#include <sys/lwp.h> /* for lwp0 */
176#include <sys/cprng.h> 176#include <sys/cprng.h>
177 177
178#include <net/if.h> 178#include <net/if.h>
179#include <net/if_types.h> 179#include <net/if_types.h>
180 180
181#include <netinet/in.h> 181#include <netinet/in.h>
182#include <netinet/in_systm.h> 182#include <netinet/in_systm.h>
183#include <netinet/ip.h> 183#include <netinet/ip.h>
184#include <netinet/in_pcb.h> 184#include <netinet/in_pcb.h>
185#include <netinet/in_var.h> 185#include <netinet/in_var.h>
186#include <netinet/ip_var.h> 186#include <netinet/ip_var.h>
187#include <netinet/in_offload.h> 187#include <netinet/in_offload.h>
188 188
189#ifdef INET6 189#ifdef INET6
190#ifndef INET 190#ifndef INET
191#include <netinet/in.h> 191#include <netinet/in.h>
192#endif 192#endif
193#include <netinet/ip6.h> 193#include <netinet/ip6.h>
194#include <netinet6/ip6_var.h> 194#include <netinet6/ip6_var.h>
195#include <netinet6/in6_pcb.h> 195#include <netinet6/in6_pcb.h>
196#include <netinet6/ip6_var.h> 196#include <netinet6/ip6_var.h>
197#include <netinet6/in6_var.h> 197#include <netinet6/in6_var.h>
198#include <netinet/icmp6.h> 198#include <netinet/icmp6.h>
199#include <netinet6/nd6.h> 199#include <netinet6/nd6.h>
200#ifdef TCP_SIGNATURE 200#ifdef TCP_SIGNATURE
201#include <netinet6/scope6_var.h> 201#include <netinet6/scope6_var.h>
202#endif 202#endif
203#endif 203#endif
204 204
205#ifndef INET6 205#ifndef INET6
206/* always need ip6.h for IP6_EXTHDR_GET */ 206/* always need ip6.h for IP6_EXTHDR_GET */
207#include <netinet/ip6.h> 207#include <netinet/ip6.h>
208#endif 208#endif
209 209
210#include <netinet/tcp.h> 210#include <netinet/tcp.h>
211#include <netinet/tcp_fsm.h> 211#include <netinet/tcp_fsm.h>
212#include <netinet/tcp_seq.h> 212#include <netinet/tcp_seq.h>
213#include <netinet/tcp_timer.h> 213#include <netinet/tcp_timer.h>
214#include <netinet/tcp_var.h> 214#include <netinet/tcp_var.h>
215#include <netinet/tcp_private.h> 215#include <netinet/tcp_private.h>
216#include <netinet/tcpip.h> 216#include <netinet/tcpip.h>
217#include <netinet/tcp_congctl.h> 217#include <netinet/tcp_congctl.h>
218#include <netinet/tcp_debug.h> 218#include <netinet/tcp_debug.h>
219 219
220#ifdef INET6 220#ifdef INET6
221#include "faith.h" 221#include "faith.h"
222#if defined(NFAITH) && NFAITH > 0 222#if defined(NFAITH) && NFAITH > 0
223#include <net/if_faith.h> 223#include <net/if_faith.h>
224#endif 224#endif
225#endif /* INET6 */ 225#endif /* INET6 */
226 226
227#ifdef IPSEC 227#ifdef IPSEC
228#include <netipsec/ipsec.h> 228#include <netipsec/ipsec.h>
229#include <netipsec/ipsec_var.h> 229#include <netipsec/ipsec_var.h>
230#include <netipsec/ipsec_private.h> 230#include <netipsec/ipsec_private.h>
231#include <netipsec/key.h> 231#include <netipsec/key.h>
232#ifdef INET6 232#ifdef INET6
233#include <netipsec/ipsec6.h> 233#include <netipsec/ipsec6.h>
234#endif 234#endif
235#endif /* IPSEC*/ 235#endif /* IPSEC*/
236 236
237#include <netinet/tcp_vtw.h> 237#include <netinet/tcp_vtw.h>
238 238
239int tcprexmtthresh = 3; 239int tcprexmtthresh = 3;
240int tcp_log_refused; 240int tcp_log_refused;
241 241
242int tcp_do_autorcvbuf = 1; 242int tcp_do_autorcvbuf = 1;
243int tcp_autorcvbuf_inc = 16 * 1024; 243int tcp_autorcvbuf_inc = 16 * 1024;
244int tcp_autorcvbuf_max = 256 * 1024; 244int tcp_autorcvbuf_max = 256 * 1024;
245int tcp_msl = (TCPTV_MSL / PR_SLOWHZ); 245int tcp_msl = (TCPTV_MSL / PR_SLOWHZ);
246 246
247static int tcp_rst_ppslim_count = 0; 247static int tcp_rst_ppslim_count = 0;
248static struct timeval tcp_rst_ppslim_last; 248static struct timeval tcp_rst_ppslim_last;
249static int tcp_ackdrop_ppslim_count = 0; 249static int tcp_ackdrop_ppslim_count = 0;
250static struct timeval tcp_ackdrop_ppslim_last; 250static struct timeval tcp_ackdrop_ppslim_last;
251 251
252#define TCP_PAWS_IDLE (24U * 24 * 60 * 60 * PR_SLOWHZ) 252#define TCP_PAWS_IDLE (24U * 24 * 60 * 60 * PR_SLOWHZ)
253 253
254/* for modulo comparisons of timestamps */ 254/* for modulo comparisons of timestamps */
255#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) 255#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
256#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) 256#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
257 257
258/* 258/*
259 * Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. 259 * Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint.
260 */ 260 */
261#ifdef INET6 261#ifdef INET6
262static inline void 262static inline void
263nd6_hint(struct tcpcb *tp) 263nd6_hint(struct tcpcb *tp)
264{ 264{
265 struct rtentry *rt = NULL; 265 struct rtentry *rt = NULL;
266 266
267 if (tp != NULL && tp->t_in6pcb != NULL && tp->t_family == AF_INET6 && 267 if (tp != NULL && tp->t_in6pcb != NULL && tp->t_family == AF_INET6 &&
268 (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL) 268 (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL)
269 nd6_nud_hint(rt); 269 nd6_nud_hint(rt);
270 rtcache_unref(rt, &tp->t_in6pcb->in6p_route); 270 rtcache_unref(rt, &tp->t_in6pcb->in6p_route);
271} 271}
272#else 272#else
273static inline void 273static inline void
274nd6_hint(struct tcpcb *tp) 274nd6_hint(struct tcpcb *tp)
275{ 275{
276} 276}
277#endif 277#endif
278 278
279/* 279/*
280 * Compute ACK transmission behavior. Delay the ACK unless 280 * Compute ACK transmission behavior. Delay the ACK unless
281 * we have already delayed an ACK (must send an ACK every two segments). 281 * we have already delayed an ACK (must send an ACK every two segments).
282 * We also ACK immediately if we received a PUSH and the ACK-on-PUSH 282 * We also ACK immediately if we received a PUSH and the ACK-on-PUSH
283 * option is enabled. 283 * option is enabled.
284 */ 284 */
285static void 285static void
286tcp_setup_ack(struct tcpcb *tp, const struct tcphdr *th) 286tcp_setup_ack(struct tcpcb *tp, const struct tcphdr *th)
287{ 287{
288 288
289 if (tp->t_flags & TF_DELACK || 289 if (tp->t_flags & TF_DELACK ||
290 (tcp_ack_on_push && th->th_flags & TH_PUSH)) 290 (tcp_ack_on_push && th->th_flags & TH_PUSH))
291 tp->t_flags |= TF_ACKNOW; 291 tp->t_flags |= TF_ACKNOW;
292 else 292 else
293 TCP_SET_DELACK(tp); 293 TCP_SET_DELACK(tp);
294} 294}
295 295
296static void 296static void
297icmp_check(struct tcpcb *tp, const struct tcphdr *th, int acked) 297icmp_check(struct tcpcb *tp, const struct tcphdr *th, int acked)
298{ 298{
299 299
300 /* 300 /*
301 * If we had a pending ICMP message that refers to data that have 301 * If we had a pending ICMP message that refers to data that have
302 * just been acknowledged, disregard the recorded ICMP message. 302 * just been acknowledged, disregard the recorded ICMP message.
303 */ 303 */
304 if ((tp->t_flags & TF_PMTUD_PEND) && 304 if ((tp->t_flags & TF_PMTUD_PEND) &&
305 SEQ_GT(th->th_ack, tp->t_pmtud_th_seq)) 305 SEQ_GT(th->th_ack, tp->t_pmtud_th_seq))
306 tp->t_flags &= ~TF_PMTUD_PEND; 306 tp->t_flags &= ~TF_PMTUD_PEND;
307 307
308 /* 308 /*
309 * Keep track of the largest chunk of data 309 * Keep track of the largest chunk of data
310 * acknowledged since last PMTU update 310 * acknowledged since last PMTU update
311 */ 311 */
312 if (tp->t_pmtud_mss_acked < acked) 312 if (tp->t_pmtud_mss_acked < acked)
313 tp->t_pmtud_mss_acked = acked; 313 tp->t_pmtud_mss_acked = acked;
314} 314}
315 315
316/* 316/*
317 * Convert TCP protocol fields to host order for easier processing. 317 * Convert TCP protocol fields to host order for easier processing.
318 */ 318 */
319static void 319static void
320tcp_fields_to_host(struct tcphdr *th) 320tcp_fields_to_host(struct tcphdr *th)
321{ 321{
322 322
323 NTOHL(th->th_seq); 323 NTOHL(th->th_seq);
324 NTOHL(th->th_ack); 324 NTOHL(th->th_ack);
325 NTOHS(th->th_win); 325 NTOHS(th->th_win);
326 NTOHS(th->th_urp); 326 NTOHS(th->th_urp);
327} 327}
328 328
329/* 329/*
330 * ... and reverse the above. 330 * ... and reverse the above.
331 */ 331 */
332static void 332static void
333tcp_fields_to_net(struct tcphdr *th) 333tcp_fields_to_net(struct tcphdr *th)
334{ 334{
335 335
336 HTONL(th->th_seq); 336 HTONL(th->th_seq);
337 HTONL(th->th_ack); 337 HTONL(th->th_ack);
338 HTONS(th->th_win); 338 HTONS(th->th_win);
339 HTONS(th->th_urp); 339 HTONS(th->th_urp);
340} 340}
341 341
342#ifdef TCP_CSUM_COUNTERS 342#ifdef TCP_CSUM_COUNTERS
343#include <sys/device.h> 343#include <sys/device.h>
344 344
345#if defined(INET) 345#if defined(INET)
346extern struct evcnt tcp_hwcsum_ok; 346extern struct evcnt tcp_hwcsum_ok;
347extern struct evcnt tcp_hwcsum_bad; 347extern struct evcnt tcp_hwcsum_bad;
348extern struct evcnt tcp_hwcsum_data; 348extern struct evcnt tcp_hwcsum_data;
349extern struct evcnt tcp_swcsum; 349extern struct evcnt tcp_swcsum;
350#endif /* defined(INET) */ 350#endif /* defined(INET) */
351#if defined(INET6) 351#if defined(INET6)
352extern struct evcnt tcp6_hwcsum_ok; 352extern struct evcnt tcp6_hwcsum_ok;
353extern struct evcnt tcp6_hwcsum_bad; 353extern struct evcnt tcp6_hwcsum_bad;
354extern struct evcnt tcp6_hwcsum_data; 354extern struct evcnt tcp6_hwcsum_data;
355extern struct evcnt tcp6_swcsum; 355extern struct evcnt tcp6_swcsum;
356#endif /* defined(INET6) */ 356#endif /* defined(INET6) */
357 357
358#define TCP_CSUM_COUNTER_INCR(ev) (ev)->ev_count++ 358#define TCP_CSUM_COUNTER_INCR(ev) (ev)->ev_count++
359 359
360#else 360#else
361 361
362#define TCP_CSUM_COUNTER_INCR(ev) /* nothing */ 362#define TCP_CSUM_COUNTER_INCR(ev) /* nothing */
363 363
364#endif /* TCP_CSUM_COUNTERS */ 364#endif /* TCP_CSUM_COUNTERS */
365 365
366#ifdef TCP_REASS_COUNTERS 366#ifdef TCP_REASS_COUNTERS
367#include <sys/device.h> 367#include <sys/device.h>
368 368
369extern struct evcnt tcp_reass_; 369extern struct evcnt tcp_reass_;
370extern struct evcnt tcp_reass_empty; 370extern struct evcnt tcp_reass_empty;
371extern struct evcnt tcp_reass_iteration[8]; 371extern struct evcnt tcp_reass_iteration[8];
372extern struct evcnt tcp_reass_prependfirst; 372extern struct evcnt tcp_reass_prependfirst;
373extern struct evcnt tcp_reass_prepend; 373extern struct evcnt tcp_reass_prepend;
374extern struct evcnt tcp_reass_insert; 374extern struct evcnt tcp_reass_insert;
375extern struct evcnt tcp_reass_inserttail; 375extern struct evcnt tcp_reass_inserttail;
376extern struct evcnt tcp_reass_append; 376extern struct evcnt tcp_reass_append;
377extern struct evcnt tcp_reass_appendtail; 377extern struct evcnt tcp_reass_appendtail;
378extern struct evcnt tcp_reass_overlaptail; 378extern struct evcnt tcp_reass_overlaptail;
379extern struct evcnt tcp_reass_overlapfront; 379extern struct evcnt tcp_reass_overlapfront;
380extern struct evcnt tcp_reass_segdup; 380extern struct evcnt tcp_reass_segdup;
381extern struct evcnt tcp_reass_fragdup; 381extern struct evcnt tcp_reass_fragdup;
382 382
383#define TCP_REASS_COUNTER_INCR(ev) (ev)->ev_count++ 383#define TCP_REASS_COUNTER_INCR(ev) (ev)->ev_count++
384 384
385#else 385#else
386 386
387#define TCP_REASS_COUNTER_INCR(ev) /* nothing */ 387#define TCP_REASS_COUNTER_INCR(ev) /* nothing */
388 388
389#endif /* TCP_REASS_COUNTERS */ 389#endif /* TCP_REASS_COUNTERS */
390 390
391static int tcp_reass(struct tcpcb *, const struct tcphdr *, struct mbuf *, 391static int tcp_reass(struct tcpcb *, const struct tcphdr *, struct mbuf *,
392 int *); 392 int *);
393static int tcp_dooptions(struct tcpcb *, const u_char *, int, 393static int tcp_dooptions(struct tcpcb *, const u_char *, int,
394 struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *); 394 struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *);
395 395
396#ifdef INET 396#ifdef INET
397static void tcp4_log_refused(const struct ip *, const struct tcphdr *); 397static void tcp4_log_refused(const struct ip *, const struct tcphdr *);
398#endif 398#endif
399#ifdef INET6 399#ifdef INET6
400static void tcp6_log_refused(const struct ip6_hdr *, const struct tcphdr *); 400static void tcp6_log_refused(const struct ip6_hdr *, const struct tcphdr *);
401#endif 401#endif
402 402
403#define TRAVERSE(x) while ((x)->m_next) (x) = (x)->m_next 403#define TRAVERSE(x) while ((x)->m_next) (x) = (x)->m_next
404 404
405#if defined(MBUFTRACE) 405#if defined(MBUFTRACE)
406struct mowner tcp_reass_mowner = MOWNER_INIT("tcp", "reass"); 406struct mowner tcp_reass_mowner = MOWNER_INIT("tcp", "reass");
407#endif /* defined(MBUFTRACE) */ 407#endif /* defined(MBUFTRACE) */
408 408
409static struct pool tcpipqent_pool; 409static struct pool tcpipqent_pool;
410 410
411void 411void
412tcpipqent_init(void) 412tcpipqent_init(void)
413{ 413{
414 414
415 pool_init(&tcpipqent_pool, sizeof(struct ipqent), 0, 0, 0, "tcpipqepl", 415 pool_init(&tcpipqent_pool, sizeof(struct ipqent), 0, 0, 0, "tcpipqepl",
416 NULL, IPL_VM); 416 NULL, IPL_VM);
417} 417}
418 418
419struct ipqent * 419struct ipqent *
420tcpipqent_alloc(void) 420tcpipqent_alloc(void)
421{ 421{
422 struct ipqent *ipqe; 422 struct ipqent *ipqe;
423 int s; 423 int s;
424 424
425 s = splvm(); 425 s = splvm();
426 ipqe = pool_get(&tcpipqent_pool, PR_NOWAIT); 426 ipqe = pool_get(&tcpipqent_pool, PR_NOWAIT);
427 splx(s); 427 splx(s);
428 428
429 return ipqe; 429 return ipqe;
430} 430}
431 431
432void 432void
433tcpipqent_free(struct ipqent *ipqe) 433tcpipqent_free(struct ipqent *ipqe)
434{ 434{
435 int s; 435 int s;
436 436
437 s = splvm(); 437 s = splvm();
438 pool_put(&tcpipqent_pool, ipqe); 438 pool_put(&tcpipqent_pool, ipqe);
439 splx(s); 439 splx(s);
440} 440}
441 441
/*
 * tcp_reass: place a received segment on tp's reassembly queue, merging it
 * with adjacent or overlapping queue entries, and then hand the head of the
 * queue to the socket if it starts exactly at tp->rcv_nxt.
 *
 * tp    connection whose segq/timeq lists and t_segqlen are updated.
 * th    TCP header of the segment; only th_seq and th_flags are read.
 *       Pass 0 to skip queueing and go straight to the "present" step.
 * m     mbuf chain with the segment data (used only when th is non-zero).
 *       It is linked into a queue entry, or freed when the data is a
 *       complete duplicate or no queue entry can be allocated.
 * tlen  points to the segment's data length; it is read, never written.
 *
 * Returns the TH_FIN bit of the queue entry that was delivered to the
 * socket, or 0 when nothing was delivered.
 *
 * TCP_REASS_UNLOCK(tp) is invoked on every return path, so the caller is
 * presumably expected to hold the reassembly lock on entry (see the
 * TCP_REASS_LOCK_CHECK below) -- confirm against the callers.
 */
442static int 442static int
443tcp_reass(struct tcpcb *tp, const struct tcphdr *th, struct mbuf *m, int *tlen) 443tcp_reass(struct tcpcb *tp, const struct tcphdr *th, struct mbuf *m, int *tlen)
444{ 444{
445 struct ipqent *p, *q, *nq, *tiqe = NULL; 445 struct ipqent *p, *q, *nq, *tiqe = NULL;
446 struct socket *so = NULL; 446 struct socket *so = NULL;
447 int pkt_flags; 447 int pkt_flags;
448 tcp_seq pkt_seq; 448 tcp_seq pkt_seq;
449 unsigned pkt_len; 449 unsigned pkt_len;
450 u_long rcvpartdupbyte = 0; 450 u_long rcvpartdupbyte = 0;
451 u_long rcvoobyte; 451 u_long rcvoobyte;
452#ifdef TCP_REASS_COUNTERS 452#ifdef TCP_REASS_COUNTERS
453 u_int count = 0; 453 u_int count = 0;
454#endif 454#endif
455 uint64_t *tcps; 455 uint64_t *tcps;
456 456
457 if (tp->t_inpcb) 457 if (tp->t_inpcb)
458 so = tp->t_inpcb->inp_socket; 458 so = tp->t_inpcb->inp_socket;
459#ifdef INET6 459#ifdef INET6
460 else if (tp->t_in6pcb) 460 else if (tp->t_in6pcb)
461 so = tp->t_in6pcb->in6p_socket; 461 so = tp->t_in6pcb->in6p_socket;
462#endif 462#endif
/*
 * NOTE(review): so stays NULL when neither PCB pointer is set, yet it is
 * dereferenced without a check in the "present" path below.
 */
463 463
464 TCP_REASS_LOCK_CHECK(tp); 464 TCP_REASS_LOCK_CHECK(tp);
465 465
466 /* 466 /*
467 * Call with th==0 after become established to 467 * Call with th==0 after become established to
468 * force pre-ESTABLISHED data up to user socket. 468 * force pre-ESTABLISHED data up to user socket.
469 */ 469 */
470 if (th == 0) 470 if (th == 0)
471 goto present; 471 goto present;
472 472
473 m_claimm(m, &tcp_reass_mowner); 473 m_claimm(m, &tcp_reass_mowner);
474 474
475 rcvoobyte = *tlen; 475 rcvoobyte = *tlen;
476 /* 476 /*
477 * Copy these to local variables because the tcpiphdr 477 * Copy these to local variables because the tcpiphdr
478 * gets munged while we are collapsing mbufs. 478 * gets munged while we are collapsing mbufs.
479 */ 479 */
480 pkt_seq = th->th_seq; 480 pkt_seq = th->th_seq;
481 pkt_len = *tlen; 481 pkt_len = *tlen;
482 pkt_flags = th->th_flags; 482 pkt_flags = th->th_flags;
483 483
484 TCP_REASS_COUNTER_INCR(&tcp_reass_); 484 TCP_REASS_COUNTER_INCR(&tcp_reass_);
485 485
486 if ((p = TAILQ_LAST(&tp->segq, ipqehead)) != NULL) { 486 if ((p = TAILQ_LAST(&tp->segq, ipqehead)) != NULL) {
487 /* 487 /*
488 * When we miss a packet, the vast majority of time we get 488 * When we miss a packet, the vast majority of time we get
489 * packets that follow it in order. So optimize for that. 489 * packets that follow it in order. So optimize for that.
490 */ 490 */
491 if (pkt_seq == p->ipqe_seq + p->ipqe_len) { 491 if (pkt_seq == p->ipqe_seq + p->ipqe_len) {
492 p->ipqe_len += pkt_len; 492 p->ipqe_len += pkt_len;
493 p->ipqe_flags |= pkt_flags; 493 p->ipqe_flags |= pkt_flags;
494 m_cat(p->ipre_mlast, m); 494 m_cat(p->ipre_mlast, m);
495 TRAVERSE(p->ipre_mlast); 495 TRAVERSE(p->ipre_mlast);
496 m = NULL; 496 m = NULL;
497 tiqe = p; 497 tiqe = p;
/* p stays on segq; it is put back at the head of timeq at skip_replacement. */
498 TAILQ_REMOVE(&tp->timeq, p, ipqe_timeq); 498 TAILQ_REMOVE(&tp->timeq, p, ipqe_timeq);
499 TCP_REASS_COUNTER_INCR(&tcp_reass_appendtail); 499 TCP_REASS_COUNTER_INCR(&tcp_reass_appendtail);
500 goto skip_replacement; 500 goto skip_replacement;
501 } 501 }
502 /* 502 /*
503 * While we're here, if the pkt is completely beyond 503 * While we're here, if the pkt is completely beyond
504 * anything we have, just insert it at the tail. 504 * anything we have, just insert it at the tail.
505 */ 505 */
506 if (SEQ_GT(pkt_seq, p->ipqe_seq + p->ipqe_len)) { 506 if (SEQ_GT(pkt_seq, p->ipqe_seq + p->ipqe_len)) {
507 TCP_REASS_COUNTER_INCR(&tcp_reass_inserttail); 507 TCP_REASS_COUNTER_INCR(&tcp_reass_inserttail);
508 goto insert_it; 508 goto insert_it;
509 } 509 }
510 } 510 }
511 511
512 q = TAILQ_FIRST(&tp->segq); 512 q = TAILQ_FIRST(&tp->segq);
513 513
514 if (q != NULL) { 514 if (q != NULL) {
515 /* 515 /*
516 * If this segment immediately precedes the first out-of-order 516 * If this segment immediately precedes the first out-of-order
517 * block, simply slap the segment in front of it and (mostly) 517 * block, simply slap the segment in front of it and (mostly)
518 * skip the complicated logic. 518 * skip the complicated logic.
519 */ 519 */
520 if (pkt_seq + pkt_len == q->ipqe_seq) { 520 if (pkt_seq + pkt_len == q->ipqe_seq) {
521 q->ipqe_seq = pkt_seq; 521 q->ipqe_seq = pkt_seq;
522 q->ipqe_len += pkt_len; 522 q->ipqe_len += pkt_len;
523 q->ipqe_flags |= pkt_flags; 523 q->ipqe_flags |= pkt_flags;
524 m_cat(m, q->ipqe_m); 524 m_cat(m, q->ipqe_m);
525 q->ipqe_m = m; 525 q->ipqe_m = m;
526 q->ipre_mlast = m; /* last mbuf may have changed */ 526 q->ipre_mlast = m; /* last mbuf may have changed */
527 TRAVERSE(q->ipre_mlast); 527 TRAVERSE(q->ipre_mlast);
528 tiqe = q; 528 tiqe = q;
529 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); 529 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq);
530 TCP_REASS_COUNTER_INCR(&tcp_reass_prependfirst); 530 TCP_REASS_COUNTER_INCR(&tcp_reass_prependfirst);
531 goto skip_replacement; 531 goto skip_replacement;
532 } 532 }
533 } else { 533 } else {
534 TCP_REASS_COUNTER_INCR(&tcp_reass_empty); 534 TCP_REASS_COUNTER_INCR(&tcp_reass_empty);
535 } 535 }
536 536
537 /* 537 /*
538 * Find a segment which begins after this one does. 538 * Find a segment which begins after this one does.
539 */ 539 */
540 for (p = NULL; q != NULL; q = nq) { 540 for (p = NULL; q != NULL; q = nq) {
541 nq = TAILQ_NEXT(q, ipqe_q); 541 nq = TAILQ_NEXT(q, ipqe_q);
542#ifdef TCP_REASS_COUNTERS 542#ifdef TCP_REASS_COUNTERS
543 count++; 543 count++;
544#endif 544#endif
545 /* 545 /*
546 * If the received segment is just right after this 546 * If the received segment is just right after this
547 * fragment, merge the two together and then check 547 * fragment, merge the two together and then check
548 * for further overlaps. 548 * for further overlaps.
549 */ 549 */
550 if (q->ipqe_seq + q->ipqe_len == pkt_seq) { 550 if (q->ipqe_seq + q->ipqe_len == pkt_seq) {
551#ifdef TCPREASS_DEBUG 551#ifdef TCPREASS_DEBUG
552 printf("tcp_reass[%p]: concat %u:%u(%u) to %u:%u(%u)\n", 552 printf("tcp_reass[%p]: concat %u:%u(%u) to %u:%u(%u)\n",
553 tp, pkt_seq, pkt_seq + pkt_len, pkt_len, 553 tp, pkt_seq, pkt_seq + pkt_len, pkt_len,
554 q->ipqe_seq, q->ipqe_seq + q->ipqe_len, q->ipqe_len); 554 q->ipqe_seq, q->ipqe_seq + q->ipqe_len, q->ipqe_len);
555#endif 555#endif
556 pkt_len += q->ipqe_len; 556 pkt_len += q->ipqe_len;
557 pkt_flags |= q->ipqe_flags; 557 pkt_flags |= q->ipqe_flags;
558 pkt_seq = q->ipqe_seq; 558 pkt_seq = q->ipqe_seq;
559 m_cat(q->ipre_mlast, m); 559 m_cat(q->ipre_mlast, m);
560 TRAVERSE(q->ipre_mlast); 560 TRAVERSE(q->ipre_mlast);
561 m = q->ipqe_m; 561 m = q->ipqe_m;
562 TCP_REASS_COUNTER_INCR(&tcp_reass_append); 562 TCP_REASS_COUNTER_INCR(&tcp_reass_append);
563 goto free_ipqe; 563 goto free_ipqe;
564 } 564 }
565 /* 565 /*
566 * If the received segment is completely past this 566 * If the received segment is completely past this
567 * fragment, we need to go the next fragment. 567 * fragment, we need to go the next fragment.
568 */ 568 */
569 if (SEQ_LT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { 569 if (SEQ_LT(q->ipqe_seq + q->ipqe_len, pkt_seq)) {
570 p = q; 570 p = q;
571 continue; 571 continue;
572 } 572 }
573 /* 573 /*
574 * If the fragment is past the received segment, 574 * If the fragment is past the received segment,
575 * it (or any following) can't be concatenated. 575 * it (or any following) can't be concatenated.
576 */ 576 */
577 if (SEQ_GT(q->ipqe_seq, pkt_seq + pkt_len)) { 577 if (SEQ_GT(q->ipqe_seq, pkt_seq + pkt_len)) {
578 TCP_REASS_COUNTER_INCR(&tcp_reass_insert); 578 TCP_REASS_COUNTER_INCR(&tcp_reass_insert);
579 break; 579 break;
580 } 580 }
581 581
582 /* 582 /*
583 * We've received all the data in this segment before. 583 * We've received all the data in this segment before.
584 * mark it as a duplicate and return. 584 * mark it as a duplicate and return.
585 */ 585 */
586 if (SEQ_LEQ(q->ipqe_seq, pkt_seq) && 586 if (SEQ_LEQ(q->ipqe_seq, pkt_seq) &&
587 SEQ_GEQ(q->ipqe_seq + q->ipqe_len, pkt_seq + pkt_len)) { 587 SEQ_GEQ(q->ipqe_seq + q->ipqe_len, pkt_seq + pkt_len)) {
588 tcps = TCP_STAT_GETREF(); 588 tcps = TCP_STAT_GETREF();
589 tcps[TCP_STAT_RCVDUPPACK]++; 589 tcps[TCP_STAT_RCVDUPPACK]++;
590 tcps[TCP_STAT_RCVDUPBYTE] += pkt_len; 590 tcps[TCP_STAT_RCVDUPBYTE] += pkt_len;
591 TCP_STAT_PUTREF(); 591 TCP_STAT_PUTREF();
592 tcp_new_dsack(tp, pkt_seq, pkt_len); 592 tcp_new_dsack(tp, pkt_seq, pkt_len);
593 m_freem(m); 593 m_freem(m);
594 if (tiqe != NULL) { 594 if (tiqe != NULL) {
595 tcpipqent_free(tiqe); 595 tcpipqent_free(tiqe);
596 } 596 }
597 TCP_REASS_COUNTER_INCR(&tcp_reass_segdup); 597 TCP_REASS_COUNTER_INCR(&tcp_reass_segdup);
598 goto out; 598 goto out;
599 } 599 }
600 /* 600 /*
601 * Received segment completely overlaps this fragment 601 * Received segment completely overlaps this fragment
602 * so we drop the fragment (this keeps the temporal 602 * so we drop the fragment (this keeps the temporal
603 * ordering of segments correct). 603 * ordering of segments correct).
604 */ 604 */
605 if (SEQ_GEQ(q->ipqe_seq, pkt_seq) && 605 if (SEQ_GEQ(q->ipqe_seq, pkt_seq) &&
606 SEQ_LEQ(q->ipqe_seq + q->ipqe_len, pkt_seq + pkt_len)) { 606 SEQ_LEQ(q->ipqe_seq + q->ipqe_len, pkt_seq + pkt_len)) {
607 rcvpartdupbyte += q->ipqe_len; 607 rcvpartdupbyte += q->ipqe_len;
608 m_freem(q->ipqe_m); 608 m_freem(q->ipqe_m);
609 TCP_REASS_COUNTER_INCR(&tcp_reass_fragdup); 609 TCP_REASS_COUNTER_INCR(&tcp_reass_fragdup);
610 goto free_ipqe; 610 goto free_ipqe;
611 } 611 }
612 /* 612 /*
613 * RX'ed segment extends past the end of the 613 * RX'ed segment extends past the end of the
614 * fragment. Drop the overlapping bytes. Then 614 * fragment. Drop the overlapping bytes. Then
615 * merge the fragment and segment then treat as 615 * merge the fragment and segment then treat as
616 * a longer received packet. 616 * a longer received packet.
617 */ 617 */
618 if (SEQ_LT(q->ipqe_seq, pkt_seq) && 618 if (SEQ_LT(q->ipqe_seq, pkt_seq) &&
619 SEQ_GT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { 619 SEQ_GT(q->ipqe_seq + q->ipqe_len, pkt_seq)) {
620 int overlap = q->ipqe_seq + q->ipqe_len - pkt_seq; 620 int overlap = q->ipqe_seq + q->ipqe_len - pkt_seq;
621#ifdef TCPREASS_DEBUG 621#ifdef TCPREASS_DEBUG
622 printf("tcp_reass[%p]: trim starting %d bytes of %u:%u(%u)\n", 622 printf("tcp_reass[%p]: trim starting %d bytes of %u:%u(%u)\n",
623 tp, overlap, 623 tp, overlap,
624 pkt_seq, pkt_seq + pkt_len, pkt_len); 624 pkt_seq, pkt_seq + pkt_len, pkt_len);
625#endif 625#endif
626 m_adj(m, overlap); 626 m_adj(m, overlap);
627 rcvpartdupbyte += overlap; 627 rcvpartdupbyte += overlap;
628 m_cat(q->ipre_mlast, m); 628 m_cat(q->ipre_mlast, m);
629 TRAVERSE(q->ipre_mlast); 629 TRAVERSE(q->ipre_mlast);
630 m = q->ipqe_m; 630 m = q->ipqe_m;
631 pkt_seq = q->ipqe_seq; 631 pkt_seq = q->ipqe_seq;
632 pkt_len += q->ipqe_len - overlap; 632 pkt_len += q->ipqe_len - overlap;
633 rcvoobyte -= overlap; 633 rcvoobyte -= overlap;
634 TCP_REASS_COUNTER_INCR(&tcp_reass_overlaptail); 634 TCP_REASS_COUNTER_INCR(&tcp_reass_overlaptail);
635 goto free_ipqe; 635 goto free_ipqe;
636 } 636 }
637 /* 637 /*
638 * RX'ed segment extends past the front of the 638 * RX'ed segment extends past the front of the
639 * fragment. Drop the overlapping bytes on the 639 * fragment. Drop the overlapping bytes on the
640 * received packet. The packet will then be 640 * received packet. The packet will then be
641 * concatenated with this fragment a bit later. 641 * concatenated with this fragment a bit later.
642 */ 642 */
643 if (SEQ_GT(q->ipqe_seq, pkt_seq) && 643 if (SEQ_GT(q->ipqe_seq, pkt_seq) &&
644 SEQ_LT(q->ipqe_seq, pkt_seq + pkt_len)) { 644 SEQ_LT(q->ipqe_seq, pkt_seq + pkt_len)) {
645 int overlap = pkt_seq + pkt_len - q->ipqe_seq; 645 int overlap = pkt_seq + pkt_len - q->ipqe_seq;
646#ifdef TCPREASS_DEBUG 646#ifdef TCPREASS_DEBUG
647 printf("tcp_reass[%p]: trim trailing %d bytes of %u:%u(%u)\n", 647 printf("tcp_reass[%p]: trim trailing %d bytes of %u:%u(%u)\n",
648 tp, overlap, 648 tp, overlap,
649 pkt_seq, pkt_seq + pkt_len, pkt_len); 649 pkt_seq, pkt_seq + pkt_len, pkt_len);
650#endif 650#endif
651 m_adj(m, -overlap); 651 m_adj(m, -overlap);
652 pkt_len -= overlap; 652 pkt_len -= overlap;
653 rcvpartdupbyte += overlap; 653 rcvpartdupbyte += overlap;
654 TCP_REASS_COUNTER_INCR(&tcp_reass_overlapfront); 654 TCP_REASS_COUNTER_INCR(&tcp_reass_overlapfront);
655 rcvoobyte -= overlap; 655 rcvoobyte -= overlap;
656 } 656 }
657 /* 657 /*
658 * If the received segment immediately precedes this 658 * If the received segment immediately precedes this
659 * fragment then tack the fragment onto this segment 659 * fragment then tack the fragment onto this segment
660 * and reinsert the data. 660 * and reinsert the data.
661 */ 661 */
662 if (q->ipqe_seq == pkt_seq + pkt_len) { 662 if (q->ipqe_seq == pkt_seq + pkt_len) {
663#ifdef TCPREASS_DEBUG 663#ifdef TCPREASS_DEBUG
664 printf("tcp_reass[%p]: append %u:%u(%u) to %u:%u(%u)\n", 664 printf("tcp_reass[%p]: append %u:%u(%u) to %u:%u(%u)\n",
665 tp, q->ipqe_seq, q->ipqe_seq + q->ipqe_len, q->ipqe_len, 665 tp, q->ipqe_seq, q->ipqe_seq + q->ipqe_len, q->ipqe_len,
666 pkt_seq, pkt_seq + pkt_len, pkt_len); 666 pkt_seq, pkt_seq + pkt_len, pkt_len);
667#endif 667#endif
668 pkt_len += q->ipqe_len; 668 pkt_len += q->ipqe_len;
669 pkt_flags |= q->ipqe_flags; 669 pkt_flags |= q->ipqe_flags;
670 m_cat(m, q->ipqe_m); 670 m_cat(m, q->ipqe_m);
671 TAILQ_REMOVE(&tp->segq, q, ipqe_q); 671 TAILQ_REMOVE(&tp->segq, q, ipqe_q);
672 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); 672 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq);
673 tp->t_segqlen--; 673 tp->t_segqlen--;
674 KASSERT(tp->t_segqlen >= 0); 674 KASSERT(tp->t_segqlen >= 0);
675 KASSERT(tp->t_segqlen != 0 || 675 KASSERT(tp->t_segqlen != 0 ||
676 (TAILQ_EMPTY(&tp->segq) && 676 (TAILQ_EMPTY(&tp->segq) &&
677 TAILQ_EMPTY(&tp->timeq))); 677 TAILQ_EMPTY(&tp->timeq)));
678 if (tiqe == NULL) { 678 if (tiqe == NULL) {
679 tiqe = q; 679 tiqe = q;
680 } else { 680 } else {
681 tcpipqent_free(q); 681 tcpipqent_free(q);
682 } 682 }
683 TCP_REASS_COUNTER_INCR(&tcp_reass_prepend); 683 TCP_REASS_COUNTER_INCR(&tcp_reass_prepend);
684 break; 684 break;
685 } 685 }
686 /* 686 /*
687 * If the fragment is before the segment, remember it. 687 * If the fragment is before the segment, remember it.
688 * When this loop is terminated, p will contain the 688 * When this loop is terminated, p will contain the
689 * pointer to fragment that is right before the received 689 * pointer to fragment that is right before the received
690 * segment. 690 * segment.
691 */ 691 */
692 if (SEQ_LEQ(q->ipqe_seq, pkt_seq)) 692 if (SEQ_LEQ(q->ipqe_seq, pkt_seq))
693 p = q; 693 p = q;
694 694
695 continue; 695 continue;
696 696
697 /* 697 /*
698 * This is a common operation. It also will allow 698 * This is a common operation. It also will allow
699 * to save doing a malloc/free in most instances. 699 * to save doing a malloc/free in most instances.
700 */ 700 */
701 free_ipqe: 701 free_ipqe:
/*
 * Reached only by goto (the continue above keeps normal flow out).  q has
 * been merged into, or superseded by, the packet: unlink it from both
 * queues, keep the first such entry in tiqe for reuse and free any further
 * ones.  The for loop then carries on with nq.
 */
702 TAILQ_REMOVE(&tp->segq, q, ipqe_q); 702 TAILQ_REMOVE(&tp->segq, q, ipqe_q);
703 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); 703 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq);
704 tp->t_segqlen--; 704 tp->t_segqlen--;
705 KASSERT(tp->t_segqlen >= 0); 705 KASSERT(tp->t_segqlen >= 0);
706 KASSERT(tp->t_segqlen != 0 || 706 KASSERT(tp->t_segqlen != 0 ||
707 (TAILQ_EMPTY(&tp->segq) && TAILQ_EMPTY(&tp->timeq))); 707 (TAILQ_EMPTY(&tp->segq) && TAILQ_EMPTY(&tp->timeq)));
708 if (tiqe == NULL) { 708 if (tiqe == NULL) {
709 tiqe = q; 709 tiqe = q;
710 } else { 710 } else {
711 tcpipqent_free(q); 711 tcpipqent_free(q);
712 } 712 }
713 } 713 }
714 714
715#ifdef TCP_REASS_COUNTERS 715#ifdef TCP_REASS_COUNTERS
716 if (count > 7) 716 if (count > 7)
717 TCP_REASS_COUNTER_INCR(&tcp_reass_iteration[0]); 717 TCP_REASS_COUNTER_INCR(&tcp_reass_iteration[0]);
718 else if (count > 0) 718 else if (count > 0)
719 TCP_REASS_COUNTER_INCR(&tcp_reass_iteration[count]); 719 TCP_REASS_COUNTER_INCR(&tcp_reass_iteration[count]);
720#endif 720#endif
721 721
722 insert_it: 722 insert_it:
723 723
724 /* 724 /*
725 * Allocate a new queue entry since the received segment did not 725 * Allocate a new queue entry since the received segment did not
726 * collapse onto any other out-of-order block; thus we are allocating 726 * collapse onto any other out-of-order block; thus we are allocating
727 * a new block. If it had collapsed, tiqe would not be NULL and 727 * a new block. If it had collapsed, tiqe would not be NULL and
728 * we would be reusing it. 728 * we would be reusing it.
729 * XXX If we can't, just drop the packet. XXX 729 * XXX If we can't, just drop the packet. XXX
730 */ 730 */
731 if (tiqe == NULL) { 731 if (tiqe == NULL) {
732 tiqe = tcpipqent_alloc(); 732 tiqe = tcpipqent_alloc();
733 if (tiqe == NULL) { 733 if (tiqe == NULL) {
734 TCP_STATINC(TCP_STAT_RCVMEMDROP); 734 TCP_STATINC(TCP_STAT_RCVMEMDROP);
735 m_freem(m); 735 m_freem(m);
736 goto out; 736 goto out;
737 } 737 }
738 } 738 }
739 739
740 /* 740 /*
741 * Update the counters. 741 * Update the counters.
742 */ 742 */
743 tp->t_rcvoopack++; 743 tp->t_rcvoopack++;
744 tcps = TCP_STAT_GETREF(); 744 tcps = TCP_STAT_GETREF();
745 tcps[TCP_STAT_RCVOOPACK]++; 745 tcps[TCP_STAT_RCVOOPACK]++;
746 tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte; 746 tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte;
747 if (rcvpartdupbyte) { 747 if (rcvpartdupbyte) {
748 tcps[TCP_STAT_RCVPARTDUPPACK]++; 748 tcps[TCP_STAT_RCVPARTDUPPACK]++;
749 tcps[TCP_STAT_RCVPARTDUPBYTE] += rcvpartdupbyte; 749 tcps[TCP_STAT_RCVPARTDUPBYTE] += rcvpartdupbyte;
750 } 750 }
751 TCP_STAT_PUTREF(); 751 TCP_STAT_PUTREF();
752 752
753 /* 753 /*
754 * Insert the new fragment queue entry into both queues. 754 * Insert the new fragment queue entry into both queues.
755 */ 755 */
756 tiqe->ipqe_m = m; 756 tiqe->ipqe_m = m;
757 tiqe->ipre_mlast = m; 757 tiqe->ipre_mlast = m;
758 tiqe->ipqe_seq = pkt_seq; 758 tiqe->ipqe_seq = pkt_seq;
759 tiqe->ipqe_len = pkt_len; 759 tiqe->ipqe_len = pkt_len;
760 tiqe->ipqe_flags = pkt_flags; 760 tiqe->ipqe_flags = pkt_flags;
761 if (p == NULL) { 761 if (p == NULL) {
762 TAILQ_INSERT_HEAD(&tp->segq, tiqe, ipqe_q); 762 TAILQ_INSERT_HEAD(&tp->segq, tiqe, ipqe_q);
763#ifdef TCPREASS_DEBUG 763#ifdef TCPREASS_DEBUG
764 if (tiqe->ipqe_seq != tp->rcv_nxt) 764 if (tiqe->ipqe_seq != tp->rcv_nxt)
765 printf("tcp_reass[%p]: insert %u:%u(%u) at front\n", 765 printf("tcp_reass[%p]: insert %u:%u(%u) at front\n",
766 tp, pkt_seq, pkt_seq + pkt_len, pkt_len); 766 tp, pkt_seq, pkt_seq + pkt_len, pkt_len);
767#endif 767#endif
768 } else { 768 } else {
769 TAILQ_INSERT_AFTER(&tp->segq, p, tiqe, ipqe_q); 769 TAILQ_INSERT_AFTER(&tp->segq, p, tiqe, ipqe_q);
770#ifdef TCPREASS_DEBUG 770#ifdef TCPREASS_DEBUG
771 printf("tcp_reass[%p]: insert %u:%u(%u) after %u:%u(%u)\n", 771 printf("tcp_reass[%p]: insert %u:%u(%u) after %u:%u(%u)\n",
772 tp, pkt_seq, pkt_seq + pkt_len, pkt_len, 772 tp, pkt_seq, pkt_seq + pkt_len, pkt_len,
773 p->ipqe_seq, p->ipqe_seq + p->ipqe_len, p->ipqe_len); 773 p->ipqe_seq, p->ipqe_seq + p->ipqe_len, p->ipqe_len);
774#endif 774#endif
775 } 775 }
776 tp->t_segqlen++; 776 tp->t_segqlen++;
777 777
778skip_replacement: 778skip_replacement:
779 779
/* Entries arriving here by goto are still on segq; only timeq is updated. */
780 TAILQ_INSERT_HEAD(&tp->timeq, tiqe, ipqe_timeq); 780 TAILQ_INSERT_HEAD(&tp->timeq, tiqe, ipqe_timeq);
781 781
782present: 782present:
783 /* 783 /*
784 * Present data to user, advancing rcv_nxt through 784 * Present data to user, advancing rcv_nxt through
785 * completed sequence space. 785 * completed sequence space.
786 */ 786 */
787 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 787 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
788 goto out; 788 goto out;
/* Only the first queue entry is examined and delivered per call. */
789 q = TAILQ_FIRST(&tp->segq); 789 q = TAILQ_FIRST(&tp->segq);
790 if (q == NULL || q->ipqe_seq != tp->rcv_nxt) 790 if (q == NULL || q->ipqe_seq != tp->rcv_nxt)
791 goto out; 791 goto out;
792 if (tp->t_state == TCPS_SYN_RECEIVED && q->ipqe_len) 792 if (tp->t_state == TCPS_SYN_RECEIVED && q->ipqe_len)
793 goto out; 793 goto out;
794 794
795 tp->rcv_nxt += q->ipqe_len; 795 tp->rcv_nxt += q->ipqe_len;
796 pkt_flags = q->ipqe_flags & TH_FIN; 796 pkt_flags = q->ipqe_flags & TH_FIN;
797 nd6_hint(tp); 797 nd6_hint(tp);
798 798
799 TAILQ_REMOVE(&tp->segq, q, ipqe_q); 799 TAILQ_REMOVE(&tp->segq, q, ipqe_q);
800 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); 800 TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq);
801 tp->t_segqlen--; 801 tp->t_segqlen--;
802 KASSERT(tp->t_segqlen >= 0); 802 KASSERT(tp->t_segqlen >= 0);
803 KASSERT(tp->t_segqlen != 0 || 803 KASSERT(tp->t_segqlen != 0 ||
804 (TAILQ_EMPTY(&tp->segq) && TAILQ_EMPTY(&tp->timeq))); 804 (TAILQ_EMPTY(&tp->segq) && TAILQ_EMPTY(&tp->timeq)));
805 if (so->so_state & SS_CANTRCVMORE) 805 if (so->so_state & SS_CANTRCVMORE)
806 m_freem(q->ipqe_m); 806 m_freem(q->ipqe_m);
807 else 807 else
808 sbappendstream(&so->so_rcv, q->ipqe_m); 808 sbappendstream(&so->so_rcv, q->ipqe_m);
809 tcpipqent_free(q); 809 tcpipqent_free(q);
810 TCP_REASS_UNLOCK(tp); 810 TCP_REASS_UNLOCK(tp);
811 sorwakeup(so); 811 sorwakeup(so);
812 return (pkt_flags); 812 return (pkt_flags);
813out: 813out:
814 TCP_REASS_UNLOCK(tp); 814 TCP_REASS_UNLOCK(tp);
815 return (0); 815 return (0);
816} 816}
817 817
818#ifdef INET6 818#ifdef INET6
/*
 * tcp6_input: IPv6 front end for tcp_input().
 *
 * mp     points to the received mbuf chain; *mp is read but never written
 *        back, even after m_pullup() may have replaced the head mbuf.
 * offp   points to the offset that is forwarded to tcp_input().
 * proto  forwarded to tcp_input() unchanged.
 *
 * A packet flagged M_ANYCAST6 is not passed on: once at least an ip6_hdr
 * is contiguous in the first mbuf, an ICMP6_DST_UNREACH /
 * ICMP6_DST_UNREACH_ADDR error is issued whose last argument is the byte
 * offset of ip6_dst within the header.  If the pullup fails,
 * TCP_STAT_RCVSHORT is incremented instead.
 *
 * Always returns IPPROTO_DONE.
 */
819int 819int
820tcp6_input(struct mbuf **mp, int *offp, int proto) 820tcp6_input(struct mbuf **mp, int *offp, int proto)
821{ 821{
822 struct mbuf *m = *mp; 822 struct mbuf *m = *mp;
823 823
824 /* 824 /*
825 * draft-itojun-ipv6-tcp-to-anycast 825 * draft-itojun-ipv6-tcp-to-anycast
826 * better place to put this in? 826 * better place to put this in?
827 */ 827 */
828 if (m->m_flags & M_ANYCAST6) { 828 if (m->m_flags & M_ANYCAST6) {
829 struct ip6_hdr *ip6; 829 struct ip6_hdr *ip6;
830 if (m->m_len < sizeof(struct ip6_hdr)) { 830 if (m->m_len < sizeof(struct ip6_hdr)) {
831 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { 831 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
832 TCP_STATINC(TCP_STAT_RCVSHORT); 832 TCP_STATINC(TCP_STAT_RCVSHORT);
833 return IPPROTO_DONE; 833 return IPPROTO_DONE;
834 } 834 }
835 } 835 }
836 ip6 = mtod(m, struct ip6_hdr *); 836 ip6 = mtod(m, struct ip6_hdr *);
837 icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 837 icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
838 (char *)&ip6->ip6_dst - (char *)ip6); 838 (char *)&ip6->ip6_dst - (char *)ip6);
839 return IPPROTO_DONE; 839 return IPPROTO_DONE;
840 } 840 }
841 841
842 tcp_input(m, *offp, proto); 842 tcp_input(m, *offp, proto);
843 return IPPROTO_DONE; 843 return IPPROTO_DONE;
844} 844}
845#endif 845#endif
846 846
847#ifdef INET 847#ifdef INET
/*
 * tcp4_log_refused: emit a LOG_INFO "Connection attempt to TCP ..." line
 * for an IPv4 segment.
 *
 * ip  IPv4 header supplying the addresses; may be NULL, in which case both
 *     addresses are printed as "(unknown)".
 * th  TCP header supplying the ports (converted with ntohs); it is
 *     dereferenced unconditionally and so must not be NULL.
 */
848static void 848static void
849tcp4_log_refused(const struct ip *ip, const struct tcphdr *th) 849tcp4_log_refused(const struct ip *ip, const struct tcphdr *th)
850{ 850{
851 char src[INET_ADDRSTRLEN]; 851 char src[INET_ADDRSTRLEN];
852 char dst[INET_ADDRSTRLEN]; 852 char dst[INET_ADDRSTRLEN];
853 853
854 if (ip) { 854 if (ip) {
855 in_print(src, sizeof(src), &ip->ip_src); 855 in_print(src, sizeof(src), &ip->ip_src);
856 in_print(dst, sizeof(dst), &ip->ip_dst); 856 in_print(dst, sizeof(dst), &ip->ip_dst);
857 } 857 }
858 else { 858 else {
859 strlcpy(src, "(unknown)", sizeof(src)); 859 strlcpy(src, "(unknown)", sizeof(src));
860 strlcpy(dst, "(unknown)", sizeof(dst)); 860 strlcpy(dst, "(unknown)", sizeof(dst));
861 } 861 }
862 log(LOG_INFO, 862 log(LOG_INFO,
863 "Connection attempt to TCP %s:%d from %s:%d\n", 863 "Connection attempt to TCP %s:%d from %s:%d\n",
864 dst, ntohs(th->th_dport), 864 dst, ntohs(th->th_dport),
865 src, ntohs(th->th_sport)); 865 src, ntohs(th->th_sport));
866} 866}
867#endif 867#endif
868 868
869#ifdef INET6 869#ifdef INET6
/*
 * tcp6_log_refused: IPv6 counterpart of tcp4_log_refused().
 *
 * ip6  IPv6 header supplying the addresses, printed inside square
 *      brackets; may be NULL, in which case both are "(unknown v6)".
 * th   TCP header supplying the ports (converted with ntohs); it is
 *      dereferenced unconditionally and so must not be NULL.
 */
870static void 870static void
871tcp6_log_refused(const struct ip6_hdr *ip6, const struct tcphdr *th) 871tcp6_log_refused(const struct ip6_hdr *ip6, const struct tcphdr *th)
872{ 872{
873 char src[INET6_ADDRSTRLEN]; 873 char src[INET6_ADDRSTRLEN];
874 char dst[INET6_ADDRSTRLEN]; 874 char dst[INET6_ADDRSTRLEN];
875 875
876 if (ip6) { 876 if (ip6) {
877 in6_print(src, sizeof(src), &ip6->ip6_src); 877 in6_print(src, sizeof(src), &ip6->ip6_src);
878 in6_print(dst, sizeof(dst), &ip6->ip6_dst); 878 in6_print(dst, sizeof(dst), &ip6->ip6_dst);
879 } 879 }
880 else { 880 else {
881 strlcpy(src, "(unknown v6)", sizeof(src)); 881 strlcpy(src, "(unknown v6)", sizeof(src));
882 strlcpy(dst, "(unknown v6)", sizeof(dst)); 882 strlcpy(dst, "(unknown v6)", sizeof(dst));
883 } 883 }
884 log(LOG_INFO, 884 log(LOG_INFO,
885 "Connection attempt to TCP [%s]:%d from [%s]:%d\n", 885 "Connection attempt to TCP [%s]:%d from [%s]:%d\n",
886 dst, ntohs(th->th_dport), 886 dst, ntohs(th->th_dport),
887 src, ntohs(th->th_sport)); 887 src, ntohs(th->th_sport));
888} 888}
889#endif 889#endif
890 890
891/* 891/*
892 * Checksum extended TCP header and data. 892 * Checksum extended TCP header and data.
 *
 * af    AF_INET or AF_INET6 (each only when compiled in).  Any other value
 *       matches no case of the switch and is reported as success.
 * m     packet; its receive interface and m_pkthdr checksum flags decide
 *       whether a hardware result is used or the sum is computed here.
 * th    not referenced in the body.
 * toff  offset handed to in4_cksum()/in6_cksum() as the start of the TCP
 *       header.
 * off, tlen
 *       their sum is the number of bytes covered by the checksum.
 *
 * Returns 0 when the checksum is accepted (or deliberately skipped for
 * loopback traffic while tcp_do_loopback_cksum is zero) and -1 otherwise.
 * Every -1 return increments TCP_STAT_RCVBADSUM, including the case where
 * the receive interface cannot be looked up; m_put_rcvif() is called with
 * a NULL rcvif on that path.
 893 */ 893 */
894int 894int
895tcp_input_checksum(int af, struct mbuf *m, const struct tcphdr *th, 895tcp_input_checksum(int af, struct mbuf *m, const struct tcphdr *th,
896 int toff, int off, int tlen) 896 int toff, int off, int tlen)
897{ 897{
898 struct ifnet *rcvif; 898 struct ifnet *rcvif;
899 int s; 899 int s;
900 900
901 /* 901 /*
902 * XXX it's better to record and check if this mbuf is 902 * XXX it's better to record and check if this mbuf is
903 * already checked. 903 * already checked.
904 */ 904 */
905 905
906 rcvif = m_get_rcvif(m, &s); 906 rcvif = m_get_rcvif(m, &s);
907 if (__predict_false(rcvif == NULL)) 907 if (__predict_false(rcvif == NULL))
908 goto badcsum; /* XXX */ 908 goto badcsum; /* XXX */
909 909
910 switch (af) { 910 switch (af) {
911#ifdef INET 911#ifdef INET
912 case AF_INET: 912 case AF_INET:
913 switch (m->m_pkthdr.csum_flags & 913 switch (m->m_pkthdr.csum_flags &
914 ((rcvif->if_csum_flags_rx & M_CSUM_TCPv4) | 914 ((rcvif->if_csum_flags_rx & M_CSUM_TCPv4) |
915 M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) { 915 M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
916 case M_CSUM_TCPv4|M_CSUM_TCP_UDP_BAD: 916 case M_CSUM_TCPv4|M_CSUM_TCP_UDP_BAD:
917 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_bad); 917 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_bad);
918 goto badcsum; 918 goto badcsum;
919 919
920 case M_CSUM_TCPv4|M_CSUM_DATA: { 920 case M_CSUM_TCPv4|M_CSUM_DATA: {
921 u_int32_t hw_csum = m->m_pkthdr.csum_data; 921 u_int32_t hw_csum = m->m_pkthdr.csum_data;
922 922
923 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_data); 923 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_data);
924 if (m->m_pkthdr.csum_flags & M_CSUM_NO_PSEUDOHDR) { 924 if (m->m_pkthdr.csum_flags & M_CSUM_NO_PSEUDOHDR) {
925 const struct ip *ip = 925 const struct ip *ip =
926 mtod(m, const struct ip *); 926 mtod(m, const struct ip *);
927 927
928 hw_csum = in_cksum_phdr(ip->ip_src.s_addr, 928 hw_csum = in_cksum_phdr(ip->ip_src.s_addr,
929 ip->ip_dst.s_addr, 929 ip->ip_dst.s_addr,
930 htons(hw_csum + tlen + off + IPPROTO_TCP)); 930 htons(hw_csum + tlen + off + IPPROTO_TCP));
931 } 931 }
932 if ((hw_csum ^ 0xffff) != 0) 932 if ((hw_csum ^ 0xffff) != 0)
933 goto badcsum; 933 goto badcsum;
934 break; 934 break;
935 } 935 }
936 936
937 case M_CSUM_TCPv4: 937 case M_CSUM_TCPv4:
938 /* Checksum was okay. */ 938 /* Checksum was okay. */
939 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_ok); 939 TCP_CSUM_COUNTER_INCR(&tcp_hwcsum_ok);
940 break; 940 break;
941 941
942 default: 942 default:
943 /* 943 /*
944 * Must compute it ourselves. Maybe skip checksum 944 * Must compute it ourselves. Maybe skip checksum
945 * on loopback interfaces. 945 * on loopback interfaces.
946 */ 946 */
947 if (__predict_true(!(rcvif->if_flags & IFF_LOOPBACK) || 947 if (__predict_true(!(rcvif->if_flags & IFF_LOOPBACK) ||
948 tcp_do_loopback_cksum)) { 948 tcp_do_loopback_cksum)) {
949 TCP_CSUM_COUNTER_INCR(&tcp_swcsum); 949 TCP_CSUM_COUNTER_INCR(&tcp_swcsum);
950 if (in4_cksum(m, IPPROTO_TCP, toff, 950 if (in4_cksum(m, IPPROTO_TCP, toff,
951 tlen + off) != 0) 951 tlen + off) != 0)
952 goto badcsum; 952 goto badcsum;
953 } 953 }
954 break; 954 break;
955 } 955 }
956 break; 956 break;
957#endif /* INET */ 957#endif /* INET */
958 958
959#ifdef INET6 959#ifdef INET6
960 case AF_INET6: 960 case AF_INET6:
961 switch (m->m_pkthdr.csum_flags & 961 switch (m->m_pkthdr.csum_flags &
962 ((rcvif->if_csum_flags_rx & M_CSUM_TCPv6) | 962 ((rcvif->if_csum_flags_rx & M_CSUM_TCPv6) |
963 M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) { 963 M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
964 case M_CSUM_TCPv6|M_CSUM_TCP_UDP_BAD: 964 case M_CSUM_TCPv6|M_CSUM_TCP_UDP_BAD:
965 TCP_CSUM_COUNTER_INCR(&tcp6_hwcsum_bad); 965 TCP_CSUM_COUNTER_INCR(&tcp6_hwcsum_bad);
966 goto badcsum; 966 goto badcsum;
967 967
968#if 0 /* notyet */ 968#if 0 /* notyet */
969 case M_CSUM_TCPv6|M_CSUM_DATA: 969 case M_CSUM_TCPv6|M_CSUM_DATA:
970#endif 970#endif
971 971
972 case M_CSUM_TCPv6: 972 case M_CSUM_TCPv6:
973 /* Checksum was okay. */ 973 /* Checksum was okay. */
974 TCP_CSUM_COUNTER_INCR(&tcp6_hwcsum_ok); 974 TCP_CSUM_COUNTER_INCR(&tcp6_hwcsum_ok);
975 break; 975 break;
976 976
977 default: 977 default:
978 /* 978 /*
979 * Must compute it ourselves. Maybe skip checksum 979 * Must compute it ourselves. Maybe skip checksum
980 * on loopback interfaces. 980 * on loopback interfaces.
981 */ 981 */
982 if (__predict_true((m->m_flags & M_LOOP) == 0 || 982 if (__predict_true((m->m_flags & M_LOOP) == 0 ||
983 tcp_do_loopback_cksum)) { 983 tcp_do_loopback_cksum)) {
984 TCP_CSUM_COUNTER_INCR(&tcp6_swcsum); 984 TCP_CSUM_COUNTER_INCR(&tcp6_swcsum);
985 if (in6_cksum(m, IPPROTO_TCP, toff, 985 if (in6_cksum(m, IPPROTO_TCP, toff,
986 tlen + off) != 0) 986 tlen + off) != 0)
987 goto badcsum; 987 goto badcsum;
988 } 988 }
989 } 989 }
990 break; 990 break;
991#endif /* INET6 */ 991#endif /* INET6 */
992 } 992 }
993 m_put_rcvif(rcvif, &s); 993 m_put_rcvif(rcvif, &s);
994 994
995 return 0; 995 return 0;
996 996
997badcsum: 997badcsum:
998 m_put_rcvif(rcvif, &s); 998 m_put_rcvif(rcvif, &s);
999 TCP_STATINC(TCP_STAT_RCVBADSUM); 999 TCP_STATINC(TCP_STAT_RCVBADSUM);
1000 return -1; 1000 return -1;
1001} 1001}
1002 1002
1003/* When a packet arrives addressed to a vestigial tcpbp, we 1003/* When a packet arrives addressed to a vestigial tcpbp, we
1004 * nevertheless have to respond to it per the spec. 1004 * nevertheless have to respond to it per the spec.
1005 */ 1005 */
1006static void tcp_vtw_input(struct tcphdr *th, vestigial_inpcb_t *vp, 1006static void tcp_vtw_input(struct tcphdr *th, vestigial_inpcb_t *vp,
1007 struct mbuf *m, int tlen, int multicast) 1007 struct mbuf *m, int tlen, int multicast)
1008{ 1008{
1009 int tiflags; 1009 int tiflags;
1010 int todrop; 1010 int todrop;
1011 uint32_t t_flags = 0; 1011 uint32_t t_flags = 0;
1012 uint64_t *tcps; 1012 uint64_t *tcps;
1013 1013
1014 tiflags = th->th_flags; 1014 tiflags = th->th_flags;
1015 todrop = vp->rcv_nxt - th->th_seq; 1015 todrop = vp->rcv_nxt - th->th_seq;
1016 1016
1017 if (todrop > 0) { 1017 if (todrop > 0) {
1018 if (tiflags & TH_SYN) { 1018 if (tiflags & TH_SYN) {
1019 tiflags &= ~TH_SYN; 1019 tiflags &= ~TH_SYN;
1020 ++th->th_seq; 1020 ++th->th_seq;
1021 if (th->th_urp > 1) 1021 if (th->th_urp > 1)
1022 --th->th_urp; 1022 --th->th_urp;
1023 else { 1023 else {
1024 tiflags &= ~TH_URG; 1024 tiflags &= ~TH_URG;
1025 th->th_urp = 0; 1025 th->th_urp = 0;
1026 } 1026 }
1027 --todrop; 1027 --todrop;
1028 } 1028 }
1029 if (todrop > tlen || 1029 if (todrop > tlen ||
1030 (todrop == tlen && (tiflags & TH_FIN) == 0)) { 1030 (todrop == tlen && (tiflags & TH_FIN) == 0)) {
1031 /* 1031 /*
1032 * Any valid FIN or RST must be to the left of the 1032 * Any valid FIN or RST must be to the left of the
1033 * window. At this point the FIN or RST must be a 1033 * window. At this point the FIN or RST must be a
1034 * duplicate or out of sequence; drop it. 1034 * duplicate or out of sequence; drop it.
1035 */ 1035 */
1036 if (tiflags & TH_RST) 1036 if (tiflags & TH_RST)
1037 goto drop; 1037 goto drop;
1038 tiflags &= ~(TH_FIN|TH_RST); 1038 tiflags &= ~(TH_FIN|TH_RST);
1039 /* 1039 /*
1040 * Send an ACK to resynchronize and drop any data. 1040 * Send an ACK to resynchronize and drop any data.
1041 * But keep on processing for RST or ACK. 1041 * But keep on processing for RST or ACK.
1042 */ 1042 */
1043 t_flags |= TF_ACKNOW; 1043 t_flags |= TF_ACKNOW;
1044 todrop = tlen; 1044 todrop = tlen;
1045 tcps = TCP_STAT_GETREF(); 1045 tcps = TCP_STAT_GETREF();
1046 tcps[TCP_STAT_RCVDUPPACK] += 1; 1046 tcps[TCP_STAT_RCVDUPPACK] += 1;
1047 tcps[TCP_STAT_RCVDUPBYTE] += todrop; 1047 tcps[TCP_STAT_RCVDUPBYTE] += todrop;
1048 TCP_STAT_PUTREF(); 1048 TCP_STAT_PUTREF();
1049 } else if ((tiflags & TH_RST) 1049 } else if ((tiflags & TH_RST)
1050 && th->th_seq != vp->rcv_nxt) { 1050 && th->th_seq != vp->rcv_nxt) {
1051 /* 1051 /*
1052 * Test for reset before adjusting the sequence 1052 * Test for reset before adjusting the sequence
1053 * number for overlapping data. 1053 * number for overlapping data.
1054 */ 1054 */
1055 goto dropafterack_ratelim; 1055 goto dropafterack_ratelim;
1056 } else { 1056 } else {
1057 tcps = TCP_STAT_GETREF(); 1057 tcps = TCP_STAT_GETREF();
1058 tcps[TCP_STAT_RCVPARTDUPPACK] += 1; 1058 tcps[TCP_STAT_RCVPARTDUPPACK] += 1;
1059 tcps[TCP_STAT_RCVPARTDUPBYTE] += todrop; 1059 tcps[TCP_STAT_RCVPARTDUPBYTE] += todrop;
1060 TCP_STAT_PUTREF(); 1060 TCP_STAT_PUTREF();
1061 } 1061 }
1062 1062
1063// tcp_new_dsack(tp, th->th_seq, todrop); 1063// tcp_new_dsack(tp, th->th_seq, todrop);
1064// hdroptlen += todrop; /*drop from head afterwards*/ 1064// hdroptlen += todrop; /*drop from head afterwards*/
1065 1065
1066 th->th_seq += todrop; 1066 th->th_seq += todrop;
1067 tlen -= todrop; 1067 tlen -= todrop;
1068 1068
1069 if (th->th_urp > todrop) 1069 if (th->th_urp > todrop)
1070 th->th_urp -= todrop; 1070 th->th_urp -= todrop;
1071 else { 1071 else {
1072 tiflags &= ~TH_URG; 1072 tiflags &= ~TH_URG;
1073 th->th_urp = 0; 1073 th->th_urp = 0;
1074 } 1074 }
1075 } 1075 }
1076 1076
1077 /* 1077 /*
1078 * If new data are received on a connection after the 1078 * If new data are received on a connection after the
1079 * user processes are gone, then RST the other end. 1079 * user processes are gone, then RST the other end.
1080 */ 1080 */
1081 if (tlen) { 1081 if (tlen) {
1082 TCP_STATINC(TCP_STAT_RCVAFTERCLOSE); 1082 TCP_STATINC(TCP_STAT_RCVAFTERCLOSE);
1083 goto dropwithreset; 1083 goto dropwithreset;
1084 } 1084 }
1085 1085
1086 /* 1086 /*
1087 * If segment ends after window, drop trailing data 1087 * If segment ends after window, drop trailing data
1088 * (and PUSH and FIN); if nothing left, just ACK. 1088 * (and PUSH and FIN); if nothing left, just ACK.
1089 */ 1089 */
1090 todrop = (th->th_seq + tlen) - (vp->rcv_nxt+vp->rcv_wnd); 1090 todrop = (th->th_seq + tlen) - (vp->rcv_nxt+vp->rcv_wnd);
1091 1091
1092 if (todrop > 0) { 1092 if (todrop > 0) {
1093 TCP_STATINC(TCP_STAT_RCVPACKAFTERWIN); 1093 TCP_STATINC(TCP_STAT_RCVPACKAFTERWIN);
1094 if (todrop >= tlen) { 1094 if (todrop >= tlen) {
1095 /* 1095 /*
1096 * The segment actually starts after the window. 1096 * The segment actually starts after the window.
1097 * th->th_seq + tlen - vp->rcv_nxt - vp->rcv_wnd >= tlen 1097 * th->th_seq + tlen - vp->rcv_nxt - vp->rcv_wnd >= tlen
1098 * th->th_seq - vp->rcv_nxt - vp->rcv_wnd >= 0 1098 * th->th_seq - vp->rcv_nxt - vp->rcv_wnd >= 0
1099 * th->th_seq >= vp->rcv_nxt + vp->rcv_wnd 1099 * th->th_seq >= vp->rcv_nxt + vp->rcv_wnd
1100 */ 1100 */
1101 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, tlen); 1101 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, tlen);
1102 /* 1102 /*
1103 * If a new connection request is received 1103 * If a new connection request is received
1104 * while in TIME_WAIT, drop the old connection 1104 * while in TIME_WAIT, drop the old connection
1105 * and start over if the sequence numbers 1105 * and start over if the sequence numbers
1106 * are above the previous ones. 1106 * are above the previous ones.
1107 */ 1107 */
1108 if ((tiflags & TH_SYN) 1108 if ((tiflags & TH_SYN)
1109 && SEQ_GT(th->th_seq, vp->rcv_nxt)) { 1109 && SEQ_GT(th->th_seq, vp->rcv_nxt)) {
1110 /* We only support this in the !NOFDREF case, which 1110 /* We only support this in the !NOFDREF case, which
1111 * is to say: not here. 1111 * is to say: not here.
1112 */ 1112 */
1113 goto dropwithreset; 1113 goto dropwithreset;
1114 } 1114 }
1115 /* 1115 /*
1116 * If window is closed can only take segments at 1116 * If window is closed can only take segments at
1117 * window edge, and have to drop data and PUSH from 1117 * window edge, and have to drop data and PUSH from
1118 * incoming segments. Continue processing, but 1118 * incoming segments. Continue processing, but
1119 * remember to ack. Otherwise, drop segment 1119 * remember to ack. Otherwise, drop segment
1120 * and (if not RST) ack. 1120 * and (if not RST) ack.
1121 */ 1121 */
1122 if (vp->rcv_wnd == 0 && th->th_seq == vp->rcv_nxt) { 1122 if (vp->rcv_wnd == 0 && th->th_seq == vp->rcv_nxt) {
1123 t_flags |= TF_ACKNOW; 1123 t_flags |= TF_ACKNOW;
1124 TCP_STATINC(TCP_STAT_RCVWINPROBE); 1124 TCP_STATINC(TCP_STAT_RCVWINPROBE);
1125 } else 1125 } else
1126 goto dropafterack; 1126 goto dropafterack;
1127 } else 1127 } else
1128 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, todrop); 1128 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, todrop);
1129 m_adj(m, -todrop); 1129 m_adj(m, -todrop);
1130 tlen -= todrop; 1130 tlen -= todrop;
1131 tiflags &= ~(TH_PUSH|TH_FIN); 1131 tiflags &= ~(TH_PUSH|TH_FIN);
1132 } 1132 }
1133 1133
1134 if (tiflags & TH_RST) { 1134 if (tiflags & TH_RST) {
1135 if (th->th_seq != vp->rcv_nxt) 1135 if (th->th_seq != vp->rcv_nxt)
1136 goto dropafterack_ratelim; 1136 goto dropafterack_ratelim;
1137 1137
1138 vtw_del(vp->ctl, vp->vtw); 1138 vtw_del(vp->ctl, vp->vtw);
1139 goto drop; 1139 goto drop;
1140 } 1140 }
1141 1141
1142 /* 1142 /*
1143 * If the ACK bit is off we drop the segment and return. 1143 * If the ACK bit is off we drop the segment and return.
1144 */ 1144 */
1145 if ((tiflags & TH_ACK) == 0) { 1145 if ((tiflags & TH_ACK) == 0) {
1146 if (t_flags & TF_ACKNOW) 1146 if (t_flags & TF_ACKNOW)
1147 goto dropafterack; 1147 goto dropafterack;
1148 else 1148 else
1149 goto drop; 1149 goto drop;
1150 } 1150 }
1151 1151
1152 /* 1152 /*
1153 * In TIME_WAIT state the only thing that should arrive 1153 * In TIME_WAIT state the only thing that should arrive
1154 * is a retransmission of the remote FIN. Acknowledge 1154 * is a retransmission of the remote FIN. Acknowledge
1155 * it and restart the finack timer. 1155 * it and restart the finack timer.
1156 */ 1156 */
1157 vtw_restart(vp); 1157 vtw_restart(vp);
1158 goto dropafterack; 1158 goto dropafterack;
1159 1159
1160dropafterack: 1160dropafterack:
1161 /* 1161 /*
1162 * Generate an ACK dropping incoming segment if it occupies 1162 * Generate an ACK dropping incoming segment if it occupies
1163 * sequence space, where the ACK reflects our state. 1163 * sequence space, where the ACK reflects our state.
1164 */ 1164 */
1165 if (tiflags & TH_RST) 1165 if (tiflags & TH_RST)
1166 goto drop; 1166 goto drop;
1167 goto dropafterack2; 1167 goto dropafterack2;
1168 1168
1169dropafterack_ratelim: 1169dropafterack_ratelim:
1170 /* 1170 /*
1171 * We may want to rate-limit ACKs against SYN/RST attack. 1171 * We may want to rate-limit ACKs against SYN/RST attack.
1172 */ 1172 */
1173 if (ppsratecheck(&tcp_ackdrop_ppslim_last, &tcp_ackdrop_ppslim_count, 1173 if (ppsratecheck(&tcp_ackdrop_ppslim_last, &tcp_ackdrop_ppslim_count,
1174 tcp_ackdrop_ppslim) == 0) { 1174 tcp_ackdrop_ppslim) == 0) {
1175 /* XXX stat */ 1175 /* XXX stat */
1176 goto drop; 1176 goto drop;
1177 } 1177 }
1178 /* ...fall into dropafterack2... */ 1178 /* ...fall into dropafterack2... */
1179 1179
1180dropafterack2: 1180dropafterack2:
1181 (void)tcp_respond(0, m, m, th, th->th_seq + tlen, th->th_ack, 1181 (void)tcp_respond(0, m, m, th, th->th_seq + tlen, th->th_ack,
1182 TH_ACK); 1182 TH_ACK);
1183 return; 1183 return;
1184 1184
1185dropwithreset: 1185dropwithreset:
1186 /* 1186 /*
1187 * Generate a RST, dropping incoming segment. 1187 * Generate a RST, dropping incoming segment.
1188 * Make ACK acceptable to originator of segment. 1188 * Make ACK acceptable to originator of segment.
1189 */ 1189 */
1190 if (tiflags & TH_RST) 1190 if (tiflags & TH_RST)
1191 goto drop; 1191 goto drop;
1192 1192
1193 if (tiflags & TH_ACK) 1193 if (tiflags & TH_ACK)
1194 tcp_respond(0, m, m, th, (tcp_seq)0, th->th_ack, TH_RST); 1194 tcp_respond(0, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
1195 else { 1195 else {
1196 if (tiflags & TH_SYN) 1196 if (tiflags & TH_SYN)
1197 ++tlen; 1197 ++tlen;
1198 (void)tcp_respond(0, m, m, th, th->th_seq + tlen, (tcp_seq)0, 1198 (void)tcp_respond(0, m, m, th, th->th_seq + tlen, (tcp_seq)0,
1199 TH_RST|TH_ACK); 1199 TH_RST|TH_ACK);
1200 } 1200 }
1201 return; 1201 return;
1202drop: 1202drop:
1203 m_freem(m); 1203 m_freem(m);
1204} 1204}
1205 1205
1206/* 1206/*
1207 * TCP input routine, follows pages 65-76 of RFC 793 very closely. 1207 * TCP input routine, follows pages 65-76 of RFC 793 very closely.
1208 */ 1208 */
1209void 1209void
1210tcp_input(struct mbuf *m, ...) 1210tcp_input(struct mbuf *m, ...)
1211{ 1211{
1212 struct tcphdr *th; 1212 struct tcphdr *th;
1213 struct ip *ip; 1213 struct ip *ip;
1214 struct inpcb *inp; 1214 struct inpcb *inp;
1215#ifdef INET6 1215#ifdef INET6
1216 struct ip6_hdr *ip6; 1216 struct ip6_hdr *ip6;
1217 struct in6pcb *in6p; 1217 struct in6pcb *in6p;
1218#endif 1218#endif
1219 u_int8_t *optp = NULL; 1219 u_int8_t *optp = NULL;
1220 int optlen = 0; 1220 int optlen = 0;
1221 int len, tlen, toff, hdroptlen = 0; 1221 int len, tlen, toff, hdroptlen = 0;
1222 struct tcpcb *tp = 0; 1222 struct tcpcb *tp = 0;
1223 int tiflags; 1223 int tiflags;
1224 struct socket *so = NULL; 1224 struct socket *so = NULL;
1225 int todrop, acked, ourfinisacked, needoutput = 0; 1225 int todrop, acked, ourfinisacked, needoutput = 0;
1226 bool dupseg; 1226 bool dupseg;
1227#ifdef TCP_DEBUG 1227#ifdef TCP_DEBUG
1228 short ostate = 0; 1228 short ostate = 0;
1229#endif 1229#endif
1230 u_long tiwin; 1230 u_long tiwin;
1231 struct tcp_opt_info opti; 1231 struct tcp_opt_info opti;
1232 int off, iphlen; 1232 int off, iphlen;
1233 va_list ap; 1233 va_list ap;
1234 int af; /* af on the wire */ 1234 int af; /* af on the wire */
1235 struct mbuf *tcp_saveti = NULL; 1235 struct mbuf *tcp_saveti = NULL;
1236 uint32_t ts_rtt; 1236 uint32_t ts_rtt;
1237 uint8_t iptos; 1237 uint8_t iptos;
1238 uint64_t *tcps; 1238 uint64_t *tcps;
1239 vestigial_inpcb_t vestige; 1239 vestigial_inpcb_t vestige;
1240 1240
1241 vestige.valid = 0; 1241 vestige.valid = 0;
1242 1242
1243 MCLAIM(m, &tcp_rx_mowner); 1243 MCLAIM(m, &tcp_rx_mowner);
1244 va_start(ap, m); 1244 va_start(ap, m);
1245 toff = va_arg(ap, int); 1245 toff = va_arg(ap, int);
1246 (void)va_arg(ap, int); /* ignore value, advance ap */ 1246 (void)va_arg(ap, int); /* ignore value, advance ap */
1247 va_end(ap); 1247 va_end(ap);
1248 1248
1249 TCP_STATINC(TCP_STAT_RCVTOTAL); 1249 TCP_STATINC(TCP_STAT_RCVTOTAL);
1250 1250
1251 memset(&opti, 0, sizeof(opti)); 1251 memset(&opti, 0, sizeof(opti));
1252 opti.ts_present = 0; 1252 opti.ts_present = 0;
1253 opti.maxseg = 0; 1253 opti.maxseg = 0;
1254 1254
1255 /* 1255 /*
1256 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN. 1256 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN.
1257 * 1257 *
1258 * TCP is, by definition, unicast, so we reject all 1258 * TCP is, by definition, unicast, so we reject all
1259 * multicast outright. 1259 * multicast outright.
1260 * 1260 *
1261 * Note, there are additional src/dst address checks in 1261 * Note, there are additional src/dst address checks in
1262 * the AF-specific code below. 1262 * the AF-specific code below.
1263 */ 1263 */
1264 if (m->m_flags & (M_BCAST|M_MCAST)) { 1264 if (m->m_flags & (M_BCAST|M_MCAST)) {
1265 /* XXX stat */ 1265 /* XXX stat */
1266 goto drop; 1266 goto drop;
1267 } 1267 }
1268#ifdef INET6 1268#ifdef INET6
1269 if (m->m_flags & M_ANYCAST6) { 1269 if (m->m_flags & M_ANYCAST6) {
1270 /* XXX stat */ 1270 /* XXX stat */
1271 goto drop; 1271 goto drop;
1272 } 1272 }
1273#endif 1273#endif
1274 1274
1275 /* 1275 /*
 1276 * Enforce alignment requirements that are violated in
 1277 * some cases, see kern/50766 for details.
 1278 */
 1279 if (TCP_HDR_ALIGNED_P(th) == 0) {
 1280 m = m_copyup(m, toff + sizeof(struct tcphdr), 0);
 1281 if (m == NULL) {
 1282 TCP_STATINC(TCP_STAT_RCVSHORT);
 1283 return;
 1284 }
 1285 th = (struct tcphdr *)(mtod(m, char *) + toff);
 1286 }
 1287 KASSERT(TCP_HDR_ALIGNED_P(th));
 1288
 1289 /*
1276 * Get IP and TCP header. 1290 * Get IP and TCP header.
1277 * Note: IP leaves IP header in first mbuf. 1291 * Note: IP leaves IP header in first mbuf.
1278 */ 1292 */
 1293#ifdef INET6
 1294 ip6 = mtod(m, struct ip6_hdr *);
 1295#endif
 1296#ifdef INET
1279 ip = mtod(m, struct ip *); 1297 ip = mtod(m, struct ip *);
 1298#endif
1280 switch (ip->ip_v) { 1299 switch (ip->ip_v) {
1281#ifdef INET 1300#ifdef INET
1282 case 4: 1301 case 4:
1283#ifdef INET6 
1284 ip6 = NULL; 
1285#endif 
1286 af = AF_INET; 1302 af = AF_INET;
1287 iphlen = sizeof(struct ip); 1303 iphlen = sizeof(struct ip);
1288 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff, 1304 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff,
1289 sizeof(struct tcphdr)); 1305 sizeof(struct tcphdr));
1290 if (th == NULL) { 1306 if (th == NULL) {
1291 TCP_STATINC(TCP_STAT_RCVSHORT); 1307 TCP_STATINC(TCP_STAT_RCVSHORT);
1292 return; 1308 return;
1293 } 1309 }
1294 /* We do the checksum after PCB lookup... */ 1310 /* We do the checksum after PCB lookup... */
1295 len = ntohs(ip->ip_len); 1311 len = ntohs(ip->ip_len);
1296 tlen = len - toff; 1312 tlen = len - toff;
1297 iptos = ip->ip_tos; 1313 iptos = ip->ip_tos;
1298 break; 1314 break;
1299#endif 1315#endif
1300#ifdef INET6 1316#ifdef INET6
1301 case 6: 1317 case 6:
1302 ip = NULL; 
1303 iphlen = sizeof(struct ip6_hdr); 1318 iphlen = sizeof(struct ip6_hdr);
1304 af = AF_INET6; 1319 af = AF_INET6;
1305 ip6 = mtod(m, struct ip6_hdr *); 
1306 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff, 1320 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff,
1307 sizeof(struct tcphdr)); 1321 sizeof(struct tcphdr));
1308 if (th == NULL) { 1322 if (th == NULL) {
1309 TCP_STATINC(TCP_STAT_RCVSHORT); 1323 TCP_STATINC(TCP_STAT_RCVSHORT);
1310 return; 1324 return;
1311 } 1325 }
1312 1326
1313 /* Be proactive about malicious use of IPv4 mapped address */ 1327 /* Be proactive about malicious use of IPv4 mapped address */
1314 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || 1328 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
1315 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { 1329 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
1316 /* XXX stat */ 1330 /* XXX stat */
1317 goto drop; 1331 goto drop;
1318 } 1332 }
1319 1333
1320 /* 1334 /*
1321 * Be proactive about unspecified IPv6 address in source. 1335 * Be proactive about unspecified IPv6 address in source.
1322 * As we use all-zero to indicate unbounded/unconnected pcb, 1336 * As we use all-zero to indicate unbounded/unconnected pcb,
1323 * unspecified IPv6 address can be used to confuse us. 1337 * unspecified IPv6 address can be used to confuse us.
1324 * 1338 *
1325 * Note that packets with unspecified IPv6 destination is 1339 * Note that packets with unspecified IPv6 destination is
1326 * already dropped in ip6_input. 1340 * already dropped in ip6_input.
1327 */ 1341 */
1328 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { 1342 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
1329 /* XXX stat */ 1343 /* XXX stat */
1330 goto drop; 1344 goto drop;
1331 } 1345 }
1332 1346
1333 /* 1347 /*
1334 * Make sure destination address is not multicast. 1348 * Make sure destination address is not multicast.
1335 * Source address checked in ip6_input(). 1349 * Source address checked in ip6_input().
1336 */ 1350 */
1337 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 1351 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1338 /* XXX stat */ 1352 /* XXX stat */
1339 goto drop; 1353 goto drop;
1340 } 1354 }
1341 1355
1342 /* We do the checksum after PCB lookup... */ 1356 /* We do the checksum after PCB lookup... */
1343 len = m->m_pkthdr.len; 1357 len = m->m_pkthdr.len;
1344 tlen = len - toff; 1358 tlen = len - toff;
1345 iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1359 iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1346 break; 1360 break;
1347#endif 1361#endif
1348 default: 1362 default:
1349 m_freem(m); 1363 m_freem(m);
1350 return; 1364 return;
1351 } 1365 }
1352 /* 
1353 * Enforce alignment requirements that are violated in 
1354 * some cases, see kern/50766 for details. 
1355 */ 
1356 if (TCP_HDR_ALIGNED_P(th) == 0) { 
1357 m = m_copyup(m, toff + sizeof(struct tcphdr), 0); 
1358 if (m == NULL) { 
1359 TCP_STATINC(TCP_STAT_RCVSHORT); 
1360 return; 
1361 } 
1362 ip = mtod(m, struct ip *); 
1363#ifdef INET6 
1364 ip6 = mtod(m, struct ip6_hdr *); 
1365#endif 
1366 th = (struct tcphdr *)(mtod(m, char *) + toff); 
1367 } 
1368 KASSERT(TCP_HDR_ALIGNED_P(th)); 
1369 1366
1370 /* 1367 /*
1371 * Check that TCP offset makes sense, 1368 * Check that TCP offset makes sense,
1372 * pull out TCP options and adjust length. XXX 1369 * pull out TCP options and adjust length. XXX
1373 */ 1370 */
1374 off = th->th_off << 2; 1371 off = th->th_off << 2;
1375 if (off < sizeof (struct tcphdr) || off > tlen) { 1372 if (off < sizeof (struct tcphdr) || off > tlen) {
1376 TCP_STATINC(TCP_STAT_RCVBADOFF); 1373 TCP_STATINC(TCP_STAT_RCVBADOFF);
1377 goto drop; 1374 goto drop;
1378 } 1375 }
1379 tlen -= off; 1376 tlen -= off;
1380 1377
1381 /* 1378 /*
1382 * tcp_input() has been modified to use tlen to mean the TCP data 1379 * tcp_input() has been modified to use tlen to mean the TCP data
1383 * length throughout the function. Other functions can use 1380 * length throughout the function. Other functions can use
1384 * m->m_pkthdr.len as the basis for calculating the TCP data length. 1381 * m->m_pkthdr.len as the basis for calculating the TCP data length.
1385 * rja 1382 * rja
1386 */ 1383 */
1387 1384
1388 if (off > sizeof (struct tcphdr)) { 1385 if (off > sizeof (struct tcphdr)) {
1389 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff, off); 1386 IP6_EXTHDR_GET(th, struct tcphdr *, m, toff, off);
1390 if (th == NULL) { 1387 if (th == NULL) {
1391 TCP_STATINC(TCP_STAT_RCVSHORT); 1388 TCP_STATINC(TCP_STAT_RCVSHORT);
1392 return; 1389 return;
1393 } 1390 }
1394 /* 1391 /*
1395 * NOTE: ip/ip6 will not be affected by m_pulldown() 1392 * NOTE: ip/ip6 will not be affected by m_pulldown()
1396 * (as they're before toff) and we don't need to update those. 1393 * (as they're before toff) and we don't need to update those.
1397 */ 1394 */
1398 KASSERT(TCP_HDR_ALIGNED_P(th)); 1395 KASSERT(TCP_HDR_ALIGNED_P(th));
1399 optlen = off - sizeof (struct tcphdr); 1396 optlen = off - sizeof (struct tcphdr);
1400 optp = ((u_int8_t *)th) + sizeof(struct tcphdr); 1397 optp = ((u_int8_t *)th) + sizeof(struct tcphdr);
1401 /* 1398 /*
1402 * Do quick retrieval of timestamp options ("options 1399 * Do quick retrieval of timestamp options ("options
1403 * prediction?"). If timestamp is the only option and it's 1400 * prediction?"). If timestamp is the only option and it's
1404 * formatted as recommended in RFC 1323 appendix A, we 1401 * formatted as recommended in RFC 1323 appendix A, we
1405 * quickly get the values now and not bother calling 1402 * quickly get the values now and not bother calling
1406 * tcp_dooptions(), etc. 1403 * tcp_dooptions(), etc.
1407 */ 1404 */
1408 if ((optlen == TCPOLEN_TSTAMP_APPA || 1405 if ((optlen == TCPOLEN_TSTAMP_APPA ||
1409 (optlen > TCPOLEN_TSTAMP_APPA && 1406 (optlen > TCPOLEN_TSTAMP_APPA &&
1410 optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && 1407 optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
1411 *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) && 1408 *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
1412 (th->th_flags & TH_SYN) == 0) { 1409 (th->th_flags & TH_SYN) == 0) {
1413 opti.ts_present = 1; 1410 opti.ts_present = 1;
1414 opti.ts_val = ntohl(*(u_int32_t *)(optp + 4)); 1411 opti.ts_val = ntohl(*(u_int32_t *)(optp + 4));
1415 opti.ts_ecr = ntohl(*(u_int32_t *)(optp + 8)); 1412 opti.ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
1416 optp = NULL; /* we've parsed the options */ 1413 optp = NULL; /* we've parsed the options */
1417 } 1414 }
1418 } 1415 }
1419 tiflags = th->th_flags; 1416 tiflags = th->th_flags;
1420 1417
1421 /* 1418 /*
1422 * Checksum extended TCP header and data 1419 * Checksum extended TCP header and data
1423 */ 1420 */
1424 if (tcp_input_checksum(af, m, th, toff, off, tlen)) 1421 if (tcp_input_checksum(af, m, th, toff, off, tlen))
1425 goto badcsum; 1422 goto badcsum;
1426 1423
1427 /* 1424 /*
1428 * Locate pcb for segment. 1425 * Locate pcb for segment.
1429 */ 1426 */
1430findpcb: 1427findpcb:
1431 inp = NULL; 1428 inp = NULL;
1432#ifdef INET6 1429#ifdef INET6
1433 in6p = NULL; 1430 in6p = NULL;
1434#endif 1431#endif
1435 switch (af) { 1432 switch (af) {
1436#ifdef INET 1433#ifdef INET
1437 case AF_INET: 1434 case AF_INET:
1438 inp = in_pcblookup_connect(&tcbtable, ip->ip_src, th->th_sport, 1435 inp = in_pcblookup_connect(&tcbtable, ip->ip_src, th->th_sport,
1439 ip->ip_dst, th->th_dport, 1436 ip->ip_dst, th->th_dport,
1440 &vestige); 1437 &vestige);
1441 if (inp == 0 && !vestige.valid) { 1438 if (inp == 0 && !vestige.valid) {
1442 TCP_STATINC(TCP_STAT_PCBHASHMISS); 1439 TCP_STATINC(TCP_STAT_PCBHASHMISS);
1443 inp = in_pcblookup_bind(&tcbtable, ip->ip_dst, th->th_dport); 1440 inp = in_pcblookup_bind(&tcbtable, ip->ip_dst, th->th_dport);
1444 } 1441 }
1445#ifdef INET6 1442#ifdef INET6
1446 if (inp == 0 && !vestige.valid) { 1443 if (inp == 0 && !vestige.valid) {
1447 struct in6_addr s, d; 1444 struct in6_addr s, d;
1448 1445
1449 /* mapped addr case */ 1446 /* mapped addr case */
1450 in6_in_2_v4mapin6(&ip->ip_src, &s); 1447 in6_in_2_v4mapin6(&ip->ip_src, &s);
1451 in6_in_2_v4mapin6(&ip->ip_dst, &d); 1448 in6_in_2_v4mapin6(&ip->ip_dst, &d);
1452 in6p = in6_pcblookup_connect(&tcbtable, &s, 1449 in6p = in6_pcblookup_connect(&tcbtable, &s,
1453 th->th_sport, &d, th->th_dport, 1450 th->th_sport, &d, th->th_dport,
1454 0, &vestige); 1451 0, &vestige);
1455 if (in6p == 0 && !vestige.valid) { 1452 if (in6p == 0 && !vestige.valid) {
1456 TCP_STATINC(TCP_STAT_PCBHASHMISS); 1453 TCP_STATINC(TCP_STAT_PCBHASHMISS);
1457 in6p = in6_pcblookup_bind(&tcbtable, &d, 1454 in6p = in6_pcblookup_bind(&tcbtable, &d,
1458 th->th_dport, 0); 1455 th->th_dport, 0);
1459 } 1456 }
1460 } 1457 }
1461#endif 1458#endif
1462#ifndef INET6 1459#ifndef INET6
1463 if (inp == 0 && !vestige.valid) 1460 if (inp == 0 && !vestige.valid)
1464#else 1461#else
1465 if (inp == 0 && in6p == 0 && !vestige.valid) 1462 if (inp == 0 && in6p == 0 && !vestige.valid)
1466#endif 1463#endif
1467 { 1464 {
1468 TCP_STATINC(TCP_STAT_NOPORT); 1465 TCP_STATINC(TCP_STAT_NOPORT);
1469 if (tcp_log_refused && 1466 if (tcp_log_refused &&
1470 (tiflags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN) { 1467 (tiflags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN) {
1471 tcp4_log_refused(ip, th); 1468 tcp4_log_refused(ip, th);
1472 } 1469 }
1473 tcp_fields_to_host(th); 1470 tcp_fields_to_host(th);
1474 goto dropwithreset_ratelim; 1471 goto dropwithreset_ratelim;
1475 } 1472 }
1476#if defined(IPSEC) 1473#if defined(IPSEC)
1477 if (ipsec_used) { 1474 if (ipsec_used) {
1478 if (inp && ipsec4_in_reject(m, inp)) { 1475 if (inp && ipsec4_in_reject(m, inp)) {
1479 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); 1476 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO);
1480 goto drop; 1477 goto drop;
1481 } 1478 }
1482#ifdef INET6 1479#ifdef INET6
1483 else if (in6p && ipsec6_in_reject(m, in6p)) { 1480 else if (in6p && ipsec6_in_reject(m, in6p)) {
1484 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); 1481 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO);
1485 goto drop; 1482 goto drop;
1486 } 1483 }
1487#endif 1484#endif
1488 } 1485 }
1489#endif /*IPSEC*/ 1486#endif /*IPSEC*/
1490 break; 1487 break;
1491#endif /*INET*/ 1488#endif /*INET*/
1492#ifdef INET6 1489#ifdef INET6
1493 case AF_INET6: 1490 case AF_INET6:
1494 { 1491 {
1495 int faith; 1492 int faith;
1496 1493
1497#if defined(NFAITH) && NFAITH > 0 1494#if defined(NFAITH) && NFAITH > 0
1498 faith = faithprefix(&ip6->ip6_dst); 1495 faith = faithprefix(&ip6->ip6_dst);
1499#else 1496#else
1500 faith = 0; 1497 faith = 0;
1501#endif 1498#endif
1502 in6p = in6_pcblookup_connect(&tcbtable, &ip6->ip6_src, 1499 in6p = in6_pcblookup_connect(&tcbtable, &ip6->ip6_src,
1503 th->th_sport, &ip6->ip6_dst, th->th_dport, faith, &vestige); 1500 th->th_sport, &ip6->ip6_dst, th->th_dport, faith, &vestige);
1504 if (!in6p && !vestige.valid) { 1501 if (!in6p && !vestige.valid) {
1505 TCP_STATINC(TCP_STAT_PCBHASHMISS); 1502 TCP_STATINC(TCP_STAT_PCBHASHMISS);
1506 in6p = in6_pcblookup_bind(&tcbtable, &ip6->ip6_dst, 1503 in6p = in6_pcblookup_bind(&tcbtable, &ip6->ip6_dst,
1507 th->th_dport, faith); 1504 th->th_dport, faith);
1508 } 1505 }
1509 if (!in6p && !vestige.valid) { 1506 if (!in6p && !vestige.valid) {
1510 TCP_STATINC(TCP_STAT_NOPORT); 1507 TCP_STATINC(TCP_STAT_NOPORT);
1511 if (tcp_log_refused && 1508 if (tcp_log_refused &&
1512 (tiflags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN) { 1509 (tiflags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN) {
1513 tcp6_log_refused(ip6, th); 1510 tcp6_log_refused(ip6, th);
1514 } 1511 }
1515 tcp_fields_to_host(th); 1512 tcp_fields_to_host(th);
1516 goto dropwithreset_ratelim; 1513 goto dropwithreset_ratelim;
1517 } 1514 }
1518#if defined(IPSEC) 1515#if defined(IPSEC)
1519 if (ipsec_used && in6p && ipsec6_in_reject(m, in6p)) { 1516 if (ipsec_used && in6p && ipsec6_in_reject(m, in6p)) {
1520 IPSEC6_STATINC(IPSEC_STAT_IN_POLVIO); 1517 IPSEC6_STATINC(IPSEC_STAT_IN_POLVIO);
1521 goto drop; 1518 goto drop;
1522 } 1519 }
1523#endif /*IPSEC*/ 1520#endif /*IPSEC*/
1524 break; 1521 break;
1525 } 1522 }
1526#endif 1523#endif
1527 } 1524 }
1528 1525
1529 /* 1526 /*
1530 * If the state is CLOSED (i.e., TCB does not exist) then 1527 * If the state is CLOSED (i.e., TCB does not exist) then
1531 * all data in the incoming segment is discarded. 1528 * all data in the incoming segment is discarded.
1532 * If the TCB exists but is in CLOSED state, it is embryonic, 1529 * If the TCB exists but is in CLOSED state, it is embryonic,
1533 * but should either do a listen or a connect soon. 1530 * but should either do a listen or a connect soon.
1534 */ 1531 */
1535 tp = NULL; 1532 tp = NULL;
1536 so = NULL; 1533 so = NULL;
1537 if (inp) { 1534 if (inp) {
1538 /* Check the minimum TTL for socket. */ 1535 /* Check the minimum TTL for socket. */
1539 if (ip->ip_ttl < inp->inp_ip_minttl) 1536 if (ip->ip_ttl < inp->inp_ip_minttl)
1540 goto drop; 1537 goto drop;
1541 1538
1542 tp = intotcpcb(inp); 1539 tp = intotcpcb(inp);
1543 so = inp->inp_socket; 1540 so = inp->inp_socket;
1544 } 1541 }
1545#ifdef INET6 1542#ifdef INET6
1546 else if (in6p) { 1543 else if (in6p) {
1547 tp = in6totcpcb(in6p); 1544 tp = in6totcpcb(in6p);
1548 so = in6p->in6p_socket; 1545 so = in6p->in6p_socket;
1549 } 1546 }
1550#endif 1547#endif
1551 else if (vestige.valid) { 1548 else if (vestige.valid) {
1552 int mc = 0; 1549 int mc = 0;
1553 1550
1554 /* We do not support the resurrection of vtw tcpcps. 1551 /* We do not support the resurrection of vtw tcpcps.
1555 */ 1552 */
1556 if (tcp_input_checksum(af, m, th, toff, off, tlen)) 1553 if (tcp_input_checksum(af, m, th, toff, off, tlen))
1557 goto badcsum; 1554 goto badcsum;
1558 1555
1559 switch (af) { 1556 switch (af) {
1560#ifdef INET6 1557#ifdef INET6
1561 case AF_INET6: 1558 case AF_INET6:
1562 mc = IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst); 1559 mc = IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst);
1563 break; 1560 break;
1564#endif 1561#endif
1565 1562
1566 case AF_INET: 1563 case AF_INET:
1567 mc = (IN_MULTICAST(ip->ip_dst.s_addr) 1564 mc = (IN_MULTICAST(ip->ip_dst.s_addr)
1568 || in_broadcast(ip->ip_dst, 1565 || in_broadcast(ip->ip_dst,
1569 m_get_rcvif_NOMPSAFE(m))); 1566 m_get_rcvif_NOMPSAFE(m)));
1570 break; 1567 break;
1571 } 1568 }
1572 1569
1573 tcp_fields_to_host(th); 1570 tcp_fields_to_host(th);
1574 tcp_vtw_input(th, &vestige, m, tlen, mc); 1571 tcp_vtw_input(th, &vestige, m, tlen, mc);
1575 m = 0; 1572 m = 0;
1576 goto drop; 1573 goto drop;
1577 } 1574 }
1578 1575
1579 if (tp == 0) { 1576 if (tp == 0) {
1580 tcp_fields_to_host(th); 1577 tcp_fields_to_host(th);
1581 goto dropwithreset_ratelim; 1578 goto dropwithreset_ratelim;
1582 } 1579 }
1583 if (tp->t_state == TCPS_CLOSED) 1580 if (tp->t_state == TCPS_CLOSED)
1584 goto drop; 1581 goto drop;
1585 1582
1586 KASSERT(so->so_lock == softnet_lock); 1583 KASSERT(so->so_lock == softnet_lock);
1587 KASSERT(solocked(so)); 1584 KASSERT(solocked(so));
1588 1585
1589 tcp_fields_to_host(th); 1586 tcp_fields_to_host(th);
1590 1587
1591 /* Unscale the window into a 32-bit value. */ 1588 /* Unscale the window into a 32-bit value. */
1592 if ((tiflags & TH_SYN) == 0) 1589 if ((tiflags & TH_SYN) == 0)
1593 tiwin = th->th_win << tp->snd_scale; 1590 tiwin = th->th_win << tp->snd_scale;
1594 else 1591 else
1595 tiwin = th->th_win; 1592 tiwin = th->th_win;
1596 1593
1597#ifdef INET6 1594#ifdef INET6
1598 /* save packet options if user wanted */ 1595 /* save packet options if user wanted */
1599 if (in6p && (in6p->in6p_flags & IN6P_CONTROLOPTS)) { 1596 if (in6p && (in6p->in6p_flags & IN6P_CONTROLOPTS)) {
1600 if (in6p->in6p_options) { 1597 if (in6p->in6p_options) {
1601 m_freem(in6p->in6p_options); 1598 m_freem(in6p->in6p_options);
1602 in6p->in6p_options = 0; 1599 in6p->in6p_options = 0;
1603 } 1600 }
1604 KASSERT(ip6 != NULL); 
1605 ip6_savecontrol(in6p, &in6p->in6p_options, ip6, m); 1601 ip6_savecontrol(in6p, &in6p->in6p_options, ip6, m);
1606 } 1602 }
1607#endif 1603#endif
1608 1604
1609 if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { 1605 if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
1610 union syn_cache_sa src; 1606 union syn_cache_sa src;
1611 union syn_cache_sa dst; 1607 union syn_cache_sa dst;
1612 1608
1613 memset(&src, 0, sizeof(src)); 1609 memset(&src, 0, sizeof(src));
1614 memset(&dst, 0, sizeof(dst)); 1610 memset(&dst, 0, sizeof(dst));
1615 switch (af) { 1611 switch (af) {
1616#ifdef INET 1612#ifdef INET
1617 case AF_INET: 1613 case AF_INET:
1618 src.sin.sin_len = sizeof(struct sockaddr_in); 1614 src.sin.sin_len = sizeof(struct sockaddr_in);
1619 src.sin.sin_family = AF_INET; 1615 src.sin.sin_family = AF_INET;
1620 src.sin.sin_addr = ip->ip_src; 1616 src.sin.sin_addr = ip->ip_src;
1621 src.sin.sin_port = th->th_sport; 1617 src.sin.sin_port = th->th_sport;
1622 1618
1623 dst.sin.sin_len = sizeof(struct sockaddr_in); 1619 dst.sin.sin_len = sizeof(struct sockaddr_in);
1624 dst.sin.sin_family = AF_INET; 1620 dst.sin.sin_family = AF_INET;
1625 dst.sin.sin_addr = ip->ip_dst; 1621 dst.sin.sin_addr = ip->ip_dst;
1626 dst.sin.sin_port = th->th_dport; 1622 dst.sin.sin_port = th->th_dport;
1627 break; 1623 break;
1628#endif 1624#endif
1629#ifdef INET6 1625#ifdef INET6
1630 case AF_INET6: 1626 case AF_INET6:
1631 src.sin6.sin6_len = sizeof(struct sockaddr_in6); 1627 src.sin6.sin6_len = sizeof(struct sockaddr_in6);
1632 src.sin6.sin6_family = AF_INET6; 1628 src.sin6.sin6_family = AF_INET6;
1633 src.sin6.sin6_addr = ip6->ip6_src; 1629 src.sin6.sin6_addr = ip6->ip6_src;
1634 src.sin6.sin6_port = th->th_sport; 1630 src.sin6.sin6_port = th->th_sport;
1635 1631
1636 dst.sin6.sin6_len = sizeof(struct sockaddr_in6); 1632 dst.sin6.sin6_len = sizeof(struct sockaddr_in6);
1637 dst.sin6.sin6_family = AF_INET6; 1633 dst.sin6.sin6_family = AF_INET6;
1638 dst.sin6.sin6_addr = ip6->ip6_dst; 1634 dst.sin6.sin6_addr = ip6->ip6_dst;
1639 dst.sin6.sin6_port = th->th_dport; 1635 dst.sin6.sin6_port = th->th_dport;
1640 break; 1636 break;
1641#endif /* INET6 */ 1637#endif /* INET6 */
1642 default: 1638 default:
1643 goto badsyn; /*sanity*/ 1639 goto badsyn; /*sanity*/
1644 } 1640 }
1645 1641
1646 if (so->so_options & SO_DEBUG) { 1642 if (so->so_options & SO_DEBUG) {
1647#ifdef TCP_DEBUG 1643#ifdef TCP_DEBUG
1648 ostate = tp->t_state; 1644 ostate = tp->t_state;
1649#endif 1645#endif
1650 1646
1651 tcp_saveti = NULL; 1647 tcp_saveti = NULL;
1652 if (iphlen + sizeof(struct tcphdr) > MHLEN) 1648 if (iphlen + sizeof(struct tcphdr) > MHLEN)
1653 goto nosave; 1649 goto nosave;
1654 1650
1655 if (m->m_len > iphlen && (m->m_flags & M_EXT) == 0) { 1651 if (m->m_len > iphlen && (m->m_flags & M_EXT) == 0) {
1656 tcp_saveti = m_copym(m, 0, iphlen, M_DONTWAIT); 1652 tcp_saveti = m_copym(m, 0, iphlen, M_DONTWAIT);
1657 if (!tcp_saveti) 1653 if (!tcp_saveti)
1658 goto nosave; 1654 goto nosave;
1659 } else { 1655 } else {
1660 MGETHDR(tcp_saveti, M_DONTWAIT, MT_HEADER); 1656 MGETHDR(tcp_saveti, M_DONTWAIT, MT_HEADER);
1661 if (!tcp_saveti) 1657 if (!tcp_saveti)
1662 goto nosave; 1658 goto nosave;
1663 MCLAIM(m, &tcp_mowner); 1659 MCLAIM(m, &tcp_mowner);
1664 tcp_saveti->m_len = iphlen; 1660 tcp_saveti->m_len = iphlen;
1665 m_copydata(m, 0, iphlen, 1661 m_copydata(m, 0, iphlen,
1666 mtod(tcp_saveti, void *)); 1662 mtod(tcp_saveti, void *));
1667 } 1663 }
1668 1664
1669 if (M_TRAILINGSPACE(tcp_saveti) < sizeof(struct tcphdr)) { 1665 if (M_TRAILINGSPACE(tcp_saveti) < sizeof(struct tcphdr)) {
1670 m_freem(tcp_saveti); 1666 m_freem(tcp_saveti);
1671 tcp_saveti = NULL; 1667 tcp_saveti = NULL;
1672 } else { 1668 } else {
1673 tcp_saveti->m_len += sizeof(struct tcphdr); 1669 tcp_saveti->m_len += sizeof(struct tcphdr);
1674 memcpy(mtod(tcp_saveti, char *) + iphlen, th, 1670 memcpy(mtod(tcp_saveti, char *) + iphlen, th,
1675 sizeof(struct tcphdr)); 1671 sizeof(struct tcphdr));
1676 } 1672 }
1677 nosave:; 1673 nosave:;
1678 } 1674 }
1679 if (so->so_options & SO_ACCEPTCONN) { 1675 if (so->so_options & SO_ACCEPTCONN) {
1680 if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) { 1676 if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
1681 if (tiflags & TH_RST) { 1677 if (tiflags & TH_RST) {
1682 syn_cache_reset(&src.sa, &dst.sa, th); 1678 syn_cache_reset(&src.sa, &dst.sa, th);
1683 } else if ((tiflags & (TH_ACK|TH_SYN)) == 1679 } else if ((tiflags & (TH_ACK|TH_SYN)) ==
1684 (TH_ACK|TH_SYN)) { 1680 (TH_ACK|TH_SYN)) {
1685 /* 1681 /*
1686 * Received a SYN,ACK. This should 1682 * Received a SYN,ACK. This should
1687 * never happen while we are in 1683 * never happen while we are in
1688 * LISTEN. Send an RST. 1684 * LISTEN. Send an RST.
1689 */ 1685 */
1690 goto badsyn; 1686 goto badsyn;
1691 } else if (tiflags & TH_ACK) { 1687 } else if (tiflags & TH_ACK) {
1692 so = syn_cache_get(&src.sa, &dst.sa, 1688 so = syn_cache_get(&src.sa, &dst.sa,
1693 th, toff, tlen, so, m); 1689 th, toff, tlen, so, m);
1694 if (so == NULL) { 1690 if (so == NULL) {
1695 /* 1691 /*
1696 * We don't have a SYN for 1692 * We don't have a SYN for
1697 * this ACK; send an RST. 1693 * this ACK; send an RST.
1698 */ 1694 */
1699 goto badsyn; 1695 goto badsyn;
1700 } else if (so == 1696 } else if (so ==
1701 (struct socket *)(-1)) { 1697 (struct socket *)(-1)) {
1702 /* 1698 /*
1703 * We were unable to create 1699 * We were unable to create
1704 * the connection. If the 1700 * the connection. If the
1705 * 3-way handshake was 1701 * 3-way handshake was
1706 * completed, an RST has 1702 * completed, an RST has
1707 * been sent to the peer. 1703 * been sent to the peer.
1708 * Since the mbuf might be 1704 * Since the mbuf might be
1709 * in use for the reply, 1705 * in use for the reply,
1710 * do not free it. 1706 * do not free it.
1711 */ 1707 */
1712 m = NULL; 1708 m = NULL;
1713 } else { 1709 } else {
1714 /* 1710 /*
1715 * We have created a 1711 * We have created a
1716 * full-blown connection. 1712 * full-blown connection.
1717 */ 1713 */
1718 tp = NULL; 1714 tp = NULL;
1719 inp = NULL; 1715 inp = NULL;
1720#ifdef INET6 1716#ifdef INET6
1721 in6p = NULL; 1717 in6p = NULL;
1722#endif 1718#endif
1723 switch (so->so_proto->pr_domain->dom_family) { 1719 switch (so->so_proto->pr_domain->dom_family) {
1724#ifdef INET 1720#ifdef INET
1725 case AF_INET: 1721 case AF_INET:
1726 inp = sotoinpcb(so); 1722 inp = sotoinpcb(so);
1727 tp = intotcpcb(inp); 1723 tp = intotcpcb(inp);
1728 break; 1724 break;
1729#endif 1725#endif
1730#ifdef INET6 1726#ifdef INET6
1731 case AF_INET6: 1727 case AF_INET6:
1732 in6p = sotoin6pcb(so); 1728 in6p = sotoin6pcb(so);
1733 tp = in6totcpcb(in6p); 1729 tp = in6totcpcb(in6p);
1734 break; 1730 break;
1735#endif 1731#endif
1736 } 1732 }
1737 if (tp == NULL) 1733 if (tp == NULL)
1738 goto badsyn; /*XXX*/ 1734 goto badsyn; /*XXX*/
1739 tiwin <<= tp->snd_scale; 1735 tiwin <<= tp->snd_scale;
1740 goto after_listen; 1736 goto after_listen;
1741 } 1737 }
1742 } else { 1738 } else {
1743 /* 1739 /*
1744 * None of RST, SYN or ACK was set. 1740 * None of RST, SYN or ACK was set.
1745 * This is an invalid packet for a 1741 * This is an invalid packet for a
1746 * TCB in LISTEN state. Send a RST. 1742 * TCB in LISTEN state. Send a RST.
1747 */ 1743 */
1748 goto badsyn; 1744 goto badsyn;
1749 } 1745 }
1750 } else { 1746 } else {
1751 /* 1747 /*
1752 * Received a SYN. 1748 * Received a SYN.
1753 * 1749 *
1754 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN 1750 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
1755 */ 1751 */
1756 if (m->m_flags & (M_BCAST|M_MCAST)) 1752 if (m->m_flags & (M_BCAST|M_MCAST))
1757 goto drop; 1753 goto drop;
1758 1754
1759 switch (af) { 1755 switch (af) {
1760#ifdef INET6 1756#ifdef INET6
1761 case AF_INET6: 1757 case AF_INET6:
1762 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) 1758 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
1763 goto drop; 1759 goto drop;
1764 break; 1760 break;
1765#endif /* INET6 */ 1761#endif /* INET6 */
1766 case AF_INET: 1762 case AF_INET:
1767 if (IN_MULTICAST(ip->ip_dst.s_addr) || 1763 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
1768 in_broadcast(ip->ip_dst, 1764 in_broadcast(ip->ip_dst,
1769 m_get_rcvif_NOMPSAFE(m))) 1765 m_get_rcvif_NOMPSAFE(m)))
1770 goto drop; 1766 goto drop;
1771 break; 1767 break;
1772 } 1768 }
1773 1769
1774#ifdef INET6 1770#ifdef INET6
1775 /* 1771 /*
1776 * If deprecated address is forbidden, we do 1772 * If deprecated address is forbidden, we do
1777 * not accept SYN to deprecated interface 1773 * not accept SYN to deprecated interface
1778 * address to prevent any new inbound 1774 * address to prevent any new inbound
1779 * connection from getting established. 1775 * connection from getting established.
1780 * When we do not accept SYN, we send a TCP 1776 * When we do not accept SYN, we send a TCP
1781 * RST, with deprecated source address (instead 1777 * RST, with deprecated source address (instead
1782 * of dropping it). We compromise it as it is 1778 * of dropping it). We compromise it as it is
1783 * much better for peer to send a RST, and 1779 * much better for peer to send a RST, and
1784 * RST will be the final packet for the 1780 * RST will be the final packet for the
1785 * exchange. 1781 * exchange.
1786 * 1782 *
1787 * If we do not forbid deprecated addresses, we 1783 * If we do not forbid deprecated addresses, we
1788 * accept the SYN packet. RFC2462 does not 1784 * accept the SYN packet. RFC2462 does not
1789 * suggest dropping SYN in this case. 1785 * suggest dropping SYN in this case.
1790 * If we decipher RFC2462 5.5.4, it says like 1786 * If we decipher RFC2462 5.5.4, it says like
1791 * this: 1787 * this:
1792 * 1. use of deprecated addr with existing 1788 * 1. use of deprecated addr with existing
1793 * communication is okay - "SHOULD continue 1789 * communication is okay - "SHOULD continue
1794 * to be used" 1790 * to be used"
1795 * 2. use of it with new communication: 1791 * 2. use of it with new communication:
1796 * (2a) "SHOULD NOT be used if alternate 1792 * (2a) "SHOULD NOT be used if alternate
1797 * address with sufficient scope is 1793 * address with sufficient scope is
1798 * available" 1794 * available"
1799 * (2b) nothing mentioned otherwise. 1795 * (2b) nothing mentioned otherwise.
1800 * Here we fall into (2b) case as we have no 1796 * Here we fall into (2b) case as we have no
1801 * choice in our source address selection - we 1797 * choice in our source address selection - we
1802 * must obey the peer. 1798 * must obey the peer.
1803 * 1799 *
1804 * The wording in RFC2462 is confusing, and 1800 * The wording in RFC2462 is confusing, and
1805 * there are multiple descriptions of 1801 * there are multiple descriptions of
1806 * deprecated address handling - worse, they 1802 * deprecated address handling - worse, they
1807 * are not exactly the same. I believe 5.5.4 1803 * are not exactly the same. I believe 5.5.4
1808 * is the best one, so we follow 5.5.4. 1804 * is the best one, so we follow 5.5.4.
1809 */ 1805 */
1810 if (af == AF_INET6 && !ip6_use_deprecated) { 1806 if (af == AF_INET6 && !ip6_use_deprecated) {
1811 struct in6_ifaddr *ia6; 1807 struct in6_ifaddr *ia6;
1812 int s; 1808 int s;
1813 struct ifnet *rcvif = m_get_rcvif(m, &s); 1809 struct ifnet *rcvif = m_get_rcvif(m, &s);
1814 if (rcvif == NULL) 1810 if (rcvif == NULL)
1815 goto dropwithreset; /* XXX */ 1811 goto dropwithreset; /* XXX */
1816 if ((ia6 = in6ifa_ifpwithaddr(rcvif, 1812 if ((ia6 = in6ifa_ifpwithaddr(rcvif,
1817 &ip6->ip6_dst)) && 1813 &ip6->ip6_dst)) &&
1818 (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { 1814 (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
1819 tp = NULL; 1815 tp = NULL;
1820 m_put_rcvif(rcvif, &s); 1816 m_put_rcvif(rcvif, &s);
1821 goto dropwithreset; 1817 goto dropwithreset;
1822 } 1818 }
1823 m_put_rcvif(rcvif, &s); 1819 m_put_rcvif(rcvif, &s);
1824 } 1820 }
1825#endif 1821#endif
1826 1822
1827 /* 1823 /*
1828 * LISTEN socket received a SYN 1824 * LISTEN socket received a SYN
1829 * from itself? This can't possibly 1825 * from itself? This can't possibly
1830 * be valid; drop the packet. 1826 * be valid; drop the packet.
1831 */ 1827 */
1832 if (th->th_sport == th->th_dport) { 1828 if (th->th_sport == th->th_dport) {
1833 int i; 1829 int i;
1834 1830
1835 switch (af) { 1831 switch (af) {
1836#ifdef INET 1832#ifdef INET
1837 case AF_INET: 1833 case AF_INET:
1838 i = in_hosteq(ip->ip_src, ip->ip_dst); 1834 i = in_hosteq(ip->ip_src, ip->ip_dst);
1839 break; 1835 break;
1840#endif 1836#endif
1841#ifdef INET6 1837#ifdef INET6
1842 case AF_INET6: 1838 case AF_INET6:
1843 i = IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6->ip6_dst); 1839 i = IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6->ip6_dst);
1844 break; 1840 break;
1845#endif 1841#endif
1846 default: 1842 default:
1847 i = 1; 1843 i = 1;
1848 } 1844 }
1849 if (i) { 1845 if (i) {
1850 TCP_STATINC(TCP_STAT_BADSYN); 1846 TCP_STATINC(TCP_STAT_BADSYN);
1851 goto drop; 1847 goto drop;
1852 } 1848 }
1853 } 1849 }
1854 1850
1855 /* 1851 /*
1856 * SYN looks ok; create compressed TCP 1852 * SYN looks ok; create compressed TCP
1857 * state for it. 1853 * state for it.
1858 */ 1854 */
1859 if (so->so_qlen <= so->so_qlimit && 1855 if (so->so_qlen <= so->so_qlimit &&
1860 syn_cache_add(&src.sa, &dst.sa, th, tlen, 1856 syn_cache_add(&src.sa, &dst.sa, th, tlen,
1861 so, m, optp, optlen, &opti)) 1857 so, m, optp, optlen, &opti))
1862 m = NULL; 1858 m = NULL;
1863 } 1859 }
1864 goto drop; 1860 goto drop;
1865 } 1861 }
1866 } 1862 }
1867 1863
1868after_listen: 1864after_listen:
1869#ifdef DIAGNOSTIC 1865#ifdef DIAGNOSTIC
1870 /* 1866 /*
1871 * Should not happen now that all embryonic connections 1867 * Should not happen now that all embryonic connections
1872 * are handled with compressed state. 1868 * are handled with compressed state.
1873 */ 1869 */
1874 if (tp->t_state == TCPS_LISTEN) 1870 if (tp->t_state == TCPS_LISTEN)
1875 panic("tcp_input: TCPS_LISTEN"); 1871 panic("tcp_input: TCPS_LISTEN");
1876#endif 1872#endif
1877 1873
1878 /* 1874 /*
1879 * Segment received on connection. 1875 * Segment received on connection.
1880 * Reset idle time and keep-alive timer. 1876 * Reset idle time and keep-alive timer.
1881 */ 1877 */
1882 tp->t_rcvtime = tcp_now; 1878 tp->t_rcvtime = tcp_now;
1883 if (TCPS_HAVEESTABLISHED(tp->t_state)) 1879 if (TCPS_HAVEESTABLISHED(tp->t_state))
1884 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 1880 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
1885 1881
1886 /* 1882 /*
1887 * Process options. 1883 * Process options.
1888 */ 1884 */
1889#ifdef TCP_SIGNATURE 1885#ifdef TCP_SIGNATURE
1890 if (optp || (tp->t_flags & TF_SIGNATURE)) 1886 if (optp || (tp->t_flags & TF_SIGNATURE))
1891#else 1887#else
1892 if (optp) 1888 if (optp)
1893#endif 1889#endif
1894 if (tcp_dooptions(tp, optp, optlen, th, m, toff, &opti) < 0) 1890 if (tcp_dooptions(tp, optp, optlen, th, m, toff, &opti) < 0)
1895 goto drop; 1891 goto drop;
1896 1892
1897 if (TCP_SACK_ENABLED(tp)) { 1893 if (TCP_SACK_ENABLED(tp)) {
1898 tcp_del_sackholes(tp, th); 1894 tcp_del_sackholes(tp, th);
1899 } 1895 }
1900 1896
1901 if (TCP_ECN_ALLOWED(tp)) { 1897 if (TCP_ECN_ALLOWED(tp)) {
1902 if (tiflags & TH_CWR) { 1898 if (tiflags & TH_CWR) {
1903 tp->t_flags &= ~TF_ECN_SND_ECE; 1899 tp->t_flags &= ~TF_ECN_SND_ECE;
1904 } 1900 }
1905 switch (iptos & IPTOS_ECN_MASK) { 1901 switch (iptos & IPTOS_ECN_MASK) {
1906 case IPTOS_ECN_CE: 1902 case IPTOS_ECN_CE:
1907 tp->t_flags |= TF_ECN_SND_ECE; 1903 tp->t_flags |= TF_ECN_SND_ECE;
1908 TCP_STATINC(TCP_STAT_ECN_CE); 1904 TCP_STATINC(TCP_STAT_ECN_CE);
1909 break; 1905 break;
1910 case IPTOS_ECN_ECT0: 1906 case IPTOS_ECN_ECT0:
1911 TCP_STATINC(TCP_STAT_ECN_ECT); 1907 TCP_STATINC(TCP_STAT_ECN_ECT);
1912 break; 1908 break;
1913 case IPTOS_ECN_ECT1: 1909 case IPTOS_ECN_ECT1:
1914 /* XXX: ignore for now -- rpaulo */ 1910 /* XXX: ignore for now -- rpaulo */
1915 break; 1911 break;
1916 } 1912 }
1917 /* 1913 /*
1918 * Congestion experienced. 1914 * Congestion experienced.
1919 * Ignore if we are already trying to recover. 1915 * Ignore if we are already trying to recover.
1920 */ 1916 */
1921 if ((tiflags & TH_ECE) && SEQ_GEQ(tp->snd_una, tp->snd_recover)) 1917 if ((tiflags & TH_ECE) && SEQ_GEQ(tp->snd_una, tp->snd_recover))
1922 tp->t_congctl->cong_exp(tp); 1918 tp->t_congctl->cong_exp(tp);
1923 } 1919 }
1924 1920
1925 if (opti.ts_present && opti.ts_ecr) { 1921 if (opti.ts_present && opti.ts_ecr) {
1926 /* 1922 /*
1927 * Calculate the RTT from the returned time stamp and the 1923 * Calculate the RTT from the returned time stamp and the
1928 * connection's time base. If the time stamp is later than 1924 * connection's time base. If the time stamp is later than
1929 * the current time, or is extremely old, fall back to non-1323 1925 * the current time, or is extremely old, fall back to non-1323
1930 * RTT calculation. Since ts_rtt is unsigned, we can test both 1926 * RTT calculation. Since ts_rtt is unsigned, we can test both
1931 * at the same time. 1927 * at the same time.
1932 * 1928 *
1933 * Note that ts_rtt is in units of slow ticks (500 1929 * Note that ts_rtt is in units of slow ticks (500
1934 * ms). Since most earthbound RTTs are < 500 ms, 1930 * ms). Since most earthbound RTTs are < 500 ms,
1935 * observed values will have large quantization noise. 1931 * observed values will have large quantization noise.
1936 * Our smoothed RTT is then the fraction of observed 1932 * Our smoothed RTT is then the fraction of observed
1937 * samples that are 1 tick instead of 0 (times 500 1933 * samples that are 1 tick instead of 0 (times 500
1938 * ms). 1934 * ms).
1939 * 1935 *
1940 * ts_rtt is increased by 1 to denote a valid sample, 1936 * ts_rtt is increased by 1 to denote a valid sample,
1941 * with 0 indicating an invalid measurement. This 1937 * with 0 indicating an invalid measurement. This
1942 * extra 1 must be removed when ts_rtt is used, or 1938 * extra 1 must be removed when ts_rtt is used, or
1943 * else an erroneous extra 500 ms will result. 1939 * else an erroneous extra 500 ms will result.
1944 */ 1940 */
1945 ts_rtt = TCP_TIMESTAMP(tp) - opti.ts_ecr + 1; 1941 ts_rtt = TCP_TIMESTAMP(tp) - opti.ts_ecr + 1;
1946 if (ts_rtt > TCP_PAWS_IDLE) 1942 if (ts_rtt > TCP_PAWS_IDLE)
1947 ts_rtt = 0; 1943 ts_rtt = 0;
1948 } else { 1944 } else {
1949 ts_rtt = 0; 1945 ts_rtt = 0;
1950 } 1946 }
1951 1947
1952 /* 1948 /*
1953 * Header prediction: check for the two common cases 1949 * Header prediction: check for the two common cases
1954 * of a uni-directional data xfer. If the packet has 1950 * of a uni-directional data xfer. If the packet has
1955 * no control flags, is in-sequence, the window didn't 1951 * no control flags, is in-sequence, the window didn't
1956 * change and we're not retransmitting, it's a 1952 * change and we're not retransmitting, it's a
1957 * candidate. If the length is zero and the ack moved 1953 * candidate. If the length is zero and the ack moved
1958 * forward, we're the sender side of the xfer. Just 1954 * forward, we're the sender side of the xfer. Just
1959 * free the data acked & wake any higher level process 1955 * free the data acked & wake any higher level process
1960 * that was blocked waiting for space. If the length 1956 * that was blocked waiting for space. If the length
1961 * is non-zero and the ack didn't move, we're the 1957 * is non-zero and the ack didn't move, we're the
1962 * receiver side. If we're getting packets in-order 1958 * receiver side. If we're getting packets in-order
1963 * (the reassembly queue is empty), add the data to 1959 * (the reassembly queue is empty), add the data to
1964 * the socket buffer and note that we need a delayed ack. 1960 * the socket buffer and note that we need a delayed ack.
1965 */ 1961 */
1966 if (tp->t_state == TCPS_ESTABLISHED && 1962 if (tp->t_state == TCPS_ESTABLISHED &&
1967 (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ECE|TH_CWR|TH_ACK)) 1963 (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ECE|TH_CWR|TH_ACK))
1968 == TH_ACK && 1964 == TH_ACK &&
1969 (!opti.ts_present || TSTMP_GEQ(opti.ts_val, tp->ts_recent)) && 1965 (!opti.ts_present || TSTMP_GEQ(opti.ts_val, tp->ts_recent)) &&
1970 th->th_seq == tp->rcv_nxt && 1966 th->th_seq == tp->rcv_nxt &&
1971 tiwin && tiwin == tp->snd_wnd && 1967 tiwin && tiwin == tp->snd_wnd &&
1972 tp->snd_nxt == tp->snd_max) { 1968 tp->snd_nxt == tp->snd_max) {
1973 1969
1974 /* 1970 /*
1975 * If last ACK falls within this segment's sequence numbers, 1971 * If last ACK falls within this segment's sequence numbers,
1976 * record the timestamp. 1972 * record the timestamp.
1977 * NOTE that the test is modified according to the latest 1973 * NOTE that the test is modified according to the latest
1978 * proposal of the tcplw@cray.com list (Braden 1993/04/26). 1974 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
1979 * 1975 *
1980 * note that we already know 1976 * note that we already know
1981 * TSTMP_GEQ(opti.ts_val, tp->ts_recent) 1977 * TSTMP_GEQ(opti.ts_val, tp->ts_recent)
1982 */ 1978 */
1983 if (opti.ts_present && 1979 if (opti.ts_present &&
1984 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { 1980 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
1985 tp->ts_recent_age = tcp_now; 1981 tp->ts_recent_age = tcp_now;
1986 tp->ts_recent = opti.ts_val; 1982 tp->ts_recent = opti.ts_val;
1987 } 1983 }
1988 1984
1989 if (tlen == 0) { 1985 if (tlen == 0) {
1990 /* Ack prediction. */ 1986 /* Ack prediction. */
1991 if (SEQ_GT(th->th_ack, tp->snd_una) && 1987 if (SEQ_GT(th->th_ack, tp->snd_una) &&
1992 SEQ_LEQ(th->th_ack, tp->snd_max) && 1988 SEQ_LEQ(th->th_ack, tp->snd_max) &&
1993 tp->snd_cwnd >= tp->snd_wnd && 1989 tp->snd_cwnd >= tp->snd_wnd &&
1994 tp->t_partialacks < 0) { 1990 tp->t_partialacks < 0) {
1995 /* 1991 /*
1996 * this is a pure ack for outstanding data. 1992 * this is a pure ack for outstanding data.
1997 */ 1993 */
1998 if (ts_rtt) 1994 if (ts_rtt)
1999 tcp_xmit_timer(tp, ts_rtt - 1); 1995 tcp_xmit_timer(tp, ts_rtt - 1);
2000 else if (tp->t_rtttime && 1996 else if (tp->t_rtttime &&
2001 SEQ_GT(th->th_ack, tp->t_rtseq)) 1997 SEQ_GT(th->th_ack, tp->t_rtseq))
2002 tcp_xmit_timer(tp, 1998 tcp_xmit_timer(tp,
2003 tcp_now - tp->t_rtttime); 1999 tcp_now - tp->t_rtttime);
2004 acked = th->th_ack - tp->snd_una; 2000 acked = th->th_ack - tp->snd_una;
2005 tcps = TCP_STAT_GETREF(); 2001 tcps = TCP_STAT_GETREF();
2006 tcps[TCP_STAT_PREDACK]++; 2002 tcps[TCP_STAT_PREDACK]++;
2007 tcps[TCP_STAT_RCVACKPACK]++; 2003 tcps[TCP_STAT_RCVACKPACK]++;
2008 tcps[TCP_STAT_RCVACKBYTE] += acked; 2004 tcps[TCP_STAT_RCVACKBYTE] += acked;
2009 TCP_STAT_PUTREF(); 2005 TCP_STAT_PUTREF();
2010 nd6_hint(tp); 2006 nd6_hint(tp);
2011 2007
2012 if (acked > (tp->t_lastoff - tp->t_inoff)) 2008 if (acked > (tp->t_lastoff - tp->t_inoff))
2013 tp->t_lastm = NULL; 2009 tp->t_lastm = NULL;
2014 sbdrop(&so->so_snd, acked); 2010 sbdrop(&so->so_snd, acked);
2015 tp->t_lastoff -= acked; 2011 tp->t_lastoff -= acked;
2016 2012
2017 icmp_check(tp, th, acked); 2013 icmp_check(tp, th, acked);
2018 2014
2019 tp->snd_una = th->th_ack; 2015 tp->snd_una = th->th_ack;
2020 tp->snd_fack = tp->snd_una; 2016 tp->snd_fack = tp->snd_una;
2021 if (SEQ_LT(tp->snd_high, tp->snd_una)) 2017 if (SEQ_LT(tp->snd_high, tp->snd_una))
2022 tp->snd_high = tp->snd_una; 2018 tp->snd_high = tp->snd_una;
2023 m_freem(m); 2019 m_freem(m);
2024 2020
2025 /* 2021 /*
2026 * If all outstanding data are acked, stop 2022 * If all outstanding data are acked, stop
2027 * retransmit timer, otherwise restart timer 2023 * retransmit timer, otherwise restart timer
2028 * using current (possibly backed-off) value. 2024 * using current (possibly backed-off) value.
2029 * If process is waiting for space, 2025 * If process is waiting for space,
2030 * wakeup/selnotify/signal. If data 2026 * wakeup/selnotify/signal. If data
2031 * are ready to send, let tcp_output 2027 * are ready to send, let tcp_output
2032 * decide between more output or persist. 2028 * decide between more output or persist.
2033 */ 2029 */
2034 if (tp->snd_una == tp->snd_max) 2030 if (tp->snd_una == tp->snd_max)
2035 TCP_TIMER_DISARM(tp, TCPT_REXMT); 2031 TCP_TIMER_DISARM(tp, TCPT_REXMT);
2036 else if (TCP_TIMER_ISARMED(tp, 2032 else if (TCP_TIMER_ISARMED(tp,
2037 TCPT_PERSIST) == 0) 2033 TCPT_PERSIST) == 0)
2038 TCP_TIMER_ARM(tp, TCPT_REXMT, 2034 TCP_TIMER_ARM(tp, TCPT_REXMT,
2039 tp->t_rxtcur); 2035 tp->t_rxtcur);
2040 2036
2041 sowwakeup(so); 2037 sowwakeup(so);
2042 if (so->so_snd.sb_cc) { 2038 if (so->so_snd.sb_cc) {
2043 KERNEL_LOCK(1, NULL); 2039 KERNEL_LOCK(1, NULL);
2044 (void) tcp_output(tp); 2040 (void) tcp_output(tp);
2045 KERNEL_UNLOCK_ONE(NULL); 2041 KERNEL_UNLOCK_ONE(NULL);
2046 } 2042 }
2047 if (tcp_saveti) 2043 if (tcp_saveti)
2048 m_freem(tcp_saveti); 2044 m_freem(tcp_saveti);
2049 return; 2045 return;
2050 } 2046 }
2051 } else if (th->th_ack == tp->snd_una && 2047 } else if (th->th_ack == tp->snd_una &&
2052 TAILQ_FIRST(&tp->segq) == NULL && 2048 TAILQ_FIRST(&tp->segq) == NULL &&
2053 tlen <= sbspace(&so->so_rcv)) { 2049 tlen <= sbspace(&so->so_rcv)) {
2054 int newsize = 0; /* automatic sockbuf scaling */ 2050 int newsize = 0; /* automatic sockbuf scaling */
2055 2051
2056 /* 2052 /*
2057 * this is a pure, in-sequence data packet 2053 * this is a pure, in-sequence data packet
2058 * with nothing on the reassembly queue and 2054 * with nothing on the reassembly queue and
2059 * we have enough buffer space to take it. 2055 * we have enough buffer space to take it.
2060 */ 2056 */
2061 tp->rcv_nxt += tlen; 2057 tp->rcv_nxt += tlen;
2062 tcps = TCP_STAT_GETREF(); 2058 tcps = TCP_STAT_GETREF();
2063 tcps[TCP_STAT_PREDDAT]++; 2059 tcps[TCP_STAT_PREDDAT]++;
2064 tcps[TCP_STAT_RCVPACK]++; 2060 tcps[TCP_STAT_RCVPACK]++;
2065 tcps[TCP_STAT_RCVBYTE] += tlen; 2061 tcps[TCP_STAT_RCVBYTE] += tlen;
2066 TCP_STAT_PUTREF(); 2062 TCP_STAT_PUTREF();
2067 nd6_hint(tp); 2063 nd6_hint(tp);
2068 2064
2069 /* 2065 /*
2070 * Automatic sizing enables the performance of large buffers 2066 * Automatic sizing enables the performance of large buffers
2071 * and most of the efficiency of small ones by only allocating 2067 * and most of the efficiency of small ones by only allocating
2072 * space when it is needed. 2068 * space when it is needed.
2073 * 2069 *
2074 * On the receive side the socket buffer memory is only rarely 2070 * On the receive side the socket buffer memory is only rarely
2075 * used to any significant extent. This allows us to be much 2071 * used to any significant extent. This allows us to be much
2076 * more aggressive in scaling the receive socket buffer. For 2072 * more aggressive in scaling the receive socket buffer. For
2077 * the case that the buffer space is actually used to a large 2073 * the case that the buffer space is actually used to a large
2078 * extent and we run out of kernel memory we can simply drop 2074 * extent and we run out of kernel memory we can simply drop
2079 * the new segments; TCP on the sender will just retransmit it 2075 * the new segments; TCP on the sender will just retransmit it
2080 * later. Setting the buffer size too big may only consume too 2076 * later. Setting the buffer size too big may only consume too
2081 * much kernel memory if the application doesn't read() from 2077 * much kernel memory if the application doesn't read() from
2082 * the socket or packet loss or reordering makes use of the 2078 * the socket or packet loss or reordering makes use of the
2083 * reassembly queue. 2079 * reassembly queue.
2084 * 2080 *
2085 * The criteria to step up the receive buffer one notch are: 2081 * The criteria to step up the receive buffer one notch are:
2086 * 1. the number of bytes received during the time it takes 2082 * 1. the number of bytes received during the time it takes
2087 * one timestamp to be reflected back to us (the RTT); 2083 * one timestamp to be reflected back to us (the RTT);
2088 * 2. received bytes per RTT is within seven eighths of the 2084 * 2. received bytes per RTT is within seven eighths of the
2089 * current socket buffer size; 2085 * current socket buffer size;
2090 * 3. receive buffer size has not hit maximal automatic size; 2086 * 3. receive buffer size has not hit maximal automatic size;
2091 * 2087 *
2092 * This algorithm does one step per RTT at most and only if 2088 * This algorithm does one step per RTT at most and only if
2093 * we receive a bulk stream w/o packet losses or reorderings. 2089 * we receive a bulk stream w/o packet losses or reorderings.
2094 * Shrinking the buffer during idle times is not necessary as 2090 * Shrinking the buffer during idle times is not necessary as
2095 * it doesn't consume any memory when idle. 2091 * it doesn't consume any memory when idle.
2096 * 2092 *
2097 * TODO: Only step up if the application is actually serving 2093 * TODO: Only step up if the application is actually serving
2098 * the buffer to better manage the socket buffer resources. 2094 * the buffer to better manage the socket buffer resources.
2099 */ 2095 */
2100 if (tcp_do_autorcvbuf && 2096 if (tcp_do_autorcvbuf &&
2101 opti.ts_ecr && 2097 opti.ts_ecr &&
2102 (so->so_rcv.sb_flags & SB_AUTOSIZE)) { 2098 (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
2103 if (opti.ts_ecr > tp->rfbuf_ts && 2099 if (opti.ts_ecr > tp->rfbuf_ts &&
2104 opti.ts_ecr - tp->rfbuf_ts < PR_SLOWHZ) { 2100 opti.ts_ecr - tp->rfbuf_ts < PR_SLOWHZ) {
2105 if (tp->rfbuf_cnt > 2101 if (tp->rfbuf_cnt >
2106 (so->so_rcv.sb_hiwat / 8 * 7) && 2102 (so->so_rcv.sb_hiwat / 8 * 7) &&
2107 so->so_rcv.sb_hiwat < 2103 so->so_rcv.sb_hiwat <
2108 tcp_autorcvbuf_max) { 2104 tcp_autorcvbuf_max) {
2109 newsize = 2105 newsize =
2110 min(so->so_rcv.sb_hiwat + 2106 min(so->so_rcv.sb_hiwat +
2111 tcp_autorcvbuf_inc, 2107 tcp_autorcvbuf_inc,
2112 tcp_autorcvbuf_max); 2108 tcp_autorcvbuf_max);
2113 } 2109 }
2114 /* Start over with next RTT. */ 2110 /* Start over with next RTT. */
2115 tp->rfbuf_ts = 0; 2111 tp->rfbuf_ts = 0;
2116 tp->rfbuf_cnt = 0; 2112 tp->rfbuf_cnt = 0;
2117 } else 2113 } else
2118 tp->rfbuf_cnt += tlen; /* add up */ 2114 tp->rfbuf_cnt += tlen; /* add up */
2119 } 2115 }
2120 2116
2121 /* 2117 /*
2122 * Drop TCP, IP headers and TCP options then add data 2118 * Drop TCP, IP headers and TCP options then add data
2123 * to socket buffer. 2119 * to socket buffer.
2124 */ 2120 */
2125 if (so->so_state & SS_CANTRCVMORE) 2121 if (so->so_state & SS_CANTRCVMORE)
2126 m_freem(m); 2122 m_freem(m);
2127 else { 2123 else {
2128 /* 2124 /*
2129 * Set new socket buffer size. 2125 * Set new socket buffer size.
2130 * Give up when limit is reached. 2126 * Give up when limit is reached.
2131 */ 2127 */
2132 if (newsize) 2128 if (newsize)
2133 if (!sbreserve(&so->so_rcv, 2129 if (!sbreserve(&so->so_rcv,
2134 newsize, so)) 2130 newsize, so))
2135 so->so_rcv.sb_flags &= ~SB_AUTOSIZE; 2131 so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
2136 m_adj(m, toff + off); 2132 m_adj(m, toff + off);
2137 sbappendstream(&so->so_rcv, m); 2133 sbappendstream(&so->so_rcv, m);
2138 } 2134 }
2139 sorwakeup(so); 2135 sorwakeup(so);
2140 tcp_setup_ack(tp, th); 2136 tcp_setup_ack(tp, th);
2141 if (tp->t_flags & TF_ACKNOW) { 2137 if (tp->t_flags & TF_ACKNOW) {
2142 KERNEL_LOCK(1, NULL); 2138 KERNEL_LOCK(1, NULL);
2143 (void) tcp_output(tp); 2139 (void) tcp_output(tp);
2144 KERNEL_UNLOCK_ONE(NULL); 2140 KERNEL_UNLOCK_ONE(NULL);
2145 } 2141 }
2146 if (tcp_saveti) 2142 if (tcp_saveti)
2147 m_freem(tcp_saveti); 2143 m_freem(tcp_saveti);
2148 return; 2144 return;
2149 } 2145 }
2150 } 2146 }
2151 2147
2152 /* 2148 /*
2153 * Compute mbuf offset to TCP data segment. 2149 * Compute mbuf offset to TCP data segment.
2154 */ 2150 */
2155 hdroptlen = toff + off; 2151 hdroptlen = toff + off;
2156 2152
2157 /* 2153 /*
2158 * Calculate amount of space in receive window, 2154 * Calculate amount of space in receive window,
2159 * and then do TCP input processing. 2155 * and then do TCP input processing.
2160 * Receive window is amount of space in rcv queue, 2156 * Receive window is amount of space in rcv queue,
2161 * but not less than advertised window. 2157 * but not less than advertised window.
2162 */ 2158 */
2163 { int win; 2159 { int win;
2164 2160
2165 win = sbspace(&so->so_rcv); 2161 win = sbspace(&so->so_rcv);
2166 if (win < 0) 2162 if (win < 0)
2167 win = 0; 2163 win = 0;
2168 tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); 2164 tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
2169 } 2165 }
2170 2166
2171 /* Reset receive buffer auto scaling when not in bulk receive mode. */ 2167 /* Reset receive buffer auto scaling when not in bulk receive mode. */
2172 tp->rfbuf_ts = 0; 2168 tp->rfbuf_ts = 0;
2173 tp->rfbuf_cnt = 0; 2169 tp->rfbuf_cnt = 0;
2174 2170
2175 switch (tp->t_state) { 2171 switch (tp->t_state) {
2176 /* 2172 /*
2177 * If the state is SYN_SENT: 2173 * If the state is SYN_SENT:
2178 * if seg contains an ACK, but not for our SYN, drop the input. 2174 * if seg contains an ACK, but not for our SYN, drop the input.
2179 * if seg contains a RST, then drop the connection. 2175 * if seg contains a RST, then drop the connection.
2180 * if seg does not contain SYN, then drop it. 2176 * if seg does not contain SYN, then drop it.
2181 * Otherwise this is an acceptable SYN segment 2177 * Otherwise this is an acceptable SYN segment
2182 * initialize tp->rcv_nxt and tp->irs 2178 * initialize tp->rcv_nxt and tp->irs
2183 * if seg contains ack then advance tp->snd_una 2179 * if seg contains ack then advance tp->snd_una
2184 * if seg contains a ECE and ECN support is enabled, the stream 2180 * if seg contains a ECE and ECN support is enabled, the stream
2185 * is ECN capable. 2181 * is ECN capable.
2186 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 2182 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
2187 * arrange for segment to be acked (eventually) 2183 * arrange for segment to be acked (eventually)
2188 * continue processing rest of data/controls, beginning with URG 2184 * continue processing rest of data/controls, beginning with URG
2189 */ 2185 */
2190 case TCPS_SYN_SENT: 2186 case TCPS_SYN_SENT:
2191 if ((tiflags & TH_ACK) && 2187 if ((tiflags & TH_ACK) &&
2192 (SEQ_LEQ(th->th_ack, tp->iss) || 2188 (SEQ_LEQ(th->th_ack, tp->iss) ||
2193 SEQ_GT(th->th_ack, tp->snd_max))) 2189 SEQ_GT(th->th_ack, tp->snd_max)))
2194 goto dropwithreset; 2190 goto dropwithreset;
2195 if (tiflags & TH_RST) { 2191 if (tiflags & TH_RST) {
2196 if (tiflags & TH_ACK) 2192 if (tiflags & TH_ACK)
2197 tp = tcp_drop(tp, ECONNREFUSED); 2193 tp = tcp_drop(tp, ECONNREFUSED);
2198 goto drop; 2194 goto drop;
2199 } 2195 }
2200 if ((tiflags & TH_SYN) == 0) 2196 if ((tiflags & TH_SYN) == 0)
2201 goto drop; 2197 goto drop;
2202 if (tiflags & TH_ACK) { 2198 if (tiflags & TH_ACK) {
2203 tp->snd_una = th->th_ack; 2199 tp->snd_una = th->th_ack;
2204 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 2200 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
2205 tp->snd_nxt = tp->snd_una; 2201 tp->snd_nxt = tp->snd_una;
2206 if (SEQ_LT(tp->snd_high, tp->snd_una)) 2202 if (SEQ_LT(tp->snd_high, tp->snd_una))
2207 tp->snd_high = tp->snd_una; 2203 tp->snd_high = tp->snd_una;
2208 TCP_TIMER_DISARM(tp, TCPT_REXMT); 2204 TCP_TIMER_DISARM(tp, TCPT_REXMT);
2209 2205
2210 if ((tiflags & TH_ECE) && tcp_do_ecn) { 2206 if ((tiflags & TH_ECE) && tcp_do_ecn) {
2211 tp->t_flags |= TF_ECN_PERMIT; 2207 tp->t_flags |= TF_ECN_PERMIT;
2212 TCP_STATINC(TCP_STAT_ECN_SHS); 2208 TCP_STATINC(TCP_STAT_ECN_SHS);
2213 } 2209 }
2214 2210
2215 } 2211 }
2216 tp->irs = th->th_seq; 2212 tp->irs = th->th_seq;
2217 tcp_rcvseqinit(tp); 2213 tcp_rcvseqinit(tp);
2218 tp->t_flags |= TF_ACKNOW; 2214 tp->t_flags |= TF_ACKNOW;
2219 tcp_mss_from_peer(tp, opti.maxseg); 2215 tcp_mss_from_peer(tp, opti.maxseg);
2220 2216
2221 /* 2217 /*
2222 * Initialize the initial congestion window. If we 2218 * Initialize the initial congestion window. If we
2223 * had to retransmit the SYN, we must initialize cwnd 2219 * had to retransmit the SYN, we must initialize cwnd
2224 * to 1 segment (i.e. the Loss Window). 2220 * to 1 segment (i.e. the Loss Window).
2225 */ 2221 */
2226 if (tp->t_flags & TF_SYN_REXMT) 2222 if (tp->t_flags & TF_SYN_REXMT)
2227 tp->snd_cwnd = tp->t_peermss; 2223 tp->snd_cwnd = tp->t_peermss;
2228 else { 2224 else {
2229 int ss = tcp_init_win; 2225 int ss = tcp_init_win;
2230#ifdef INET 2226#ifdef INET
2231 if (inp != NULL && in_localaddr(inp->inp_faddr)) 2227 if (inp != NULL && in_localaddr(inp->inp_faddr))
2232 ss = tcp_init_win_local; 2228 ss = tcp_init_win_local;
2233#endif 2229#endif
2234#ifdef INET6 2230#ifdef INET6
2235 if (in6p != NULL && in6_localaddr(&in6p->in6p_faddr)) 2231 if (in6p != NULL && in6_localaddr(&in6p->in6p_faddr))
2236 ss = tcp_init_win_local; 2232 ss = tcp_init_win_local;
2237#endif 2233#endif
2238 tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss); 2234 tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
2239 } 2235 }
2240 2236
2241 tcp_rmx_rtt(tp); 2237 tcp_rmx_rtt(tp);
2242 if (tiflags & TH_ACK) { 2238 if (tiflags & TH_ACK) {
2243 TCP_STATINC(TCP_STAT_CONNECTS); 2239 TCP_STATINC(TCP_STAT_CONNECTS);
2244 /* 2240 /*
2245 * move tcp_established before soisconnected 2241 * move tcp_established before soisconnected
2246 * because upcall handler can drive tcp_output 2242 * because upcall handler can drive tcp_output
2247 * functionality. 2243 * functionality.
2248 * XXX we might call soisconnected at the end of 2244 * XXX we might call soisconnected at the end of
2249 * all processing 2245 * all processing
2250 */ 2246 */
2251 tcp_established(tp); 2247 tcp_established(tp);
2252 soisconnected(so); 2248 soisconnected(so);
2253 /* Do window scaling on this connection? */ 2249 /* Do window scaling on this connection? */
2254 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 2250 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
2255 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 2251 (TF_RCVD_SCALE|TF_REQ_SCALE)) {
2256 tp->snd_scale = tp->requested_s_scale; 2252 tp->snd_scale = tp->requested_s_scale;
2257 tp->rcv_scale = tp->request_r_scale; 2253 tp->rcv_scale = tp->request_r_scale;
2258 } 2254 }
2259 TCP_REASS_LOCK(tp); 2255 TCP_REASS_LOCK(tp);
2260 (void) tcp_reass(tp, NULL, NULL, &tlen); 2256 (void) tcp_reass(tp, NULL, NULL, &tlen);
2261 /* 2257 /*
2262 * if we didn't have to retransmit the SYN, 2258 * if we didn't have to retransmit the SYN,
2263 * use its rtt as our initial srtt & rtt var. 2259 * use its rtt as our initial srtt & rtt var.
2264 */ 2260 */
2265 if (tp->t_rtttime) 2261 if (tp->t_rtttime)
2266 tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); 2262 tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
2267 } else 2263 } else
2268 tp->t_state = TCPS_SYN_RECEIVED; 2264 tp->t_state = TCPS_SYN_RECEIVED;
2269 2265
2270 /* 2266 /*
2271 * Advance th->th_seq to correspond to first data byte. 2267 * Advance th->th_seq to correspond to first data byte.
2272 * If data, trim to stay within window, 2268 * If data, trim to stay within window,
2273 * dropping FIN if necessary. 2269 * dropping FIN if necessary.
2274 */ 2270 */
2275 th->th_seq++; 2271 th->th_seq++;
2276 if (tlen > tp->rcv_wnd) { 2272 if (tlen > tp->rcv_wnd) {
2277 todrop = tlen - tp->rcv_wnd; 2273 todrop = tlen - tp->rcv_wnd;
2278 m_adj(m, -todrop); 2274 m_adj(m, -todrop);
2279 tlen = tp->rcv_wnd; 2275 tlen = tp->rcv_wnd;
2280 tiflags &= ~TH_FIN; 2276 tiflags &= ~TH_FIN;
2281 tcps = TCP_STAT_GETREF(); 2277 tcps = TCP_STAT_GETREF();
2282 tcps[TCP_STAT_RCVPACKAFTERWIN]++; 2278 tcps[TCP_STAT_RCVPACKAFTERWIN]++;
2283 tcps[TCP_STAT_RCVBYTEAFTERWIN] += todrop; 2279 tcps[TCP_STAT_RCVBYTEAFTERWIN] += todrop;
2284 TCP_STAT_PUTREF(); 2280 TCP_STAT_PUTREF();
2285 } 2281 }
2286 tp->snd_wl1 = th->th_seq - 1; 2282 tp->snd_wl1 = th->th_seq - 1;
2287 tp->rcv_up = th->th_seq; 2283 tp->rcv_up = th->th_seq;
2288 goto step6; 2284 goto step6;
2289 2285
2290 /* 2286 /*
2291 * If the state is SYN_RECEIVED: 2287 * If the state is SYN_RECEIVED:
2292 * If seg contains an ACK, but not for our SYN, drop the input 2288 * If seg contains an ACK, but not for our SYN, drop the input
2293 * and generate an RST. See page 36, rfc793 2289 * and generate an RST. See page 36, rfc793
2294 */ 2290 */
2295 case TCPS_SYN_RECEIVED: 2291 case TCPS_SYN_RECEIVED:
2296 if ((tiflags & TH_ACK) && 2292 if ((tiflags & TH_ACK) &&
2297 (SEQ_LEQ(th->th_ack, tp->iss) || 2293 (SEQ_LEQ(th->th_ack, tp->iss) ||
2298 SEQ_GT(th->th_ack, tp->snd_max))) 2294 SEQ_GT(th->th_ack, tp->snd_max)))
2299 goto dropwithreset; 2295 goto dropwithreset;
2300 break; 2296 break;
2301 } 2297 }
2302 2298
2303 /* 2299 /*
2304 * States other than LISTEN or SYN_SENT. 2300 * States other than LISTEN or SYN_SENT.
2305 * First check timestamp, if present. 2301 * First check timestamp, if present.
2306 * Then check that at least some bytes of segment are within 2302 * Then check that at least some bytes of segment are within
2307 * receive window. If segment begins before rcv_nxt, 2303 * receive window. If segment begins before rcv_nxt,
2308 * drop leading data (and SYN); if nothing left, just ack. 2304 * drop leading data (and SYN); if nothing left, just ack.
2309 * 2305 *
2310 * RFC 1323 PAWS: If we have a timestamp reply on this segment 2306 * RFC 1323 PAWS: If we have a timestamp reply on this segment
2311 * and it's less than ts_recent, drop it. 2307 * and it's less than ts_recent, drop it.
2312 */ 2308 */
2313 if (opti.ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && 2309 if (opti.ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
2314 TSTMP_LT(opti.ts_val, tp->ts_recent)) { 2310 TSTMP_LT(opti.ts_val, tp->ts_recent)) {
2315 2311
2316 /* Check to see if ts_recent is over 24 days old. */ 2312 /* Check to see if ts_recent is over 24 days old. */
2317 if (tcp_now - tp->ts_recent_age > TCP_PAWS_IDLE) { 2313 if (tcp_now - tp->ts_recent_age > TCP_PAWS_IDLE) {
2318 /* 2314 /*
2319 * Invalidate ts_recent. If this segment updates 2315 * Invalidate ts_recent. If this segment updates
2320 * ts_recent, the age will be reset later and ts_recent 2316 * ts_recent, the age will be reset later and ts_recent
2321 * will get a valid value. If it does not, setting 2317 * will get a valid value. If it does not, setting
2322 * ts_recent to zero will at least satisfy the 2318 * ts_recent to zero will at least satisfy the
2323 * requirement that zero be placed in the timestamp 2319 * requirement that zero be placed in the timestamp
2324 * echo reply when ts_recent isn't valid. The 2320 * echo reply when ts_recent isn't valid. The
2325 * age isn't reset until we get a valid ts_recent 2321 * age isn't reset until we get a valid ts_recent
2326 * because we don't want out-of-order segments to be 2322 * because we don't want out-of-order segments to be
2327 * dropped when ts_recent is old. 2323 * dropped when ts_recent is old.
2328 */ 2324 */
2329 tp->ts_recent = 0; 2325 tp->ts_recent = 0;
2330 } else { 2326 } else {
2331 tcps = TCP_STAT_GETREF(); 2327 tcps = TCP_STAT_GETREF();
2332 tcps[TCP_STAT_RCVDUPPACK]++; 2328 tcps[TCP_STAT_RCVDUPPACK]++;
2333 tcps[TCP_STAT_RCVDUPBYTE] += tlen; 2329 tcps[TCP_STAT_RCVDUPBYTE] += tlen;
2334 tcps[TCP_STAT_PAWSDROP]++; 2330 tcps[TCP_STAT_PAWSDROP]++;
2335 TCP_STAT_PUTREF(); 2331 TCP_STAT_PUTREF();
2336 tcp_new_dsack(tp, th->th_seq, tlen); 2332 tcp_new_dsack(tp, th->th_seq, tlen);
2337 goto dropafterack; 2333 goto dropafterack;
2338 } 2334 }
2339 } 2335 }
2340 2336
2341 todrop = tp->rcv_nxt - th->th_seq; 2337 todrop = tp->rcv_nxt - th->th_seq;
2342 dupseg = false; 2338 dupseg = false;
2343 if (todrop > 0) { 2339 if (todrop > 0) {
2344 if (tiflags & TH_SYN) { 2340 if (tiflags & TH_SYN) {
2345 tiflags &= ~TH_SYN; 2341 tiflags &= ~TH_SYN;
2346 th->th_seq++; 2342 th->th_seq++;
2347 if (th->th_urp > 1) 2343 if (th->th_urp > 1)
2348 th->th_urp--; 2344 th->th_urp--;
2349 else { 2345 else {
2350 tiflags &= ~TH_URG; 2346 tiflags &= ~TH_URG;
2351 th->th_urp = 0; 2347 th->th_urp = 0;
2352 } 2348 }
2353 todrop--; 2349 todrop--;
2354 } 2350 }
2355 if (todrop > tlen || 2351 if (todrop > tlen ||
2356 (todrop == tlen && (tiflags & TH_FIN) == 0)) { 2352 (todrop == tlen && (tiflags & TH_FIN) == 0)) {
2357 /* 2353 /*
2358 * Any valid FIN or RST must be to the left of the 2354 * Any valid FIN or RST must be to the left of the
2359 * window. At this point the FIN or RST must be a 2355 * window. At this point the FIN or RST must be a
2360 * duplicate or out of sequence; drop it. 2356 * duplicate or out of sequence; drop it.
2361 */ 2357 */
2362 if (tiflags & TH_RST) 2358 if (tiflags & TH_RST)
2363 goto drop; 2359 goto drop;
2364 tiflags &= ~(TH_FIN|TH_RST); 2360 tiflags &= ~(TH_FIN|TH_RST);
2365 /* 2361 /*
2366 * Send an ACK to resynchronize and drop any data. 2362 * Send an ACK to resynchronize and drop any data.
2367 * But keep on processing for RST or ACK. 2363 * But keep on processing for RST or ACK.
2368 */ 2364 */
2369 tp->t_flags |= TF_ACKNOW; 2365 tp->t_flags |= TF_ACKNOW;
2370 todrop = tlen; 2366 todrop = tlen;
2371 dupseg = true; 2367 dupseg = true;
2372 tcps = TCP_STAT_GETREF(); 2368 tcps = TCP_STAT_GETREF();
2373 tcps[TCP_STAT_RCVDUPPACK]++; 2369 tcps[TCP_STAT_RCVDUPPACK]++;
2374 tcps[TCP_STAT_RCVDUPBYTE] += todrop; 2370 tcps[TCP_STAT_RCVDUPBYTE] += todrop;
2375 TCP_STAT_PUTREF(); 2371 TCP_STAT_PUTREF();
2376 } else if ((tiflags & TH_RST) && 2372 } else if ((tiflags & TH_RST) &&
2377 th->th_seq != tp->rcv_nxt) { 2373 th->th_seq != tp->rcv_nxt) {
2378 /* 2374 /*
2379 * Test for reset before adjusting the sequence 2375 * Test for reset before adjusting the sequence
2380 * number for overlapping data. 2376 * number for overlapping data.
2381 */ 2377 */
2382 goto dropafterack_ratelim; 2378 goto dropafterack_ratelim;
2383 } else { 2379 } else {
2384 tcps = TCP_STAT_GETREF(); 2380 tcps = TCP_STAT_GETREF();
2385 tcps[TCP_STAT_RCVPARTDUPPACK]++; 2381 tcps[TCP_STAT_RCVPARTDUPPACK]++;
2386 tcps[TCP_STAT_RCVPARTDUPBYTE] += todrop; 2382 tcps[TCP_STAT_RCVPARTDUPBYTE] += todrop;
2387 TCP_STAT_PUTREF(); 2383 TCP_STAT_PUTREF();
2388 } 2384 }
2389 tcp_new_dsack(tp, th->th_seq, todrop); 2385 tcp_new_dsack(tp, th->th_seq, todrop);
2390 hdroptlen += todrop; /*drop from head afterwards*/ 2386 hdroptlen += todrop; /*drop from head afterwards*/
2391 th->th_seq += todrop; 2387 th->th_seq += todrop;
2392 tlen -= todrop; 2388 tlen -= todrop;
2393 if (th->th_urp > todrop) 2389 if (th->th_urp > todrop)
2394 th->th_urp -= todrop; 2390 th->th_urp -= todrop;
2395 else { 2391 else {
2396 tiflags &= ~TH_URG; 2392 tiflags &= ~TH_URG;
2397 th->th_urp = 0; 2393 th->th_urp = 0;
2398 } 2394 }
2399 } 2395 }
2400 2396
2401 /* 2397 /*
2402 * If new data are received on a connection after the 2398 * If new data are received on a connection after the
2403 * user processes are gone, then RST the other end. 2399 * user processes are gone, then RST the other end.
2404 */ 2400 */
2405 if ((so->so_state & SS_NOFDREF) && 2401 if ((so->so_state & SS_NOFDREF) &&
2406 tp->t_state > TCPS_CLOSE_WAIT && tlen) { 2402 tp->t_state > TCPS_CLOSE_WAIT && tlen) {
2407 tp = tcp_close(tp); 2403 tp = tcp_close(tp);
2408 TCP_STATINC(TCP_STAT_RCVAFTERCLOSE); 2404 TCP_STATINC(TCP_STAT_RCVAFTERCLOSE);
2409 goto dropwithreset; 2405 goto dropwithreset;
2410 } 2406 }
2411 2407
2412 /* 2408 /*
2413 * If segment ends after window, drop trailing data 2409 * If segment ends after window, drop trailing data
2414 * (and PUSH and FIN); if nothing left, just ACK. 2410 * (and PUSH and FIN); if nothing left, just ACK.
2415 */ 2411 */
2416 todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd); 2412 todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
2417 if (todrop > 0) { 2413 if (todrop > 0) {
2418 TCP_STATINC(TCP_STAT_RCVPACKAFTERWIN); 2414 TCP_STATINC(TCP_STAT_RCVPACKAFTERWIN);
2419 if (todrop >= tlen) { 2415 if (todrop >= tlen) {
2420 /* 2416 /*
2421 * The segment actually starts after the window. 2417 * The segment actually starts after the window.
2422 * th->th_seq + tlen - tp->rcv_nxt - tp->rcv_wnd >= tlen 2418 * th->th_seq + tlen - tp->rcv_nxt - tp->rcv_wnd >= tlen
2423 * th->th_seq - tp->rcv_nxt - tp->rcv_wnd >= 0 2419 * th->th_seq - tp->rcv_nxt - tp->rcv_wnd >= 0
2424 * th->th_seq >= tp->rcv_nxt + tp->rcv_wnd 2420 * th->th_seq >= tp->rcv_nxt + tp->rcv_wnd
2425 */ 2421 */
2426 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, tlen); 2422 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, tlen);
2427 /* 2423 /*
2428 * If a new connection request is received 2424 * If a new connection request is received
2429 * while in TIME_WAIT, drop the old connection 2425 * while in TIME_WAIT, drop the old connection
2430 * and start over if the sequence numbers 2426 * and start over if the sequence numbers
2431 * are above the previous ones. 2427 * are above the previous ones.
2432 * 2428 *
2433 * NOTE: We will checksum the packet again, and 2429 * NOTE: We will checksum the packet again, and
2434 * so we need to put the header fields back into 2430 * so we need to put the header fields back into
2435 * network order! 2431 * network order!
2436 * XXX This kind of sucks, but we don't expect 2432 * XXX This kind of sucks, but we don't expect
2437 * XXX this to happen very often, so maybe it 2433 * XXX this to happen very often, so maybe it
2438 * XXX doesn't matter so much. 2434 * XXX doesn't matter so much.
2439 */ 2435 */
2440 if (tiflags & TH_SYN && 2436 if (tiflags & TH_SYN &&
2441 tp->t_state == TCPS_TIME_WAIT && 2437 tp->t_state == TCPS_TIME_WAIT &&
2442 SEQ_GT(th->th_seq, tp->rcv_nxt)) { 2438 SEQ_GT(th->th_seq, tp->rcv_nxt)) {
2443 tp = tcp_close(tp); 2439 tp = tcp_close(tp);
2444 tcp_fields_to_net(th); 2440 tcp_fields_to_net(th);
2445 goto findpcb; 2441 goto findpcb;
2446 } 2442 }
2447 /* 2443 /*
2448 * If window is closed can only take segments at 2444 * If window is closed can only take segments at
2449 * window edge, and have to drop data and PUSH from 2445 * window edge, and have to drop data and PUSH from
2450 * incoming segments. Continue processing, but 2446 * incoming segments. Continue processing, but
2451 * remember to ack. Otherwise, drop segment 2447 * remember to ack. Otherwise, drop segment
2452 * and (if not RST) ack. 2448 * and (if not RST) ack.
2453 */ 2449 */
2454 if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { 2450 if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
2455 tp->t_flags |= TF_ACKNOW; 2451 tp->t_flags |= TF_ACKNOW;
2456 TCP_STATINC(TCP_STAT_RCVWINPROBE); 2452 TCP_STATINC(TCP_STAT_RCVWINPROBE);
2457 } else 2453 } else
2458 goto dropafterack; 2454 goto dropafterack;
2459 } else 2455 } else
2460 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, todrop); 2456 TCP_STATADD(TCP_STAT_RCVBYTEAFTERWIN, todrop);
2461 m_adj(m, -todrop); 2457 m_adj(m, -todrop);
2462 tlen -= todrop; 2458 tlen -= todrop;
2463 tiflags &= ~(TH_PUSH|TH_FIN); 2459 tiflags &= ~(TH_PUSH|TH_FIN);
2464 } 2460 }
2465 2461
2466 /* 2462 /*
2467 * If last ACK falls within this segment's sequence numbers, 2463 * If last ACK falls within this segment's sequence numbers,
2468 * record the timestamp. 2464 * record the timestamp.
2469 * NOTE:  2465 * NOTE:
2470 * 1) That the test incorporates suggestions from the latest 2466 * 1) That the test incorporates suggestions from the latest
2471 * proposal of the tcplw@cray.com list (Braden 1993/04/26). 2467 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
2472 * 2) That updating only on newer timestamps interferes with 2468 * 2) That updating only on newer timestamps interferes with
2473 * our earlier PAWS tests, so this check should be solely 2469 * our earlier PAWS tests, so this check should be solely
2474 * predicated on the sequence space of this segment. 2470 * predicated on the sequence space of this segment.
2475 * 3) That we modify the segment boundary check to be  2471 * 3) That we modify the segment boundary check to be
2476 * Last.ACK.Sent <= SEG.SEQ + SEG.Len  2472 * Last.ACK.Sent <= SEG.SEQ + SEG.Len
2477 * instead of RFC1323's 2473 * instead of RFC1323's
2478 * Last.ACK.Sent < SEG.SEQ + SEG.Len, 2474 * Last.ACK.Sent < SEG.SEQ + SEG.Len,
2479 * This modified check allows us to overcome RFC1323's 2475 * This modified check allows us to overcome RFC1323's
2480 * limitations as described in Stevens TCP/IP Illustrated 2476 * limitations as described in Stevens TCP/IP Illustrated
2481 * Vol. 2 p.869. In such cases, we can still calculate the 2477 * Vol. 2 p.869. In such cases, we can still calculate the
2482 * RTT correctly when RCV.NXT == Last.ACK.Sent. 2478 * RTT correctly when RCV.NXT == Last.ACK.Sent.
2483 */ 2479 */
2484 if (opti.ts_present && 2480 if (opti.ts_present &&
2485 SEQ_LEQ(th->th_seq, tp->last_ack_sent) && 2481 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
2486 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + 2482 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
2487 ((tiflags & (TH_SYN|TH_FIN)) != 0))) { 2483 ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
2488 tp->ts_recent_age = tcp_now; 2484 tp->ts_recent_age = tcp_now;
2489 tp->ts_recent = opti.ts_val; 2485 tp->ts_recent = opti.ts_val;
2490 } 2486 }
2491 2487
2492 /* 2488 /*
2493 * If the RST bit is set examine the state: 2489 * If the RST bit is set examine the state:
2494 * SYN_RECEIVED STATE: 2490 * SYN_RECEIVED STATE:
2495 * If passive open, return to LISTEN state. 2491 * If passive open, return to LISTEN state.
2496 * If active open, inform user that connection was refused. 2492 * If active open, inform user that connection was refused.
2497 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES: 2493 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
2498 * Inform user that connection was reset, and close tcb. 2494 * Inform user that connection was reset, and close tcb.
2499 * CLOSING, LAST_ACK, TIME_WAIT STATES 2495 * CLOSING, LAST_ACK, TIME_WAIT STATES
2500 * Close the tcb. 2496 * Close the tcb.
2501 */ 2497 */
2502 if (tiflags & TH_RST) { 2498 if (tiflags & TH_RST) {
2503 if (th->th_seq != tp->rcv_nxt) 2499 if (th->th_seq != tp->rcv_nxt)
2504 goto dropafterack_ratelim; 2500 goto dropafterack_ratelim;
2505 2501
2506 switch (tp->t_state) { 2502 switch (tp->t_state) {
2507 case TCPS_SYN_RECEIVED: 2503 case TCPS_SYN_RECEIVED:
2508 so->so_error = ECONNREFUSED; 2504 so->so_error = ECONNREFUSED;
2509 goto close; 2505 goto close;
2510 2506
2511 case TCPS_ESTABLISHED: 2507 case TCPS_ESTABLISHED:
2512 case TCPS_FIN_WAIT_1: 2508 case TCPS_FIN_WAIT_1:
2513 case TCPS_FIN_WAIT_2: 2509 case TCPS_FIN_WAIT_2:
2514 case TCPS_CLOSE_WAIT: 2510 case TCPS_CLOSE_WAIT:
2515 so->so_error = ECONNRESET; 2511 so->so_error = ECONNRESET;
2516 close: 2512 close:
2517 tp->t_state = TCPS_CLOSED; 2513 tp->t_state = TCPS_CLOSED;
2518 TCP_STATINC(TCP_STAT_DROPS); 2514 TCP_STATINC(TCP_STAT_DROPS);
2519 tp = tcp_close(tp); 2515 tp = tcp_close(tp);
2520 goto drop; 2516 goto drop;
2521 2517
2522 case TCPS_CLOSING: 2518 case TCPS_CLOSING:
2523 case TCPS_LAST_ACK: 2519 case TCPS_LAST_ACK:
2524 case TCPS_TIME_WAIT: 2520 case TCPS_TIME_WAIT:
2525 tp = tcp_close(tp); 2521 tp = tcp_close(tp);
2526 goto drop; 2522 goto drop;
2527 } 2523 }
2528 } 2524 }
2529 2525
2530 /* 2526 /*
2531 * Since we've covered the SYN-SENT and SYN-RECEIVED states above 2527 * Since we've covered the SYN-SENT and SYN-RECEIVED states above
2532 * we must be in a synchronized state. RFC793 states (under RST 2528 * we must be in a synchronized state. RFC793 states (under RST
2533 * generation) that any unacceptable segment (an out-of-order SYN 2529 * generation) that any unacceptable segment (an out-of-order SYN
2534 * qualifies) received in a synchronized state must elicit only an 2530 * qualifies) received in a synchronized state must elicit only an
2535 * empty acknowledgment segment ... and the connection remains in 2531 * empty acknowledgment segment ... and the connection remains in
2536 * the same state. 2532 * the same state.
2537 */ 2533 */
2538 if (tiflags & TH_SYN) { 2534 if (tiflags & TH_SYN) {
2539 if (tp->rcv_nxt == th->th_seq) { 2535 if (tp->rcv_nxt == th->th_seq) {
2540 tcp_respond(tp, m, m, th, (tcp_seq)0, th->th_ack - 1, 2536 tcp_respond(tp, m, m, th, (tcp_seq)0, th->th_ack - 1,
2541 TH_ACK); 2537 TH_ACK);
2542 if (tcp_saveti) 2538 if (tcp_saveti)
2543 m_freem(tcp_saveti); 2539 m_freem(tcp_saveti);
2544 return; 2540 return;
2545 } 2541 }
2546 2542
2547 goto dropafterack_ratelim; 2543 goto dropafterack_ratelim;
2548 } 2544 }
2549 2545
2550 /* 2546 /*
2551 * If the ACK bit is off we drop the segment and return. 2547 * If the ACK bit is off we drop the segment and return.
2552 */ 2548 */
2553 if ((tiflags & TH_ACK) == 0) { 2549 if ((tiflags & TH_ACK) == 0) {
2554 if (tp->t_flags & TF_ACKNOW) 2550 if (tp->t_flags & TF_ACKNOW)
2555 goto dropafterack; 2551 goto dropafterack;
2556 else 2552 else
2557 goto drop; 2553 goto drop;
2558 } 2554 }
2559 2555
2560 /* 2556 /*
2561 * Ack processing. 2557 * Ack processing.
2562 */ 2558 */
2563 switch (tp->t_state) { 2559 switch (tp->t_state) {
2564 2560
2565 /* 2561 /*
2566 * In SYN_RECEIVED state if the ack ACKs our SYN then enter 2562 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
2567 * ESTABLISHED state and continue processing, otherwise 2563 * ESTABLISHED state and continue processing, otherwise
2568 * send an RST. 2564 * send an RST.
2569 */ 2565 */
2570 case TCPS_SYN_RECEIVED: 2566 case TCPS_SYN_RECEIVED:
2571 if (SEQ_GT(tp->snd_una, th->th_ack) || 2567 if (SEQ_GT(tp->snd_una, th->th_ack) ||
2572 SEQ_GT(th->th_ack, tp->snd_max)) 2568 SEQ_GT(th->th_ack, tp->snd_max))
2573 goto dropwithreset; 2569 goto dropwithreset;
2574 TCP_STATINC(TCP_STAT_CONNECTS); 2570 TCP_STATINC(TCP_STAT_CONNECTS);
2575 soisconnected(so); 2571 soisconnected(so);
2576 tcp_established(tp); 2572 tcp_established(tp);
2577 /* Do window scaling? */ 2573 /* Do window scaling? */
2578 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 2574 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
2579 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 2575 (TF_RCVD_SCALE|TF_REQ_SCALE)) {
2580 tp->snd_scale = tp->requested_s_scale; 2576 tp->snd_scale = tp->requested_s_scale;
2581 tp->rcv_scale = tp->request_r_scale; 2577 tp->rcv_scale = tp->request_r_scale;
2582 } 2578 }
2583 TCP_REASS_LOCK(tp); 2579 TCP_REASS_LOCK(tp);
2584 (void) tcp_reass(tp, NULL, NULL, &tlen); 2580 (void) tcp_reass(tp, NULL, NULL, &tlen);
2585 tp->snd_wl1 = th->th_seq - 1; 2581 tp->snd_wl1 = th->th_seq - 1;
2586 /* fall into ... */ 2582 /* fall into ... */
2587 2583
2588 /* 2584 /*
2589 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 2585 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
2590 * ACKs. If the ack is in the range 2586 * ACKs. If the ack is in the range
2591 * tp->snd_una < th->th_ack <= tp->snd_max 2587 * tp->snd_una < th->th_ack <= tp->snd_max
2592 * then advance tp->snd_una to th->th_ack and drop 2588 * then advance tp->snd_una to th->th_ack and drop
2593 * data from the retransmission queue. If this ACK reflects 2589 * data from the retransmission queue. If this ACK reflects
2594 * more up to date window information we update our window information. 2590 * more up to date window information we update our window information.
2595 */ 2591 */
2596 case TCPS_ESTABLISHED: 2592 case TCPS_ESTABLISHED:
2597 case TCPS_FIN_WAIT_1: 2593 case TCPS_FIN_WAIT_1:
2598 case TCPS_FIN_WAIT_2: 2594 case TCPS_FIN_WAIT_2:
2599 case TCPS_CLOSE_WAIT: 2595 case TCPS_CLOSE_WAIT:
2600 case TCPS_CLOSING: 2596 case TCPS_CLOSING:
2601 case TCPS_LAST_ACK: 2597 case TCPS_LAST_ACK:
2602 case TCPS_TIME_WAIT: 2598 case TCPS_TIME_WAIT:
2603 2599