| @@ -1,985 +1,985 @@ | | | @@ -1,985 +1,985 @@ |
1 | /* $NetBSD: tcp_congctl.c,v 1.26 2018/09/03 16:29:36 riastradh Exp $ */ | | 1 | /* $NetBSD: tcp_congctl.c,v 1.27 2019/10/09 05:29:18 msaitoh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation | | 8 | * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation |
9 | * Facility, NASA Ames Research Center. | | 9 | * Facility, NASA Ames Research Center. |
10 | * This code is derived from software contributed to The NetBSD Foundation | | 10 | * This code is derived from software contributed to The NetBSD Foundation |
11 | * by Charles M. Hannum. | | 11 | * by Charles M. Hannum. |
12 | * This code is derived from software contributed to The NetBSD Foundation | | 12 | * This code is derived from software contributed to The NetBSD Foundation |
13 | * by Rui Paulo. | | 13 | * by Rui Paulo. |
14 | * | | 14 | * |
15 | * Redistribution and use in source and binary forms, with or without | | 15 | * Redistribution and use in source and binary forms, with or without |
16 | * modification, are permitted provided that the following conditions | | 16 | * modification, are permitted provided that the following conditions |
17 | * are met: | | 17 | * are met: |
18 | * 1. Redistributions of source code must retain the above copyright | | 18 | * 1. Redistributions of source code must retain the above copyright |
19 | * notice, this list of conditions and the following disclaimer. | | 19 | * notice, this list of conditions and the following disclaimer. |
20 | * 2. Redistributions in binary form must reproduce the above copyright | | 20 | * 2. Redistributions in binary form must reproduce the above copyright |
21 | * notice, this list of conditions and the following disclaimer in the | | 21 | * notice, this list of conditions and the following disclaimer in the |
22 | * documentation and/or other materials provided with the distribution. | | 22 | * documentation and/or other materials provided with the distribution. |
23 | * | | 23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 24 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
25 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 25 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
26 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 26 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
27 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 27 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
28 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 28 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
30 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 30 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
31 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 31 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
32 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 32 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
33 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 33 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
34 | * POSSIBILITY OF SUCH DAMAGE. | | 34 | * POSSIBILITY OF SUCH DAMAGE. |
35 | */ | | 35 | */ |
36 | | | 36 | |
37 | /* | | 37 | /* |
38 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. | | 38 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
39 | * All rights reserved. | | 39 | * All rights reserved. |
40 | * | | 40 | * |
41 | * Redistribution and use in source and binary forms, with or without | | 41 | * Redistribution and use in source and binary forms, with or without |
42 | * modification, are permitted provided that the following conditions | | 42 | * modification, are permitted provided that the following conditions |
43 | * are met: | | 43 | * are met: |
44 | * 1. Redistributions of source code must retain the above copyright | | 44 | * 1. Redistributions of source code must retain the above copyright |
45 | * notice, this list of conditions and the following disclaimer. | | 45 | * notice, this list of conditions and the following disclaimer. |
46 | * 2. Redistributions in binary form must reproduce the above copyright | | 46 | * 2. Redistributions in binary form must reproduce the above copyright |
47 | * notice, this list of conditions and the following disclaimer in the | | 47 | * notice, this list of conditions and the following disclaimer in the |
48 | * documentation and/or other materials provided with the distribution. | | 48 | * documentation and/or other materials provided with the distribution. |
49 | * 3. Neither the name of the project nor the names of its contributors | | 49 | * 3. Neither the name of the project nor the names of its contributors |
50 | * may be used to endorse or promote products derived from this software | | 50 | * may be used to endorse or promote products derived from this software |
51 | * without specific prior written permission. | | 51 | * without specific prior written permission. |
52 | * | | 52 | * |
53 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND | | 53 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE | | 56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
63 | * SUCH DAMAGE. | | 63 | * SUCH DAMAGE. |
64 | */ | | 64 | */ |
65 | | | 65 | |
66 | /* | | 66 | /* |
67 | * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 | | 67 | * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 |
68 | * | | 68 | * |
69 | * NRL grants permission for redistribution and use in source and binary | | 69 | * NRL grants permission for redistribution and use in source and binary |
70 | * forms, with or without modification, of the software and documentation | | 70 | * forms, with or without modification, of the software and documentation |
71 | * created at NRL provided that the following conditions are met: | | 71 | * created at NRL provided that the following conditions are met: |
72 | * | | 72 | * |
73 | * 1. Redistributions of source code must retain the above copyright | | 73 | * 1. Redistributions of source code must retain the above copyright |
74 | * notice, this list of conditions and the following disclaimer. | | 74 | * notice, this list of conditions and the following disclaimer. |
75 | * 2. Redistributions in binary form must reproduce the above copyright | | 75 | * 2. Redistributions in binary form must reproduce the above copyright |
76 | * notice, this list of conditions and the following disclaimer in the | | 76 | * notice, this list of conditions and the following disclaimer in the |
77 | * documentation and/or other materials provided with the distribution. | | 77 | * documentation and/or other materials provided with the distribution. |
78 | * 3. All advertising materials mentioning features or use of this software | | 78 | * 3. All advertising materials mentioning features or use of this software |
79 | * must display the following acknowledgements: | | 79 | * must display the following acknowledgements: |
80 | * This product includes software developed by the University of | | 80 | * This product includes software developed by the University of |
81 | * California, Berkeley and its contributors. | | 81 | * California, Berkeley and its contributors. |
82 | * This product includes software developed at the Information | | 82 | * This product includes software developed at the Information |
83 | * Technology Division, US Naval Research Laboratory. | | 83 | * Technology Division, US Naval Research Laboratory. |
84 | * 4. Neither the name of the NRL nor the names of its contributors | | 84 | * 4. Neither the name of the NRL nor the names of its contributors |
85 | * may be used to endorse or promote products derived from this software | | 85 | * may be used to endorse or promote products derived from this software |
86 | * without specific prior written permission. | | 86 | * without specific prior written permission. |
87 | * | | 87 | * |
88 | * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS | | 88 | * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS |
89 | * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 89 | * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
90 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | | 90 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
91 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR | | 91 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR |
92 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | | 92 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
93 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | | 93 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
94 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | | 94 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
95 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | | 95 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
96 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | | 96 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
97 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | | 97 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
98 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | | 98 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
99 | * | | 99 | * |
100 | * The views and conclusions contained in the software and documentation | | 100 | * The views and conclusions contained in the software and documentation |
101 | * are those of the authors and should not be interpreted as representing | | 101 | * are those of the authors and should not be interpreted as representing |
102 | * official policies, either expressed or implied, of the US Naval | | 102 | * official policies, either expressed or implied, of the US Naval |
103 | * Research Laboratory (NRL). | | 103 | * Research Laboratory (NRL). |
104 | */ | | 104 | */ |
105 | | | 105 | |
106 | /* | | 106 | /* |
107 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 | | 107 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 |
108 | * The Regents of the University of California. All rights reserved. | | 108 | * The Regents of the University of California. All rights reserved. |
109 | * | | 109 | * |
110 | * Redistribution and use in source and binary forms, with or without | | 110 | * Redistribution and use in source and binary forms, with or without |
111 | * modification, are permitted provided that the following conditions | | 111 | * modification, are permitted provided that the following conditions |
112 | * are met: | | 112 | * are met: |
113 | * 1. Redistributions of source code must retain the above copyright | | 113 | * 1. Redistributions of source code must retain the above copyright |
114 | * notice, this list of conditions and the following disclaimer. | | 114 | * notice, this list of conditions and the following disclaimer. |
115 | * 2. Redistributions in binary form must reproduce the above copyright | | 115 | * 2. Redistributions in binary form must reproduce the above copyright |
116 | * notice, this list of conditions and the following disclaimer in the | | 116 | * notice, this list of conditions and the following disclaimer in the |
117 | * documentation and/or other materials provided with the distribution. | | 117 | * documentation and/or other materials provided with the distribution. |
118 | * 3. Neither the name of the University nor the names of its contributors | | 118 | * 3. Neither the name of the University nor the names of its contributors |
119 | * may be used to endorse or promote products derived from this software | | 119 | * may be used to endorse or promote products derived from this software |
120 | * without specific prior written permission. | | 120 | * without specific prior written permission. |
121 | * | | 121 | * |
122 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 122 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
123 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 123 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
124 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 124 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
125 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 125 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
126 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 126 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
127 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 127 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
128 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 128 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
129 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 129 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
130 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 130 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
131 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 131 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
132 | * SUCH DAMAGE. | | 132 | * SUCH DAMAGE. |
133 | * | | 133 | * |
134 | * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 | | 134 | * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 |
135 | */ | | 135 | */ |
136 | | | 136 | |
137 | #include <sys/cdefs.h> | | 137 | #include <sys/cdefs.h> |
138 | __KERNEL_RCSID(0, "$NetBSD: tcp_congctl.c,v 1.26 2018/09/03 16:29:36 riastradh Exp $"); | | 138 | __KERNEL_RCSID(0, "$NetBSD: tcp_congctl.c,v 1.27 2019/10/09 05:29:18 msaitoh Exp $"); |
139 | | | 139 | |
140 | #ifdef _KERNEL_OPT | | 140 | #ifdef _KERNEL_OPT |
141 | #include "opt_inet.h" | | 141 | #include "opt_inet.h" |
142 | #include "opt_tcp_debug.h" | | 142 | #include "opt_tcp_debug.h" |
143 | #include "opt_tcp_congctl.h" | | 143 | #include "opt_tcp_congctl.h" |
144 | #endif | | 144 | #endif |
145 | | | 145 | |
146 | #include <sys/param.h> | | 146 | #include <sys/param.h> |
147 | #include <sys/systm.h> | | 147 | #include <sys/systm.h> |
148 | #include <sys/malloc.h> | | 148 | #include <sys/malloc.h> |
149 | #include <sys/mbuf.h> | | 149 | #include <sys/mbuf.h> |
150 | #include <sys/protosw.h> | | 150 | #include <sys/protosw.h> |
151 | #include <sys/socket.h> | | 151 | #include <sys/socket.h> |
152 | #include <sys/socketvar.h> | | 152 | #include <sys/socketvar.h> |
153 | #include <sys/errno.h> | | 153 | #include <sys/errno.h> |
154 | #include <sys/syslog.h> | | 154 | #include <sys/syslog.h> |
155 | #include <sys/pool.h> | | 155 | #include <sys/pool.h> |
156 | #include <sys/domain.h> | | 156 | #include <sys/domain.h> |
157 | #include <sys/kernel.h> | | 157 | #include <sys/kernel.h> |
158 | #include <sys/mutex.h> | | 158 | #include <sys/mutex.h> |
159 | | | 159 | |
160 | #include <net/if.h> | | 160 | #include <net/if.h> |
161 | | | 161 | |
162 | #include <netinet/in.h> | | 162 | #include <netinet/in.h> |
163 | #include <netinet/in_systm.h> | | 163 | #include <netinet/in_systm.h> |
164 | #include <netinet/ip.h> | | 164 | #include <netinet/ip.h> |
165 | #include <netinet/in_pcb.h> | | 165 | #include <netinet/in_pcb.h> |
166 | #include <netinet/in_var.h> | | 166 | #include <netinet/in_var.h> |
167 | #include <netinet/ip_var.h> | | 167 | #include <netinet/ip_var.h> |
168 | | | 168 | |
169 | #ifdef INET6 | | 169 | #ifdef INET6 |
170 | #include <netinet/ip6.h> | | 170 | #include <netinet/ip6.h> |
171 | #include <netinet6/ip6_var.h> | | 171 | #include <netinet6/ip6_var.h> |
172 | #include <netinet6/in6_pcb.h> | | 172 | #include <netinet6/in6_pcb.h> |
173 | #include <netinet6/ip6_var.h> | | 173 | #include <netinet6/ip6_var.h> |
174 | #include <netinet6/in6_var.h> | | 174 | #include <netinet6/in6_var.h> |
175 | #include <netinet/icmp6.h> | | 175 | #include <netinet/icmp6.h> |
176 | #endif | | 176 | #endif |
177 | | | 177 | |
178 | #include <netinet/tcp.h> | | 178 | #include <netinet/tcp.h> |
179 | #include <netinet/tcp_fsm.h> | | 179 | #include <netinet/tcp_fsm.h> |
180 | #include <netinet/tcp_seq.h> | | 180 | #include <netinet/tcp_seq.h> |
181 | #include <netinet/tcp_timer.h> | | 181 | #include <netinet/tcp_timer.h> |
182 | #include <netinet/tcp_var.h> | | 182 | #include <netinet/tcp_var.h> |
183 | #include <netinet/tcp_congctl.h> | | 183 | #include <netinet/tcp_congctl.h> |
184 | #ifdef TCP_DEBUG | | 184 | #ifdef TCP_DEBUG |
185 | #include <netinet/tcp_debug.h> | | 185 | #include <netinet/tcp_debug.h> |
186 | #endif | | 186 | #endif |
187 | | | 187 | |
188 | /* | | 188 | /* |
189 | * TODO: | | 189 | * TODO: |
190 | * consider separating the actual implementations in another file. | | 190 | * consider separating the actual implementations in another file. |
191 | */ | | 191 | */ |
192 | | | 192 | |
193 | static void tcp_common_congestion_exp(struct tcpcb *, int, int); | | 193 | static void tcp_common_congestion_exp(struct tcpcb *, int, int); |
194 | | | 194 | |
195 | static int tcp_reno_do_fast_retransmit(struct tcpcb *, const struct tcphdr *); | | 195 | static int tcp_reno_do_fast_retransmit(struct tcpcb *, const struct tcphdr *); |
196 | static int tcp_reno_fast_retransmit(struct tcpcb *, const struct tcphdr *); | | 196 | static int tcp_reno_fast_retransmit(struct tcpcb *, const struct tcphdr *); |
197 | static void tcp_reno_slow_retransmit(struct tcpcb *); | | 197 | static void tcp_reno_slow_retransmit(struct tcpcb *); |
198 | static void tcp_reno_fast_retransmit_newack(struct tcpcb *, | | 198 | static void tcp_reno_fast_retransmit_newack(struct tcpcb *, |
199 | const struct tcphdr *); | | 199 | const struct tcphdr *); |
200 | static void tcp_reno_newack(struct tcpcb *, const struct tcphdr *); | | 200 | static void tcp_reno_newack(struct tcpcb *, const struct tcphdr *); |
201 | static void tcp_reno_congestion_exp(struct tcpcb *tp); | | 201 | static void tcp_reno_congestion_exp(struct tcpcb *tp); |
202 | | | 202 | |
203 | static int tcp_newreno_fast_retransmit(struct tcpcb *, const struct tcphdr *); | | 203 | static int tcp_newreno_fast_retransmit(struct tcpcb *, const struct tcphdr *); |
204 | static void tcp_newreno_fast_retransmit_newack(struct tcpcb *, | | 204 | static void tcp_newreno_fast_retransmit_newack(struct tcpcb *, |
205 | const struct tcphdr *); | | 205 | const struct tcphdr *); |
206 | static void tcp_newreno_newack(struct tcpcb *, const struct tcphdr *); | | 206 | static void tcp_newreno_newack(struct tcpcb *, const struct tcphdr *); |
207 | | | 207 | |
208 | static int tcp_cubic_fast_retransmit(struct tcpcb *, const struct tcphdr *); | | 208 | static int tcp_cubic_fast_retransmit(struct tcpcb *, const struct tcphdr *); |
209 | static void tcp_cubic_slow_retransmit(struct tcpcb *tp); | | 209 | static void tcp_cubic_slow_retransmit(struct tcpcb *tp); |
210 | static void tcp_cubic_newack(struct tcpcb *, const struct tcphdr *); | | 210 | static void tcp_cubic_newack(struct tcpcb *, const struct tcphdr *); |
211 | static void tcp_cubic_congestion_exp(struct tcpcb *); | | 211 | static void tcp_cubic_congestion_exp(struct tcpcb *); |
212 | | | 212 | |
213 | static void tcp_congctl_fillnames(void); | | 213 | static void tcp_congctl_fillnames(void); |
214 | | | 214 | |
215 | extern int tcprexmtthresh; | | 215 | extern int tcprexmtthresh; |
216 | | | 216 | |
217 | MALLOC_DEFINE(M_TCPCONGCTL, "tcpcongctl", "TCP congestion control structures"); | | 217 | MALLOC_DEFINE(M_TCPCONGCTL, "tcpcongctl", "TCP congestion control structures"); |
218 | | | 218 | |
219 | /* currently selected global congestion control */ | | 219 | /* currently selected global congestion control */ |
220 | char tcp_congctl_global_name[TCPCC_MAXLEN]; | | 220 | char tcp_congctl_global_name[TCPCC_MAXLEN]; |
221 | | | 221 | |
222 | /* available global congestion control algorithms */ | | 222 | /* available global congestion control algorithms */ |
223 | char tcp_congctl_avail[10 * TCPCC_MAXLEN]; | | 223 | char tcp_congctl_avail[10 * TCPCC_MAXLEN]; |
224 | | | 224 | |
225 | /* | | 225 | /* |
226 | * Used to list the available congestion control algorithms. | | 226 | * Used to list the available congestion control algorithms. |
227 | */ | | 227 | */ |
228 | TAILQ_HEAD(, tcp_congctlent) tcp_congctlhd = | | 228 | TAILQ_HEAD(, tcp_congctlent) tcp_congctlhd = |
229 | TAILQ_HEAD_INITIALIZER(tcp_congctlhd); | | 229 | TAILQ_HEAD_INITIALIZER(tcp_congctlhd); |
230 | | | 230 | |
231 | static struct tcp_congctlent * tcp_congctl_global; | | 231 | static struct tcp_congctlent * tcp_congctl_global; |
232 | | | 232 | |
233 | static kmutex_t tcp_congctl_mtx; | | 233 | static kmutex_t tcp_congctl_mtx; |
234 | | | 234 | |
235 | void | | 235 | void |
236 | tcp_congctl_init(void) | | 236 | tcp_congctl_init(void) |
237 | { | | 237 | { |
238 | int r __diagused; | | 238 | int r __diagused; |
239 | | | 239 | |
240 | mutex_init(&tcp_congctl_mtx, MUTEX_DEFAULT, IPL_NONE); | | 240 | mutex_init(&tcp_congctl_mtx, MUTEX_DEFAULT, IPL_NONE); |
241 | | | 241 | |
242 | /* Base algorithms. */ | | 242 | /* Base algorithms. */ |
243 | r = tcp_congctl_register("reno", &tcp_reno_ctl); | | 243 | r = tcp_congctl_register("reno", &tcp_reno_ctl); |
244 | KASSERT(r == 0); | | 244 | KASSERT(r == 0); |
245 | r = tcp_congctl_register("newreno", &tcp_newreno_ctl); | | 245 | r = tcp_congctl_register("newreno", &tcp_newreno_ctl); |
246 | KASSERT(r == 0); | | 246 | KASSERT(r == 0); |
247 | r = tcp_congctl_register("cubic", &tcp_cubic_ctl); | | 247 | r = tcp_congctl_register("cubic", &tcp_cubic_ctl); |
248 | KASSERT(r == 0); | | 248 | KASSERT(r == 0); |
249 | | | 249 | |
250 | /* NewReno is the default. */ | | 250 | /* NewReno is the default. */ |
251 | #ifndef TCP_CONGCTL_DEFAULT | | 251 | #ifndef TCP_CONGCTL_DEFAULT |
252 | #define TCP_CONGCTL_DEFAULT "newreno" | | 252 | #define TCP_CONGCTL_DEFAULT "newreno" |
253 | #endif | | 253 | #endif |
254 | | | 254 | |
255 | r = tcp_congctl_select(NULL, TCP_CONGCTL_DEFAULT); | | 255 | r = tcp_congctl_select(NULL, TCP_CONGCTL_DEFAULT); |
256 | KASSERT(r == 0); | | 256 | KASSERT(r == 0); |
257 | } | | 257 | } |
258 | | | 258 | |
259 | /* | | 259 | /* |
260 | * Register a congestion algorithm and select it if we have none. | | 260 | * Register a congestion algorithm and select it if we have none. |
261 | */ | | 261 | */ |
262 | int | | 262 | int |
263 | tcp_congctl_register(const char *name, const struct tcp_congctl *tcc) | | 263 | tcp_congctl_register(const char *name, const struct tcp_congctl *tcc) |
264 | { | | 264 | { |
265 | struct tcp_congctlent *ntcc, *tccp; | | 265 | struct tcp_congctlent *ntcc, *tccp; |
266 | | | 266 | |
267 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) | | 267 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) |
268 | if (!strcmp(name, tccp->congctl_name)) { | | 268 | if (!strcmp(name, tccp->congctl_name)) { |
269 | /* name already registered */ | | 269 | /* name already registered */ |
270 | return EEXIST; | | 270 | return EEXIST; |
271 | } | | 271 | } |
272 | | | 272 | |
273 | ntcc = malloc(sizeof(*ntcc), M_TCPCONGCTL, M_WAITOK|M_ZERO); | | 273 | ntcc = malloc(sizeof(*ntcc), M_TCPCONGCTL, M_WAITOK|M_ZERO); |
274 | | | 274 | |
275 | strlcpy(ntcc->congctl_name, name, sizeof(ntcc->congctl_name) - 1); | | 275 | strlcpy(ntcc->congctl_name, name, sizeof(ntcc->congctl_name) - 1); |
276 | ntcc->congctl_ctl = tcc; | | 276 | ntcc->congctl_ctl = tcc; |
277 | | | 277 | |
278 | TAILQ_INSERT_TAIL(&tcp_congctlhd, ntcc, congctl_ent); | | 278 | TAILQ_INSERT_TAIL(&tcp_congctlhd, ntcc, congctl_ent); |
279 | tcp_congctl_fillnames(); | | 279 | tcp_congctl_fillnames(); |
280 | | | 280 | |
281 | if (TAILQ_FIRST(&tcp_congctlhd) == ntcc) | | 281 | if (TAILQ_FIRST(&tcp_congctlhd) == ntcc) |
282 | tcp_congctl_select(NULL, name); | | 282 | tcp_congctl_select(NULL, name); |
283 | | | 283 | |
284 | return 0; | | 284 | return 0; |
285 | } | | 285 | } |
286 | | | 286 | |
287 | int | | 287 | int |
288 | tcp_congctl_unregister(const char *name) | | 288 | tcp_congctl_unregister(const char *name) |
289 | { | | 289 | { |
290 | struct tcp_congctlent *tccp, *rtccp; | | 290 | struct tcp_congctlent *tccp, *rtccp; |
291 | unsigned int size; | | 291 | unsigned int size; |
292 | | | 292 | |
293 | rtccp = NULL; | | 293 | rtccp = NULL; |
294 | size = 0; | | 294 | size = 0; |
295 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { | | 295 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { |
296 | if (!strcmp(name, tccp->congctl_name)) | | 296 | if (!strcmp(name, tccp->congctl_name)) |
297 | rtccp = tccp; | | 297 | rtccp = tccp; |
298 | size++; | | 298 | size++; |
299 | } | | 299 | } |
300 | | | 300 | |
301 | if (!rtccp) | | 301 | if (!rtccp) |
302 | return ENOENT; | | 302 | return ENOENT; |
303 | | | 303 | |
304 | if (size <= 1 || tcp_congctl_global == rtccp || rtccp->congctl_refcnt) | | 304 | if (size <= 1 || tcp_congctl_global == rtccp || rtccp->congctl_refcnt) |
305 | return EBUSY; | | 305 | return EBUSY; |
306 | | | 306 | |
307 | TAILQ_REMOVE(&tcp_congctlhd, rtccp, congctl_ent); | | 307 | TAILQ_REMOVE(&tcp_congctlhd, rtccp, congctl_ent); |
308 | free(rtccp, M_TCPCONGCTL); | | 308 | free(rtccp, M_TCPCONGCTL); |
309 | tcp_congctl_fillnames(); | | 309 | tcp_congctl_fillnames(); |
310 | | | 310 | |
311 | return 0; | | 311 | return 0; |
312 | } | | 312 | } |
313 | | | 313 | |
314 | /* | | 314 | /* |
315 | * Select a congestion algorithm by name. | | 315 | * Select a congestion algorithm by name. |
316 | */ | | 316 | */ |
317 | int | | 317 | int |
318 | tcp_congctl_select(struct tcpcb *tp, const char *name) | | 318 | tcp_congctl_select(struct tcpcb *tp, const char *name) |
319 | { | | 319 | { |
320 | struct tcp_congctlent *tccp, *old_tccp, *new_tccp; | | 320 | struct tcp_congctlent *tccp, *old_tccp, *new_tccp; |
321 | bool old_found, new_found; | | 321 | bool old_found, new_found; |
322 | | | 322 | |
323 | KASSERT(name); | | 323 | KASSERT(name); |
324 | | | 324 | |
325 | old_found = (tp == NULL || tp->t_congctl == NULL); | | 325 | old_found = (tp == NULL || tp->t_congctl == NULL); |
326 | old_tccp = NULL; | | 326 | old_tccp = NULL; |
327 | new_found = false; | | 327 | new_found = false; |
328 | new_tccp = NULL; | | 328 | new_tccp = NULL; |
329 | | | 329 | |
330 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { | | 330 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { |
331 | if (!old_found && tccp->congctl_ctl == tp->t_congctl) { | | 331 | if (!old_found && tccp->congctl_ctl == tp->t_congctl) { |
332 | old_tccp = tccp; | | 332 | old_tccp = tccp; |
333 | old_found = true; | | 333 | old_found = true; |
334 | } | | 334 | } |
335 | | | 335 | |
336 | if (!new_found && !strcmp(name, tccp->congctl_name)) { | | 336 | if (!new_found && !strcmp(name, tccp->congctl_name)) { |
337 | new_tccp = tccp; | | 337 | new_tccp = tccp; |
338 | new_found = true; | | 338 | new_found = true; |
339 | } | | 339 | } |
340 | | | 340 | |
341 | if (new_found && old_found) { | | 341 | if (new_found && old_found) { |
342 | if (tp) { | | 342 | if (tp) { |
343 | mutex_enter(&tcp_congctl_mtx); | | 343 | mutex_enter(&tcp_congctl_mtx); |
344 | if (old_tccp) | | 344 | if (old_tccp) |
345 | old_tccp->congctl_refcnt--; | | 345 | old_tccp->congctl_refcnt--; |
346 | tp->t_congctl = new_tccp->congctl_ctl; | | 346 | tp->t_congctl = new_tccp->congctl_ctl; |
347 | new_tccp->congctl_refcnt++; | | 347 | new_tccp->congctl_refcnt++; |
348 | mutex_exit(&tcp_congctl_mtx); | | 348 | mutex_exit(&tcp_congctl_mtx); |
349 | } else { | | 349 | } else { |
350 | tcp_congctl_global = new_tccp; | | 350 | tcp_congctl_global = new_tccp; |
351 | strlcpy(tcp_congctl_global_name, | | 351 | strlcpy(tcp_congctl_global_name, |
352 | new_tccp->congctl_name, | | 352 | new_tccp->congctl_name, |
353 | sizeof(tcp_congctl_global_name) - 1); | | 353 | sizeof(tcp_congctl_global_name) - 1); |
354 | } | | 354 | } |
355 | return 0; | | 355 | return 0; |
356 | } | | 356 | } |
357 | } | | 357 | } |
358 | | | 358 | |
359 | return EINVAL; | | 359 | return EINVAL; |
360 | } | | 360 | } |
361 | | | 361 | |
362 | void | | 362 | void |
363 | tcp_congctl_release(struct tcpcb *tp) | | 363 | tcp_congctl_release(struct tcpcb *tp) |
364 | { | | 364 | { |
365 | struct tcp_congctlent *tccp; | | 365 | struct tcp_congctlent *tccp; |
366 | | | 366 | |
367 | KASSERT(tp->t_congctl); | | 367 | KASSERT(tp->t_congctl); |
368 | | | 368 | |
369 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { | | 369 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { |
370 | if (tccp->congctl_ctl == tp->t_congctl) { | | 370 | if (tccp->congctl_ctl == tp->t_congctl) { |
371 | tccp->congctl_refcnt--; | | 371 | tccp->congctl_refcnt--; |
372 | return; | | 372 | return; |
373 | } | | 373 | } |
374 | } | | 374 | } |
375 | } | | 375 | } |
376 | | | 376 | |
377 | /* | | 377 | /* |
378 | * Returns the name of a congestion algorithm. | | 378 | * Returns the name of a congestion algorithm. |
379 | */ | | 379 | */ |
380 | const char * | | 380 | const char * |
381 | tcp_congctl_bystruct(const struct tcp_congctl *tcc) | | 381 | tcp_congctl_bystruct(const struct tcp_congctl *tcc) |
382 | { | | 382 | { |
383 | struct tcp_congctlent *tccp; | | 383 | struct tcp_congctlent *tccp; |
384 | | | 384 | |
385 | KASSERT(tcc); | | 385 | KASSERT(tcc); |
386 | | | 386 | |
387 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) | | 387 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) |
388 | if (tccp->congctl_ctl == tcc) | | 388 | if (tccp->congctl_ctl == tcc) |
389 | return tccp->congctl_name; | | 389 | return tccp->congctl_name; |
390 | | | 390 | |
391 | return NULL; | | 391 | return NULL; |
392 | } | | 392 | } |
393 | | | 393 | |
394 | static void | | 394 | static void |
395 | tcp_congctl_fillnames(void) | | 395 | tcp_congctl_fillnames(void) |
396 | { | | 396 | { |
397 | struct tcp_congctlent *tccp; | | 397 | struct tcp_congctlent *tccp; |
398 | const char *delim = " "; | | 398 | const char *delim = " "; |
399 | | | 399 | |
400 | tcp_congctl_avail[0] = '\0'; | | 400 | tcp_congctl_avail[0] = '\0'; |
401 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { | | 401 | TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) { |
402 | strlcat(tcp_congctl_avail, tccp->congctl_name, | | 402 | strlcat(tcp_congctl_avail, tccp->congctl_name, |
403 | sizeof(tcp_congctl_avail) - 1); | | 403 | sizeof(tcp_congctl_avail) - 1); |
404 | if (TAILQ_NEXT(tccp, congctl_ent)) | | 404 | if (TAILQ_NEXT(tccp, congctl_ent)) |
405 | strlcat(tcp_congctl_avail, delim, | | 405 | strlcat(tcp_congctl_avail, delim, |
406 | sizeof(tcp_congctl_avail) - 1); | | 406 | sizeof(tcp_congctl_avail) - 1); |
407 | } | | 407 | } |
408 | | | 408 | |
409 | } | | 409 | } |
410 | | | 410 | |
411 | /* ------------------------------------------------------------------------ */ | | 411 | /* ------------------------------------------------------------------------ */ |
412 | | | 412 | |
413 | /* | | 413 | /* |
414 | * Common stuff | | 414 | * Common stuff |
415 | */ | | 415 | */ |
416 | | | 416 | |
417 | /* Window reduction (1-beta) for [New]Reno: 0.5 */ | | 417 | /* Window reduction (1-beta) for [New]Reno: 0.5 */ |
418 | #define RENO_BETAA 1 | | 418 | #define RENO_BETAA 1 |
419 | #define RENO_BETAB 2 | | 419 | #define RENO_BETAB 2 |
420 | /* Window reduction (1-beta) for Cubic: 0.8 */ | | 420 | /* Window reduction (1-beta) for Cubic: 0.8 */ |
421 | #define CUBIC_BETAA 4 | | 421 | #define CUBIC_BETAA 4 |
422 | #define CUBIC_BETAB 5 | | 422 | #define CUBIC_BETAB 5 |
423 | /* Draft Rhee Section 4.1 */ | | 423 | /* Draft Rhee Section 4.1 */ |
424 | #define CUBIC_CA 4 | | 424 | #define CUBIC_CA 4 |
425 | #define CUBIC_CB 10 | | 425 | #define CUBIC_CB 10 |
426 | | | 426 | |
427 | static void | | 427 | static void |
428 | tcp_common_congestion_exp(struct tcpcb *tp, int betaa, int betab) | | 428 | tcp_common_congestion_exp(struct tcpcb *tp, int betaa, int betab) |
429 | { | | 429 | { |
430 | u_int win; | | 430 | u_long win; |
431 | | | 431 | |
432 | /* | | 432 | /* |
433 | * Reduce the congestion window and the slow start threshold. | | 433 | * Reduce the congestion window and the slow start threshold. |
434 | */ | | 434 | */ |
435 | win = uimin(tp->snd_wnd, tp->snd_cwnd) * betaa / betab / tp->t_segsz; | | 435 | win = ulmin(tp->snd_wnd, tp->snd_cwnd) * betaa / betab / tp->t_segsz; |
436 | if (win < 2) | | 436 | if (win < 2) |
437 | win = 2; | | 437 | win = 2; |
438 | | | 438 | |
439 | tp->snd_ssthresh = win * tp->t_segsz; | | 439 | tp->snd_ssthresh = win * tp->t_segsz; |
440 | tp->snd_recover = tp->snd_max; | | 440 | tp->snd_recover = tp->snd_max; |
441 | tp->snd_cwnd = tp->snd_ssthresh; | | 441 | tp->snd_cwnd = tp->snd_ssthresh; |
442 | | | 442 | |
443 | /* | | 443 | /* |
444 | * When using TCP ECN, notify the peer that | | 444 | * When using TCP ECN, notify the peer that |
445 | * we reduced the cwnd. | | 445 | * we reduced the cwnd. |
446 | */ | | 446 | */ |
447 | if (TCP_ECN_ALLOWED(tp)) | | 447 | if (TCP_ECN_ALLOWED(tp)) |
448 | tp->t_flags |= TF_ECN_SND_CWR; | | 448 | tp->t_flags |= TF_ECN_SND_CWR; |
449 | } | | 449 | } |
450 | | | 450 | |
451 | | | 451 | |
452 | /* ------------------------------------------------------------------------ */ | | 452 | /* ------------------------------------------------------------------------ */ |
453 | | | 453 | |
454 | /* | | 454 | /* |
455 | * TCP/Reno congestion control. | | 455 | * TCP/Reno congestion control. |
456 | */ | | 456 | */ |
457 | static void | | 457 | static void |
458 | tcp_reno_congestion_exp(struct tcpcb *tp) | | 458 | tcp_reno_congestion_exp(struct tcpcb *tp) |
459 | { | | 459 | { |
460 | | | 460 | |
461 | tcp_common_congestion_exp(tp, RENO_BETAA, RENO_BETAB); | | 461 | tcp_common_congestion_exp(tp, RENO_BETAA, RENO_BETAB); |
462 | } | | 462 | } |
463 | | | 463 | |
/*
 * Common fast-retransmit machinery, entered once the dupack threshold
 * has been reached (ssthresh/cwnd have already been reduced by the
 * caller).  Retransmits the missing segment and sets cwnd up for
 * fast recovery.  Always returns 0 (retransmit handled).
 */
static int
tcp_reno_do_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * Dup acks mean that packets have left the
	 * network (they're now cached at the receiver)
	 * so bump cwnd by the amount in the receiver
	 * to keep a constant cwnd packets in the
	 * network.
	 *
	 * If we are using TCP/SACK, then enter
	 * Fast Recovery if the receiver SACKs
	 * data that is tcprexmtthresh * MSS
	 * bytes past the last ACKed segment,
	 * irrespective of the number of DupAcks.
	 */

	/* Remember where we were sending so we can resume afterwards. */
	tcp_seq onxt = tp->snd_nxt;

	/* Enter fast recovery: no partial acks seen yet. */
	tp->t_partialacks = 0;
	/* Stop the retransmit timer and RTT sampling for this window. */
	TCP_TIMER_DISARM(tp, TCPT_REXMT);
	tp->t_rtttime = 0;
	if (TCP_SACK_ENABLED(tp)) {
		/*
		 * SACK path: clamp cwnd to one segment so tcp_output()
		 * retransmits exactly the first hole, and mark where
		 * recovery started.
		 */
		tp->t_dupacks = tcprexmtthresh;
		tp->sack_newdata = tp->snd_nxt;
		tp->snd_cwnd = tp->t_segsz;
		(void) tcp_output(tp);
		return 0;
	}
	/*
	 * Non-SACK path: rewind snd_nxt to the missing segment, force a
	 * one-segment window so only that segment is retransmitted, then
	 * inflate cwnd by one segment per dupack (RFC 2581 step 3).
	 */
	tp->snd_nxt = th->th_ack;
	tp->snd_cwnd = tp->t_segsz;
	(void) tcp_output(tp);
	tp->snd_cwnd = tp->snd_ssthresh + tp->t_segsz * tp->t_dupacks;
	if (SEQ_GT(onxt, tp->snd_nxt))
		tp->snd_nxt = onxt;

	return 0;
}
502 | | | 502 | |
/*
 * Reno fast retransmit: we know we are losing at the current window
 * size, so perform congestion avoidance (halve ssthresh and pull cwnd
 * back to it), then run the common fast-retransmit processing.
 */
static int
tcp_reno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{

	tcp_reno_congestion_exp(tp);

	return tcp_reno_do_fast_retransmit(tp, th);
}
518 | | | 518 | |
519 | static void | | 519 | static void |
520 | tcp_reno_slow_retransmit(struct tcpcb *tp) | | 520 | tcp_reno_slow_retransmit(struct tcpcb *tp) |
521 | { | | 521 | { |
522 | u_int win; | | 522 | u_long win; |
523 | | | 523 | |
524 | /* | | 524 | /* |
525 | * Close the congestion window down to one segment | | 525 | * Close the congestion window down to one segment |
526 | * (we'll open it by one segment for each ack we get). | | 526 | * (we'll open it by one segment for each ack we get). |
527 | * Since we probably have a window's worth of unacked | | 527 | * Since we probably have a window's worth of unacked |
528 | * data accumulated, this "slow start" keeps us from | | 528 | * data accumulated, this "slow start" keeps us from |
529 | * dumping all that data as back-to-back packets (which | | 529 | * dumping all that data as back-to-back packets (which |
530 | * might overwhelm an intermediate gateway). | | 530 | * might overwhelm an intermediate gateway). |
531 | * | | 531 | * |
532 | * There are two phases to the opening: Initially we | | 532 | * There are two phases to the opening: Initially we |
533 | * open by one mss on each ack. This makes the window | | 533 | * open by one mss on each ack. This makes the window |
534 | * size increase exponentially with time. If the | | 534 | * size increase exponentially with time. If the |
535 | * window is larger than the path can handle, this | | 535 | * window is larger than the path can handle, this |
536 | * exponential growth results in dropped packet(s) | | 536 | * exponential growth results in dropped packet(s) |
537 | * almost immediately. To get more time between | | 537 | * almost immediately. To get more time between |
538 | * drops but still "push" the network to take advantage | | 538 | * drops but still "push" the network to take advantage |
539 | * of improving conditions, we switch from exponential | | 539 | * of improving conditions, we switch from exponential |
540 | * to linear window opening at some threshhold size. | | 540 | * to linear window opening at some threshhold size. |
541 | * For a threshhold, we use half the current window | | 541 | * For a threshhold, we use half the current window |
542 | * size, truncated to a multiple of the mss. | | 542 | * size, truncated to a multiple of the mss. |
543 | * | | 543 | * |
544 | * (the minimum cwnd that will give us exponential | | 544 | * (the minimum cwnd that will give us exponential |
545 | * growth is 2 mss. We don't allow the threshhold | | 545 | * growth is 2 mss. We don't allow the threshhold |
546 | * to go below this.) | | 546 | * to go below this.) |
547 | */ | | 547 | */ |
548 | | | 548 | |
549 | win = uimin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; | | 549 | win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; |
550 | if (win < 2) | | 550 | if (win < 2) |
551 | win = 2; | | 551 | win = 2; |
552 | /* Loss Window MUST be one segment. */ | | 552 | /* Loss Window MUST be one segment. */ |
553 | tp->snd_cwnd = tp->t_segsz; | | 553 | tp->snd_cwnd = tp->t_segsz; |
554 | tp->snd_ssthresh = win * tp->t_segsz; | | 554 | tp->snd_ssthresh = win * tp->t_segsz; |
555 | tp->t_partialacks = -1; | | 555 | tp->t_partialacks = -1; |
556 | tp->t_dupacks = 0; | | 556 | tp->t_dupacks = 0; |
557 | tp->t_bytes_acked = 0; | | 557 | tp->t_bytes_acked = 0; |
558 | | | 558 | |
559 | if (TCP_ECN_ALLOWED(tp)) | | 559 | if (TCP_ECN_ALLOWED(tp)) |
560 | tp->t_flags |= TF_ECN_SND_CWR; | | 560 | tp->t_flags |= TF_ECN_SND_CWR; |
561 | } | | 561 | } |
562 | | | 562 | |
563 | static void | | 563 | static void |
564 | tcp_reno_fast_retransmit_newack(struct tcpcb *tp, | | 564 | tcp_reno_fast_retransmit_newack(struct tcpcb *tp, |
565 | const struct tcphdr *th) | | 565 | const struct tcphdr *th) |
566 | { | | 566 | { |
567 | if (tp->t_partialacks < 0) { | | 567 | if (tp->t_partialacks < 0) { |
568 | /* | | 568 | /* |
569 | * We were not in fast recovery. Reset the duplicate ack | | 569 | * We were not in fast recovery. Reset the duplicate ack |
570 | * counter. | | 570 | * counter. |
571 | */ | | 571 | */ |
572 | tp->t_dupacks = 0; | | 572 | tp->t_dupacks = 0; |
573 | } else { | | 573 | } else { |
574 | /* | | 574 | /* |
575 | * Clamp the congestion window to the crossover point and | | 575 | * Clamp the congestion window to the crossover point and |
576 | * exit fast recovery. | | 576 | * exit fast recovery. |
577 | */ | | 577 | */ |
578 | if (tp->snd_cwnd > tp->snd_ssthresh) | | 578 | if (tp->snd_cwnd > tp->snd_ssthresh) |
579 | tp->snd_cwnd = tp->snd_ssthresh; | | 579 | tp->snd_cwnd = tp->snd_ssthresh; |
580 | tp->t_partialacks = -1; | | 580 | tp->t_partialacks = -1; |
581 | tp->t_dupacks = 0; | | 581 | tp->t_dupacks = 0; |
582 | tp->t_bytes_acked = 0; | | 582 | tp->t_bytes_acked = 0; |
583 | if (TCP_SACK_ENABLED(tp) && SEQ_GT(th->th_ack, tp->snd_fack)) | | 583 | if (TCP_SACK_ENABLED(tp) && SEQ_GT(th->th_ack, tp->snd_fack)) |
584 | tp->snd_fack = th->th_ack; | | 584 | tp->snd_fack = th->th_ack; |
585 | } | | 585 | } |
586 | } | | 586 | } |
587 | | | 587 | |
/*
 * Reno congestion-avoidance / slow-start window opening on a new ACK.
 * Uses RFC 3465 Appropriate Byte Counting when tcp_do_abc is set,
 * otherwise the classic packet-counting rules.  cwnd is capped at
 * TCP_MAXWIN scaled by the send window shift.
 */
static void
tcp_reno_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * When new data is acked, open the congestion window.
	 */

	u_int cw = tp->snd_cwnd;
	u_int incr = tp->t_segsz;	/* default growth: one segment */

	if (tcp_do_abc) {

		/*
		 * RFC 3465 Appropriate Byte Counting (ABC)
		 */

		/*
		 * Bytes newly acknowledged by this segment.  snd_una has
		 * not been advanced yet when we are called.
		 */
		int acked = th->th_ack - tp->snd_una;

		if (cw >= tp->snd_ssthresh) {
			/*
			 * Congestion avoidance: grow by one segment only
			 * once a full cwnd's worth of bytes has been acked.
			 */
			tp->t_bytes_acked += acked;
			if (tp->t_bytes_acked >= cw) {
				/* Time to increase the window. */
				tp->t_bytes_acked -= cw;
			} else {
				/* No need to increase yet. */
				incr = 0;
			}
		} else {
			/*
			 * use 2*SMSS or 1*SMSS for the "L" param,
			 * depending on sysctl setting.
			 *
			 * (See RFC 3465 2.3 Choosing the Limit)
			 */
			u_int abc_lim;

			abc_lim = (tcp_abc_aggressive == 0 ||
			    tp->snd_nxt != tp->snd_max) ? incr : incr * 2;
			incr = uimin(acked, abc_lim);
		}
	} else {

		/*
		 * If the window gives us less than ssthresh packets
		 * in flight, open exponentially (segsz per packet).
		 * Otherwise open linearly: segsz per window
		 * (segsz^2 / cwnd per packet).
		 */

		if (cw >= tp->snd_ssthresh) {
			incr = incr * incr / cw;
		}
	}

	/* Never exceed the largest window the peer's scale allows. */
	tp->snd_cwnd = uimin(cw + incr, TCP_MAXWIN << tp->snd_scale);
}
644 | | | 644 | |
/* Method table for classic Reno congestion control. */
const struct tcp_congctl tcp_reno_ctl = {
	.fast_retransmit = tcp_reno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_reno_fast_retransmit_newack,
	.newack = tcp_reno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
652 | | | 652 | |
653 | /* | | 653 | /* |
654 | * TCP/NewReno Congestion control. | | 654 | * TCP/NewReno Congestion control. |
655 | */ | | 655 | */ |
656 | static int | | 656 | static int |
657 | tcp_newreno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th) | | 657 | tcp_newreno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th) |
658 | { | | 658 | { |
659 | | | 659 | |
660 | if (SEQ_LT(th->th_ack, tp->snd_high)) { | | 660 | if (SEQ_LT(th->th_ack, tp->snd_high)) { |
661 | /* | | 661 | /* |
662 | * False fast retransmit after timeout. | | 662 | * False fast retransmit after timeout. |
663 | * Do not enter fast recovery | | 663 | * Do not enter fast recovery |
664 | */ | | 664 | */ |
665 | tp->t_dupacks = 0; | | 665 | tp->t_dupacks = 0; |
666 | return 1; | | 666 | return 1; |
667 | } | | 667 | } |
668 | /* | | 668 | /* |
669 | * Fast retransmit is same as reno. | | 669 | * Fast retransmit is same as reno. |
670 | */ | | 670 | */ |
671 | return tcp_reno_fast_retransmit(tp, th); | | 671 | return tcp_reno_fast_retransmit(tp, th); |
672 | } | | 672 | } |
673 | | | 673 | |
/*
 * Implement the NewReno response to a new ack, checking for partial acks in
 * fast recovery.
 *
 * Three cases: (1) not in fast recovery - reset dupack count;
 * (2) partial ack (below snd_recover) - retransmit the next hole and
 * deflate cwnd without leaving recovery; (3) complete ack - deflate
 * cwnd to ssthresh and exit recovery.
 */
static void
tcp_newreno_fast_retransmit_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	if (tp->t_partialacks < 0) {
		/*
		 * We were not in fast recovery.  Reset the duplicate ack
		 * counter.
		 */
		tp->t_dupacks = 0;
	} else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
		/*
		 * This is a partial ack. Retransmit the first unacknowledged
		 * segment and deflate the congestion window by the amount of
		 * acknowledged data. Do not exit fast recovery.
		 */
		tcp_seq onxt = tp->snd_nxt;
		u_long ocwnd = tp->snd_cwnd;
		int sack_num_segs = 1, sack_bytes_rxmt = 0;

		/*
		 * snd_una has not yet been updated and the socket's send
		 * buffer has not yet drained off the ACK'd data, so we
		 * have to leave snd_una as it was to get the correct data
		 * offset in tcp_output().
		 */
		tp->t_partialacks++;
		TCP_TIMER_DISARM(tp, TCPT_REXMT);
		tp->t_rtttime = 0;

		if (TCP_SACK_ENABLED(tp)) {
			/*
			 * Partial ack handling within a sack recovery episode.
			 * Keeping this very simple for now. When a partial ack
			 * is received, force snd_cwnd to a value that will
			 * allow the sender to transmit no more than 2 segments.
			 * If necessary, a fancier scheme can be adopted at a
			 * later point, but for now, the goal is to prevent the
			 * sender from bursting a large amount of data in the
			 * midst of sack recovery.
			 */

			/*
			 * send one or 2 segments based on how much
			 * new data was acked
			 */
			if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
				sack_num_segs = 2;
			(void)tcp_sack_output(tp, &sack_bytes_rxmt);
			/*
			 * cwnd = bytes already retransmitted + new data sent
			 * during recovery + headroom for sack_num_segs more.
			 */
			tp->snd_cwnd = sack_bytes_rxmt +
			    (tp->snd_nxt - tp->sack_newdata) +
			    sack_num_segs * tp->t_segsz;
			tp->t_flags |= TF_ACKNOW;
			(void) tcp_output(tp);
		} else {
			/* Rewind to the hole reported by this partial ack. */
			tp->snd_nxt = th->th_ack;
			/*
			 * Set snd_cwnd to one segment beyond ACK'd offset
			 * snd_una is not yet updated when we're called
			 */
			tp->snd_cwnd = tp->t_segsz + (th->th_ack - tp->snd_una);
			(void) tcp_output(tp);
			tp->snd_cwnd = ocwnd;
			if (SEQ_GT(onxt, tp->snd_nxt))
				tp->snd_nxt = onxt;
			/*
			 * Partial window deflation. Relies on fact that
			 * tp->snd_una not updated yet.
			 */
			tp->snd_cwnd -= (th->th_ack - tp->snd_una -
			    tp->t_segsz);
		}
	} else {
		/*
		 * Complete ack. Inflate the congestion window to ssthresh
		 * and exit fast recovery.
		 *
		 * Window inflation should have left us with approx.
		 * snd_ssthresh outstanding data. But in case we
		 * would be inclined to send a burst, better to do
		 * it via the slow start mechanism.
		 */
		if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
			tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
			    + tp->t_segsz;
		else
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_partialacks = -1;
		tp->t_dupacks = 0;
		tp->t_bytes_acked = 0;
		if (TCP_SACK_ENABLED(tp) && SEQ_GT(th->th_ack, tp->snd_fack))
			tp->snd_fack = th->th_ack;
	}
}
771 | | | 771 | |
772 | static void | | 772 | static void |
773 | tcp_newreno_newack(struct tcpcb *tp, const struct tcphdr *th) | | 773 | tcp_newreno_newack(struct tcpcb *tp, const struct tcphdr *th) |
774 | { | | 774 | { |
775 | /* | | 775 | /* |
776 | * If we are still in fast recovery (meaning we are using | | 776 | * If we are still in fast recovery (meaning we are using |
777 | * NewReno and we have only received partial acks), do not | | 777 | * NewReno and we have only received partial acks), do not |
778 | * inflate the window yet. | | 778 | * inflate the window yet. |
779 | */ | | 779 | */ |
780 | if (tp->t_partialacks < 0) | | 780 | if (tp->t_partialacks < 0) |
781 | tcp_reno_newack(tp, th); | | 781 | tcp_reno_newack(tp, th); |
782 | } | | 782 | } |
783 | | | 783 | |
784 | | | 784 | |
/*
 * Method table for NewReno: shares Reno's slow retransmit and
 * congestion-experienced handlers, overrides the fast-recovery paths.
 */
const struct tcp_congctl tcp_newreno_ctl = {
	.fast_retransmit = tcp_newreno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_newreno_fast_retransmit_newack,
	.newack = tcp_newreno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
792 | | | 792 | |
793 | /* | | 793 | /* |
794 | * CUBIC - http://tools.ietf.org/html/draft-rhee-tcpm-cubic-02 | | 794 | * CUBIC - http://tools.ietf.org/html/draft-rhee-tcpm-cubic-02 |
795 | */ | | 795 | */ |
796 | | | 796 | |
797 | /* Cubic prototypes */ | | 797 | /* Cubic prototypes */ |
798 | static void tcp_cubic_update_ctime(struct tcpcb *tp); | | 798 | static void tcp_cubic_update_ctime(struct tcpcb *tp); |
799 | static uint32_t tcp_cubic_diff_ctime(struct tcpcb *); | | 799 | static uint32_t tcp_cubic_diff_ctime(struct tcpcb *); |
800 | static uint32_t tcp_cubic_cbrt(uint32_t); | | 800 | static uint32_t tcp_cubic_cbrt(uint32_t); |
801 | static ulong tcp_cubic_getW(struct tcpcb *, uint32_t, uint32_t); | | 801 | static ulong tcp_cubic_getW(struct tcpcb *, uint32_t, uint32_t); |
802 | | | 802 | |
803 | /* Cubic TIME functions - XXX I don't like using timevals and microuptime */ | | 803 | /* Cubic TIME functions - XXX I don't like using timevals and microuptime */ |
804 | /* | | 804 | /* |
805 | * Set congestion timer to now | | 805 | * Set congestion timer to now |
806 | */ | | 806 | */ |
807 | static void | | 807 | static void |
808 | tcp_cubic_update_ctime(struct tcpcb *tp) | | 808 | tcp_cubic_update_ctime(struct tcpcb *tp) |
809 | { | | 809 | { |
810 | struct timeval now_timeval; | | 810 | struct timeval now_timeval; |
811 | | | 811 | |
812 | getmicrouptime(&now_timeval); | | 812 | getmicrouptime(&now_timeval); |
813 | tp->snd_cubic_ctime = now_timeval.tv_sec * 1000 + | | 813 | tp->snd_cubic_ctime = now_timeval.tv_sec * 1000 + |
814 | now_timeval.tv_usec / 1000; | | 814 | now_timeval.tv_usec / 1000; |
815 | } | | 815 | } |
816 | | | 816 | |
817 | /* | | 817 | /* |
818 | * miliseconds from last congestion | | 818 | * miliseconds from last congestion |
819 | */ | | 819 | */ |
820 | static uint32_t | | 820 | static uint32_t |
821 | tcp_cubic_diff_ctime(struct tcpcb *tp) | | 821 | tcp_cubic_diff_ctime(struct tcpcb *tp) |
822 | { | | 822 | { |
823 | struct timeval now_timeval; | | 823 | struct timeval now_timeval; |
824 | | | 824 | |
825 | getmicrouptime(&now_timeval); | | 825 | getmicrouptime(&now_timeval); |
826 | return now_timeval.tv_sec * 1000 + now_timeval.tv_usec / 1000 - | | 826 | return now_timeval.tv_sec * 1000 + now_timeval.tv_usec / 1000 - |
827 | tp->snd_cubic_ctime; | | 827 | tp->snd_cubic_ctime; |
828 | } | | 828 | } |
829 | | | 829 | |
/*
 * Approximate integer cube root via Newton-Raphson iteration.
 */
#define CBRT_ROUNDS 30
static uint32_t
tcp_cubic_cbrt(uint32_t v)
{
	int i, rounds = CBRT_ROUNDS;
	uint64_t x = v / 3;

	/* The iteration does not behave for very small inputs;
	 * answer those directly. */
	if (v == 0)
		return 0;
	else if (v < 4)
		return 1;

	/*
	 * 2097151 is the largest x for which 2*x^3 + 3*x fits in 64 bits.
	 * Past that, switch to a rearranged (slower-converging) update
	 * and buy accuracy back with extra rounds.
	 */
	if (x > 2097151)
		rounds += 10;

	for (i = 0; i < rounds; i++) {
		if (rounds == CBRT_ROUNDS) {
			/* Standard step: x <- (v + 2x^3) / 3x^2 */
			x = (v + 2 * x * x * x) / (3 * x * x);
		} else {
			/* Overflow-free rearrangement of the same step. */
			x = v / (3 * x * x) + 2 * x / 3;
		}
	}

	return (uint32_t)x;
}
862 | | | 862 | |
/* Draft Rhee Section 3.1 - get W(t+rtt) - Eq. 1 */
/*
 * Compute the CUBIC window target W(t+rtt) from the time since the
 * last congestion event (ms_elapsed) and the smoothed rtt, both in
 * milliseconds, around the saved plateau snd_cubic_wmax.
 */
static ulong
tcp_cubic_getW(struct tcpcb *tp, uint32_t ms_elapsed, uint32_t rtt)
{
	uint32_t K;
	long tK3;

	/* Section 3.1 Eq. 2 */
	K = tcp_cubic_cbrt(tp->snd_cubic_wmax / CUBIC_BETAB *
	    CUBIC_CB / CUBIC_CA);
	/* (t-K)^3 - not clear why is the measure unit mattering */
	/*
	 * NOTE(review): tK3 is a plain long; on ILP32 the cube can
	 * overflow for large t-K (a few thousand ms) - confirm callers
	 * bound ms_elapsed, or whether this should be int64_t.
	 */
	tK3 = (long)(ms_elapsed + rtt) - (long)K;
	tK3 = tK3 * tK3 * tK3;

	return CUBIC_CA * tK3 / CUBIC_CB + tp->snd_cubic_wmax;
}
879 | | | 879 | |
880 | static void | | 880 | static void |
881 | tcp_cubic_congestion_exp(struct tcpcb *tp) | | 881 | tcp_cubic_congestion_exp(struct tcpcb *tp) |
882 | { | | 882 | { |
883 | | | 883 | |
884 | /* | | 884 | /* |
885 | * Congestion - Set WMax and shrink cwnd | | 885 | * Congestion - Set WMax and shrink cwnd |
886 | */ | | 886 | */ |
887 | tcp_cubic_update_ctime(tp); | | 887 | tcp_cubic_update_ctime(tp); |
888 | | | 888 | |
889 | /* Section 3.6 - Fast Convergence */ | | 889 | /* Section 3.6 - Fast Convergence */ |
890 | if (tp->snd_cubic_wmax < tp->snd_cubic_wmax_last) { | | 890 | if (tp->snd_cubic_wmax < tp->snd_cubic_wmax_last) { |
891 | tp->snd_cubic_wmax_last = tp->snd_cubic_wmax; | | 891 | tp->snd_cubic_wmax_last = tp->snd_cubic_wmax; |
892 | tp->snd_cubic_wmax = tp->snd_cubic_wmax / 2 + | | 892 | tp->snd_cubic_wmax = tp->snd_cubic_wmax / 2 + |
893 | tp->snd_cubic_wmax * CUBIC_BETAA / CUBIC_BETAB / 2; | | 893 | tp->snd_cubic_wmax * CUBIC_BETAA / CUBIC_BETAB / 2; |
894 | } else { | | 894 | } else { |
895 | tp->snd_cubic_wmax_last = tp->snd_cubic_wmax; | | 895 | tp->snd_cubic_wmax_last = tp->snd_cubic_wmax; |
896 | tp->snd_cubic_wmax = tp->snd_cwnd; | | 896 | tp->snd_cubic_wmax = tp->snd_cwnd; |
897 | } | | 897 | } |
898 | | | 898 | |
899 | tp->snd_cubic_wmax = uimax(tp->t_segsz, tp->snd_cubic_wmax); | | 899 | tp->snd_cubic_wmax = uimax(tp->t_segsz, tp->snd_cubic_wmax); |
900 | | | 900 | |
901 | /* Shrink CWND */ | | 901 | /* Shrink CWND */ |
902 | tcp_common_congestion_exp(tp, CUBIC_BETAA, CUBIC_BETAB); | | 902 | tcp_common_congestion_exp(tp, CUBIC_BETAA, CUBIC_BETAB); |
903 | } | | 903 | } |
904 | | | 904 | |
905 | static int | | 905 | static int |
906 | tcp_cubic_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th) | | 906 | tcp_cubic_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th) |
907 | { | | 907 | { |
908 | | | 908 | |
909 | if (SEQ_LT(th->th_ack, tp->snd_high)) { | | 909 | if (SEQ_LT(th->th_ack, tp->snd_high)) { |
910 | /* See newreno */ | | 910 | /* See newreno */ |
911 | tp->t_dupacks = 0; | | 911 | tp->t_dupacks = 0; |
912 | return 1; | | 912 | return 1; |
913 | } | | 913 | } |
914 | | | 914 | |
915 | /* | | 915 | /* |
916 | * mark WMax | | 916 | * mark WMax |
917 | */ | | 917 | */ |
918 | tcp_cubic_congestion_exp(tp); | | 918 | tcp_cubic_congestion_exp(tp); |
919 | | | 919 | |
920 | /* Do fast retransmit */ | | 920 | /* Do fast retransmit */ |
921 | return tcp_reno_do_fast_retransmit(tp, th); | | 921 | return tcp_reno_do_fast_retransmit(tp, th); |
922 | } | | 922 | } |
923 | | | 923 | |
924 | static void | | 924 | static void |
925 | tcp_cubic_newack(struct tcpcb *tp, const struct tcphdr *th) | | 925 | tcp_cubic_newack(struct tcpcb *tp, const struct tcphdr *th) |
926 | { | | 926 | { |
927 | uint32_t ms_elapsed, rtt; | | 927 | uint32_t ms_elapsed, rtt; |
928 | u_long w_tcp; | | 928 | u_long w_tcp; |
929 | | | 929 | |
930 | /* Congestion avoidance and not in fast recovery and usable rtt */ | | 930 | /* Congestion avoidance and not in fast recovery and usable rtt */ |
931 | if (tp->snd_cwnd > tp->snd_ssthresh && tp->t_partialacks < 0 && | | 931 | if (tp->snd_cwnd > tp->snd_ssthresh && tp->t_partialacks < 0 && |
932 | /* | | 932 | /* |
933 | * t_srtt is 1/32 units of slow ticks | | 933 | * t_srtt is 1/32 units of slow ticks |
934 | * converting it in ms would be equal to | | 934 | * converting it in ms would be equal to |
935 | * (t_srtt >> 5) * 1000 / PR_SLOWHZ ~= (t_srtt << 5) / PR_SLOWHZ | | 935 | * (t_srtt >> 5) * 1000 / PR_SLOWHZ ~= (t_srtt << 5) / PR_SLOWHZ |
936 | */ | | 936 | */ |
937 | (rtt = (tp->t_srtt << 5) / PR_SLOWHZ) > 0) { | | 937 | (rtt = (tp->t_srtt << 5) / PR_SLOWHZ) > 0) { |
938 | ms_elapsed = tcp_cubic_diff_ctime(tp); | | 938 | ms_elapsed = tcp_cubic_diff_ctime(tp); |
939 | | | 939 | |
940 | /* Compute W_tcp(t) */ | | 940 | /* Compute W_tcp(t) */ |
941 | w_tcp = tp->snd_cubic_wmax * CUBIC_BETAA / CUBIC_BETAB + | | 941 | w_tcp = tp->snd_cubic_wmax * CUBIC_BETAA / CUBIC_BETAB + |
942 | ms_elapsed / rtt / 3; | | 942 | ms_elapsed / rtt / 3; |
943 | | | 943 | |
944 | if (tp->snd_cwnd > w_tcp) { | | 944 | if (tp->snd_cwnd > w_tcp) { |
945 | /* Not in TCP friendly mode */ | | 945 | /* Not in TCP friendly mode */ |
946 | tp->snd_cwnd += (tcp_cubic_getW(tp, ms_elapsed, rtt) - | | 946 | tp->snd_cwnd += (tcp_cubic_getW(tp, ms_elapsed, rtt) - |
947 | tp->snd_cwnd) / tp->snd_cwnd; | | 947 | tp->snd_cwnd) / tp->snd_cwnd; |
948 | } else { | | 948 | } else { |
949 | /* friendly TCP mode */ | | 949 | /* friendly TCP mode */ |
950 | tp->snd_cwnd = w_tcp; | | 950 | tp->snd_cwnd = w_tcp; |
951 | } | | 951 | } |
952 | | | 952 | |
953 | /* Make sure we are within limits */ | | 953 | /* Make sure we are within limits */ |
954 | tp->snd_cwnd = uimax(tp->snd_cwnd, tp->t_segsz); | | 954 | tp->snd_cwnd = uimax(tp->snd_cwnd, tp->t_segsz); |
955 | tp->snd_cwnd = uimin(tp->snd_cwnd, TCP_MAXWIN << tp->snd_scale); | | 955 | tp->snd_cwnd = uimin(tp->snd_cwnd, TCP_MAXWIN << tp->snd_scale); |
956 | } else { | | 956 | } else { |
957 | /* Use New Reno */ | | 957 | /* Use New Reno */ |
958 | tcp_newreno_newack(tp, th); | | 958 | tcp_newreno_newack(tp, th); |
959 | } | | 959 | } |
960 | } | | 960 | } |
961 | | | 961 | |
962 | static void | | 962 | static void |
963 | tcp_cubic_slow_retransmit(struct tcpcb *tp) | | 963 | tcp_cubic_slow_retransmit(struct tcpcb *tp) |
964 | { | | 964 | { |
965 | | | 965 | |
966 | /* Timeout - Mark new congestion */ | | 966 | /* Timeout - Mark new congestion */ |
967 | tcp_cubic_congestion_exp(tp); | | 967 | tcp_cubic_congestion_exp(tp); |
968 | | | 968 | |
969 | /* Loss Window MUST be one segment. */ | | 969 | /* Loss Window MUST be one segment. */ |
970 | tp->snd_cwnd = tp->t_segsz; | | 970 | tp->snd_cwnd = tp->t_segsz; |
971 | tp->t_partialacks = -1; | | 971 | tp->t_partialacks = -1; |
972 | tp->t_dupacks = 0; | | 972 | tp->t_dupacks = 0; |
973 | tp->t_bytes_acked = 0; | | 973 | tp->t_bytes_acked = 0; |
974 | | | 974 | |
975 | if (TCP_ECN_ALLOWED(tp)) | | 975 | if (TCP_ECN_ALLOWED(tp)) |
976 | tp->t_flags |= TF_ECN_SND_CWR; | | 976 | tp->t_flags |= TF_ECN_SND_CWR; |
977 | } | | 977 | } |
978 | | | 978 | |
/*
 * CUBIC method table for the congestion-control framework.  The
 * fast_retransmit_newack hook is shared with New Reno; the remaining
 * hooks are the CUBIC-specific handlers defined above.
 */
const struct tcp_congctl tcp_cubic_ctl = {
	.fast_retransmit = tcp_cubic_fast_retransmit,
	.slow_retransmit = tcp_cubic_slow_retransmit,
	.fast_retransmit_newack = tcp_newreno_fast_retransmit_newack,
	.newack = tcp_cubic_newack,
	.cong_exp = tcp_cubic_congestion_exp,
};