| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: tcp_input.c,v 1.404 2018/04/03 09:03:59 maxv Exp $ */ | | 1 | /* $NetBSD: tcp_input.c,v 1.405 2018/04/08 12:18:06 maxv Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. | | 4 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
| @@ -138,27 +138,27 @@ | | | @@ -138,27 +138,27 @@ |
138 | */ | | 138 | */ |
139 | | | 139 | |
140 | /* | | 140 | /* |
141 | * TODO list for SYN cache stuff: | | 141 | * TODO list for SYN cache stuff: |
142 | * | | 142 | * |
143 | * Find room for a "state" field, which is needed to keep a | | 143 | * Find room for a "state" field, which is needed to keep a |
144 | * compressed state for TIME_WAIT TCBs. It's been noted already | | 144 | * compressed state for TIME_WAIT TCBs. It's been noted already |
145 | * that this is fairly important for very high-volume web and | | 145 | * that this is fairly important for very high-volume web and |
146 | * mail servers, which use a large number of short-lived | | 146 | * mail servers, which use a large number of short-lived |
147 | * connections. | | 147 | * connections. |
148 | */ | | 148 | */ |
149 | | | 149 | |
150 | #include <sys/cdefs.h> | | 150 | #include <sys/cdefs.h> |
151 | __KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.404 2018/04/03 09:03:59 maxv Exp $"); | | 151 | __KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.405 2018/04/08 12:18:06 maxv Exp $"); |
152 | | | 152 | |
153 | #ifdef _KERNEL_OPT | | 153 | #ifdef _KERNEL_OPT |
154 | #include "opt_inet.h" | | 154 | #include "opt_inet.h" |
155 | #include "opt_ipsec.h" | | 155 | #include "opt_ipsec.h" |
156 | #include "opt_inet_csum.h" | | 156 | #include "opt_inet_csum.h" |
157 | #include "opt_tcp_debug.h" | | 157 | #include "opt_tcp_debug.h" |
158 | #endif | | 158 | #endif |
159 | | | 159 | |
160 | #include <sys/param.h> | | 160 | #include <sys/param.h> |
161 | #include <sys/systm.h> | | 161 | #include <sys/systm.h> |
162 | #include <sys/malloc.h> | | 162 | #include <sys/malloc.h> |
163 | #include <sys/mbuf.h> | | 163 | #include <sys/mbuf.h> |
164 | #include <sys/protosw.h> | | 164 | #include <sys/protosw.h> |
| @@ -395,28 +395,26 @@ extern struct evcnt tcp_reass_fragdup; | | | @@ -395,28 +395,26 @@ extern struct evcnt tcp_reass_fragdup; |
395 | | | 395 | |
396 | #endif /* TCP_REASS_COUNTERS */ | | 396 | #endif /* TCP_REASS_COUNTERS */ |
397 | | | 397 | |
398 | static int tcp_reass(struct tcpcb *, const struct tcphdr *, struct mbuf *, | | 398 | static int tcp_reass(struct tcpcb *, const struct tcphdr *, struct mbuf *, |
399 | int); | | 399 | int); |
400 | static int tcp_dooptions(struct tcpcb *, const u_char *, int, | | 400 | static int tcp_dooptions(struct tcpcb *, const u_char *, int, |
401 | struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *); | | 401 | struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *); |
402 | | | 402 | |
403 | static void tcp4_log_refused(const struct ip *, const struct tcphdr *); | | 403 | static void tcp4_log_refused(const struct ip *, const struct tcphdr *); |
404 | #ifdef INET6 | | 404 | #ifdef INET6 |
405 | static void tcp6_log_refused(const struct ip6_hdr *, const struct tcphdr *); | | 405 | static void tcp6_log_refused(const struct ip6_hdr *, const struct tcphdr *); |
406 | #endif | | 406 | #endif |
407 | | | 407 | |
408 | #define TRAVERSE(x) while ((x)->m_next) (x) = (x)->m_next | | | |
409 | | | | |
410 | #if defined(MBUFTRACE) | | 408 | #if defined(MBUFTRACE) |
411 | struct mowner tcp_reass_mowner = MOWNER_INIT("tcp", "reass"); | | 409 | struct mowner tcp_reass_mowner = MOWNER_INIT("tcp", "reass"); |
412 | #endif /* defined(MBUFTRACE) */ | | 410 | #endif /* defined(MBUFTRACE) */ |
413 | | | 411 | |
414 | static struct pool tcpipqent_pool; | | 412 | static struct pool tcpipqent_pool; |
415 | | | 413 | |
416 | void | | 414 | void |
417 | tcpipqent_init(void) | | 415 | tcpipqent_init(void) |
418 | { | | 416 | { |
419 | | | 417 | |
420 | pool_init(&tcpipqent_pool, sizeof(struct ipqent), 0, 0, 0, "tcpipqepl", | | 418 | pool_init(&tcpipqent_pool, sizeof(struct ipqent), 0, 0, 0, "tcpipqepl", |
421 | NULL, IPL_VM); | | 419 | NULL, IPL_VM); |
422 | } | | 420 | } |
| @@ -491,28 +489,27 @@ tcp_reass(struct tcpcb *tp, const struct | | | @@ -491,28 +489,27 @@ tcp_reass(struct tcpcb *tp, const struct |
491 | pkt_len = tlen; | | 489 | pkt_len = tlen; |
492 | pkt_flags = th->th_flags; | | 490 | pkt_flags = th->th_flags; |
493 | | | 491 | |
494 | TCP_REASS_COUNTER_INCR(&tcp_reass_); | | 492 | TCP_REASS_COUNTER_INCR(&tcp_reass_); |
495 | | | 493 | |
496 | if ((p = TAILQ_LAST(&tp->segq, ipqehead)) != NULL) { | | 494 | if ((p = TAILQ_LAST(&tp->segq, ipqehead)) != NULL) { |
497 | /* | | 495 | /* |
498 | * When we miss a packet, the vast majority of time we get | | 496 | * When we miss a packet, the vast majority of time we get |
499 | * packets that follow it in order. So optimize for that. | | 497 | * packets that follow it in order. So optimize for that. |
500 | */ | | 498 | */ |
501 | if (pkt_seq == p->ipqe_seq + p->ipqe_len) { | | 499 | if (pkt_seq == p->ipqe_seq + p->ipqe_len) { |
502 | p->ipqe_len += pkt_len; | | 500 | p->ipqe_len += pkt_len; |
503 | p->ipqe_flags |= pkt_flags; | | 501 | p->ipqe_flags |= pkt_flags; |
504 | m_cat(p->ipre_mlast, m); | | 502 | m_cat(p->ipqe_m, m); |
505 | TRAVERSE(p->ipre_mlast); | | | |
506 | m = NULL; | | 503 | m = NULL; |
507 | tiqe = p; | | 504 | tiqe = p; |
508 | TAILQ_REMOVE(&tp->timeq, p, ipqe_timeq); | | 505 | TAILQ_REMOVE(&tp->timeq, p, ipqe_timeq); |
509 | TCP_REASS_COUNTER_INCR(&tcp_reass_appendtail); | | 506 | TCP_REASS_COUNTER_INCR(&tcp_reass_appendtail); |
510 | goto skip_replacement; | | 507 | goto skip_replacement; |
511 | } | | 508 | } |
512 | /* | | 509 | /* |
513 | * While we're here, if the pkt is completely beyond | | 510 | * While we're here, if the pkt is completely beyond |
514 | * anything we have, just insert it at the tail. | | 511 | * anything we have, just insert it at the tail. |
515 | */ | | 512 | */ |
516 | if (SEQ_GT(pkt_seq, p->ipqe_seq + p->ipqe_len)) { | | 513 | if (SEQ_GT(pkt_seq, p->ipqe_seq + p->ipqe_len)) { |
517 | TCP_REASS_COUNTER_INCR(&tcp_reass_inserttail); | | 514 | TCP_REASS_COUNTER_INCR(&tcp_reass_inserttail); |
518 | goto insert_it; | | 515 | goto insert_it; |
| @@ -523,28 +520,26 @@ tcp_reass(struct tcpcb *tp, const struct | | | @@ -523,28 +520,26 @@ tcp_reass(struct tcpcb *tp, const struct |
523 | | | 520 | |
524 | if (q != NULL) { | | 521 | if (q != NULL) { |
525 | /* | | 522 | /* |
526 | * If this segment immediately precedes the first out-of-order | | 523 | * If this segment immediately precedes the first out-of-order |
527 | * block, simply slap the segment in front of it and (mostly) | | 524 | * block, simply slap the segment in front of it and (mostly) |
528 | * skip the complicated logic. | | 525 | * skip the complicated logic. |
529 | */ | | 526 | */ |
530 | if (pkt_seq + pkt_len == q->ipqe_seq) { | | 527 | if (pkt_seq + pkt_len == q->ipqe_seq) { |
531 | q->ipqe_seq = pkt_seq; | | 528 | q->ipqe_seq = pkt_seq; |
532 | q->ipqe_len += pkt_len; | | 529 | q->ipqe_len += pkt_len; |
533 | q->ipqe_flags |= pkt_flags; | | 530 | q->ipqe_flags |= pkt_flags; |
534 | m_cat(m, q->ipqe_m); | | 531 | m_cat(m, q->ipqe_m); |
535 | q->ipqe_m = m; | | 532 | q->ipqe_m = m; |
536 | q->ipre_mlast = m; /* last mbuf may have changed */ | | | |
537 | TRAVERSE(q->ipre_mlast); | | | |
538 | tiqe = q; | | 533 | tiqe = q; |
539 | TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); | | 534 | TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq); |
540 | TCP_REASS_COUNTER_INCR(&tcp_reass_prependfirst); | | 535 | TCP_REASS_COUNTER_INCR(&tcp_reass_prependfirst); |
541 | goto skip_replacement; | | 536 | goto skip_replacement; |
542 | } | | 537 | } |
543 | } else { | | 538 | } else { |
544 | TCP_REASS_COUNTER_INCR(&tcp_reass_empty); | | 539 | TCP_REASS_COUNTER_INCR(&tcp_reass_empty); |
545 | } | | 540 | } |
546 | | | 541 | |
547 | /* | | 542 | /* |
548 | * Find a segment which begins after this one does. | | 543 | * Find a segment which begins after this one does. |
549 | */ | | 544 | */ |
550 | for (p = NULL; q != NULL; q = nq) { | | 545 | for (p = NULL; q != NULL; q = nq) { |
| @@ -552,28 +547,27 @@ tcp_reass(struct tcpcb *tp, const struct | | | @@ -552,28 +547,27 @@ tcp_reass(struct tcpcb *tp, const struct |
552 | #ifdef TCP_REASS_COUNTERS | | 547 | #ifdef TCP_REASS_COUNTERS |
553 | count++; | | 548 | count++; |
554 | #endif | | 549 | #endif |
555 | | | 550 | |
556 | /* | | 551 | /* |
557 | * If the received segment is just right after this | | 552 | * If the received segment is just right after this |
558 | * fragment, merge the two together and then check | | 553 | * fragment, merge the two together and then check |
559 | * for further overlaps. | | 554 | * for further overlaps. |
560 | */ | | 555 | */ |
561 | if (q->ipqe_seq + q->ipqe_len == pkt_seq) { | | 556 | if (q->ipqe_seq + q->ipqe_len == pkt_seq) { |
562 | pkt_len += q->ipqe_len; | | 557 | pkt_len += q->ipqe_len; |
563 | pkt_flags |= q->ipqe_flags; | | 558 | pkt_flags |= q->ipqe_flags; |
564 | pkt_seq = q->ipqe_seq; | | 559 | pkt_seq = q->ipqe_seq; |
565 | m_cat(q->ipre_mlast, m); | | 560 | m_cat(q->ipqe_m, m); |
566 | TRAVERSE(q->ipre_mlast); | | | |
567 | m = q->ipqe_m; | | 561 | m = q->ipqe_m; |
568 | TCP_REASS_COUNTER_INCR(&tcp_reass_append); | | 562 | TCP_REASS_COUNTER_INCR(&tcp_reass_append); |
569 | goto free_ipqe; | | 563 | goto free_ipqe; |
570 | } | | 564 | } |
571 | | | 565 | |
572 | /* | | 566 | /* |
573 | * If the received segment is completely past this | | 567 | * If the received segment is completely past this |
574 | * fragment, we need to go to the next fragment. | | 568 | * fragment, we need to go to the next fragment. |
575 | */ | | 569 | */ |
576 | if (SEQ_LT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { | | 570 | if (SEQ_LT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { |
577 | p = q; | | 571 | p = q; |
578 | continue; | | 572 | continue; |
579 | } | | 573 | } |
| @@ -619,28 +613,27 @@ tcp_reass(struct tcpcb *tp, const struct | | | @@ -619,28 +613,27 @@ tcp_reass(struct tcpcb *tp, const struct |
619 | goto free_ipqe; | | 613 | goto free_ipqe; |
620 | } | | 614 | } |
621 | | | 615 | |
622 | /* | | 616 | /* |
623 | * Received segment extends past the end of the fragment. | | 617 | * Received segment extends past the end of the fragment. |
624 | * Drop the overlapping bytes, merge the fragment and | | 618 | * Drop the overlapping bytes, merge the fragment and |
625 | * segment, and treat as a longer received packet. | | 619 | * segment, and treat as a longer received packet. |
626 | */ | | 620 | */ |
627 | if (SEQ_LT(q->ipqe_seq, pkt_seq) && | | 621 | if (SEQ_LT(q->ipqe_seq, pkt_seq) && |
628 | SEQ_GT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { | | 622 | SEQ_GT(q->ipqe_seq + q->ipqe_len, pkt_seq)) { |
629 | int overlap = q->ipqe_seq + q->ipqe_len - pkt_seq; | | 623 | int overlap = q->ipqe_seq + q->ipqe_len - pkt_seq; |
630 | m_adj(m, overlap); | | 624 | m_adj(m, overlap); |
631 | rcvpartdupbyte += overlap; | | 625 | rcvpartdupbyte += overlap; |
632 | m_cat(q->ipre_mlast, m); | | 626 | m_cat(q->ipqe_m, m); |
633 | TRAVERSE(q->ipre_mlast); | | | |
634 | m = q->ipqe_m; | | 627 | m = q->ipqe_m; |
635 | pkt_seq = q->ipqe_seq; | | 628 | pkt_seq = q->ipqe_seq; |
636 | pkt_len += q->ipqe_len - overlap; | | 629 | pkt_len += q->ipqe_len - overlap; |
637 | rcvoobyte -= overlap; | | 630 | rcvoobyte -= overlap; |
638 | TCP_REASS_COUNTER_INCR(&tcp_reass_overlaptail); | | 631 | TCP_REASS_COUNTER_INCR(&tcp_reass_overlaptail); |
639 | goto free_ipqe; | | 632 | goto free_ipqe; |
640 | } | | 633 | } |
641 | | | 634 | |
642 | /* | | 635 | /* |
643 | * Received segment extends past the front of the fragment. | | 636 | * Received segment extends past the front of the fragment. |
644 | * Drop the overlapping bytes on the received packet. The | | 637 | * Drop the overlapping bytes on the received packet. The |
645 | * packet will then be concatenated with this fragment a | | 638 | * packet will then be concatenated with this fragment a |
646 | * bit later. | | 639 | * bit later. |
| @@ -740,27 +733,26 @@ insert_it: | | | @@ -740,27 +733,26 @@ insert_it: |
740 | tcps = TCP_STAT_GETREF(); | | 733 | tcps = TCP_STAT_GETREF(); |
741 | tcps[TCP_STAT_RCVOOPACK]++; | | 734 | tcps[TCP_STAT_RCVOOPACK]++; |
742 | tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte; | | 735 | tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte; |
743 | if (rcvpartdupbyte) { | | 736 | if (rcvpartdupbyte) { |
744 | tcps[TCP_STAT_RCVPARTDUPPACK]++; | | 737 | tcps[TCP_STAT_RCVPARTDUPPACK]++; |
745 | tcps[TCP_STAT_RCVPARTDUPBYTE] += rcvpartdupbyte; | | 738 | tcps[TCP_STAT_RCVPARTDUPBYTE] += rcvpartdupbyte; |
746 | } | | 739 | } |
747 | TCP_STAT_PUTREF(); | | 740 | TCP_STAT_PUTREF(); |
748 | | | 741 | |
749 | /* | | 742 | /* |
750 | * Insert the new fragment queue entry into both queues. | | 743 | * Insert the new fragment queue entry into both queues. |
751 | */ | | 744 | */ |
752 | tiqe->ipqe_m = m; | | 745 | tiqe->ipqe_m = m; |
753 | tiqe->ipre_mlast = m; | | | |
754 | tiqe->ipqe_seq = pkt_seq; | | 746 | tiqe->ipqe_seq = pkt_seq; |
755 | tiqe->ipqe_len = pkt_len; | | 747 | tiqe->ipqe_len = pkt_len; |
756 | tiqe->ipqe_flags = pkt_flags; | | 748 | tiqe->ipqe_flags = pkt_flags; |
757 | if (p == NULL) { | | 749 | if (p == NULL) { |
758 | TAILQ_INSERT_HEAD(&tp->segq, tiqe, ipqe_q); | | 750 | TAILQ_INSERT_HEAD(&tp->segq, tiqe, ipqe_q); |
759 | } else { | | 751 | } else { |
760 | TAILQ_INSERT_AFTER(&tp->segq, p, tiqe, ipqe_q); | | 752 | TAILQ_INSERT_AFTER(&tp->segq, p, tiqe, ipqe_q); |
761 | } | | 753 | } |
762 | tp->t_segqlen++; | | 754 | tp->t_segqlen++; |
763 | | | 755 | |
764 | skip_replacement: | | 756 | skip_replacement: |
765 | TAILQ_INSERT_HEAD(&tp->timeq, tiqe, ipqe_timeq); | | 757 | TAILQ_INSERT_HEAD(&tp->timeq, tiqe, ipqe_timeq); |
766 | | | 758 | |