Fri Feb 19 20:05:43 2016 UTC ()
Implement a queue for if_link_state_change() calls to fix a race condition
introduced in the prior patch.

The queue has capacity to store 8 link state changes, if it overflows then
the oldest state change is lost, but the oldest DOWN state change is
preserved to ensure any subsequent UP state changes reflect properly.

Because there are only 3 states to queue, the queue itself is implemented
by storing 2-bit numbers in a bigger one.
To increase the size of the queue, just increase the size of the backing
store to a bigger number.


(roy)
diff -r1.324 -r1.325 src/sys/net/if.c
diff -r1.197 -r1.198 src/sys/net/if.h

cvs diff -r1.324 -r1.325 src/sys/net/if.c (expand / switch to unified diff)

--- src/sys/net/if.c 2016/02/15 08:08:04 1.324
+++ src/sys/net/if.c 2016/02/19 20:05:43 1.325
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: if.c,v 1.324 2016/02/15 08:08:04 ozaki-r Exp $ */ 1/* $NetBSD: if.c,v 1.325 2016/02/19 20:05:43 roy Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by William Studenmund and Jason R. Thorpe. 8 * by William Studenmund and Jason R. Thorpe.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -80,27 +80,27 @@ @@ -80,27 +80,27 @@
80 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 80 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 81 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 82 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 83 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 84 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 85 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 86 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87 * SUCH DAMAGE. 87 * SUCH DAMAGE.
88 * 88 *
89 * @(#)if.c 8.5 (Berkeley) 1/9/95 89 * @(#)if.c 8.5 (Berkeley) 1/9/95
90 */ 90 */
91 91
92#include <sys/cdefs.h> 92#include <sys/cdefs.h>
93__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.324 2016/02/15 08:08:04 ozaki-r Exp $"); 93__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.325 2016/02/19 20:05:43 roy Exp $");
94 94
95#if defined(_KERNEL_OPT) 95#if defined(_KERNEL_OPT)
96#include "opt_inet.h" 96#include "opt_inet.h"
97 97
98#include "opt_atalk.h" 98#include "opt_atalk.h"
99#include "opt_natm.h" 99#include "opt_natm.h"
100#include "opt_wlan.h" 100#include "opt_wlan.h"
101#include "opt_net_mpsafe.h" 101#include "opt_net_mpsafe.h"
102#endif 102#endif
103 103
104#include <sys/param.h> 104#include <sys/param.h>
105#include <sys/mbuf.h> 105#include <sys/mbuf.h>
106#include <sys/systm.h> 106#include <sys/systm.h>
@@ -593,27 +593,27 @@ if_initialize(ifnet_t *ifp) @@ -593,27 +593,27 @@ if_initialize(ifnet_t *ifp)
593 TAILQ_INIT(&ifp->if_addrlist); 593 TAILQ_INIT(&ifp->if_addrlist);
594 594
595 /* 595 /*
596 * Link level name is allocated later by a separate call to 596 * Link level name is allocated later by a separate call to
597 * if_alloc_sadl(). 597 * if_alloc_sadl().
598 */ 598 */
599 599
600 if (ifp->if_snd.ifq_maxlen == 0) 600 if (ifp->if_snd.ifq_maxlen == 0)
601 ifp->if_snd.ifq_maxlen = ifqmaxlen; 601 ifp->if_snd.ifq_maxlen = ifqmaxlen;
602 602
603 ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */ 603 ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
604 604
605 ifp->if_link_state = LINK_STATE_UNKNOWN; 605 ifp->if_link_state = LINK_STATE_UNKNOWN;
606 ifp->if_old_link_state = LINK_STATE_UNKNOWN; 606 ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
607 607
608 ifp->if_capenable = 0; 608 ifp->if_capenable = 0;
609 ifp->if_csum_flags_tx = 0; 609 ifp->if_csum_flags_tx = 0;
610 ifp->if_csum_flags_rx = 0; 610 ifp->if_csum_flags_rx = 0;
611 611
612#ifdef ALTQ 612#ifdef ALTQ
613 ifp->if_snd.altq_type = 0; 613 ifp->if_snd.altq_type = 0;
614 ifp->if_snd.altq_disc = NULL; 614 ifp->if_snd.altq_disc = NULL;
615 ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE; 615 ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
616 ifp->if_snd.altq_tbr = NULL; 616 ifp->if_snd.altq_tbr = NULL;
617 ifp->if_snd.altq_ifp = ifp; 617 ifp->if_snd.altq_ifp = ifp;
618#endif 618#endif
619 619
@@ -1553,102 +1553,205 @@ link_rtrequest(int cmd, struct rtentry * @@ -1553,102 +1553,205 @@ link_rtrequest(int cmd, struct rtentry *
1553 struct ifnet *ifp; 1553 struct ifnet *ifp;
1554 1554
1555 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 1555 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1556 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_getkey(rt)) == NULL) 1556 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_getkey(rt)) == NULL)
1557 return; 1557 return;
1558 if ((ifa = ifaof_ifpforaddr(dst, ifp)) != NULL) { 1558 if ((ifa = ifaof_ifpforaddr(dst, ifp)) != NULL) {
1559 rt_replace_ifa(rt, ifa); 1559 rt_replace_ifa(rt, ifa);
1560 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 1560 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1561 ifa->ifa_rtrequest(cmd, rt, info); 1561 ifa->ifa_rtrequest(cmd, rt, info);
1562 } 1562 }
1563} 1563}
1564 1564
1565/* 1565/*
1566 * Handle a change in the interface link state. 1566 * bitmask macros to manage a densely packed link_state change queue.
1567 * XXX: We should listen to the routing socket in-kernel rather 1567 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
1568 * than calling in6_if_link_* functions directly from here. 1568 * LINK_STATE_UP(2) we need 2 bits for each state change.
 1569 * As a state change to store is 0, treat all bits set as an unset item.
 1570 */
 1571#define LQ_ITEM_BITS 2
 1572#define LQ_ITEM_MASK ((1 << LQ_ITEM_BITS) - 1)
 1573#define LQ_MASK(i) (LQ_ITEM_MASK << (i) * LQ_ITEM_BITS)
 1574#define LINK_STATE_UNSET LQ_ITEM_MASK
 1575#define LQ_ITEM(q, i) (((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS)
 1576#define LQ_STORE(q, i, v) \
 1577 do { \
 1578 (q) &= ~LQ_MASK((i)); \
 1579 (q) |= (v) << (i) * LQ_ITEM_BITS; \
 1580 } while (0 /* CONSTCOND */)
 1581#define LQ_MAX(q) ((sizeof((q)) * NBBY) / LQ_ITEM_BITS)
 1582#define LQ_POP(q, v) \
 1583 do { \
 1584 (v) = LQ_ITEM((q), 0); \
 1585 (q) >>= LQ_ITEM_BITS; \
 1586 (q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \
 1587 } while (0 /* CONSTCOND */)
 1588#define LQ_PUSH(q, v) \
 1589 do { \
 1590 (q) >>= LQ_ITEM_BITS; \
 1591 (q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \
 1592 } while (0 /* CONSTCOND */)
 1593#define LQ_FIND_UNSET(q, i) \
 1594 for ((i) = 0; i < LQ_MAX((q)); (i)++) { \
 1595 if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET) \
 1596 break; \
 1597 }
 1598/*
 1599 * Handle a change in the interface link state and
 1600 * queue notifications.
1569 */ 1601 */
1570void 1602void
1571if_link_state_change(struct ifnet *ifp, int link_state) 1603if_link_state_change(struct ifnet *ifp, int link_state)
1572{ 1604{
1573 int s; 1605 int s, idx;
1574 1606
1575 s = splnet(); 1607 /* Ensure change is to a valid state */
1576 if (ifp->if_link_state == link_state) { 1608 switch (link_state) {
1577 splx(s); 1609 case LINK_STATE_UNKNOWN: /* FALLTHROUGH */
 1610 case LINK_STATE_DOWN: /* FALLTHROUGH */
 1611 case LINK_STATE_UP:
 1612 break;
 1613 default:
 1614#ifdef DEBUG
 1615 printf("%s: invalid link state %d\n",
 1616 ifp->if_xname, link_state);
 1617#endif
1578 return; 1618 return;
1579 } 1619 }
1580 1620
1581 ifp->if_link_state = link_state; 1621 s = splnet();
 1622
 1623 /* Find the last unset event in the queue. */
 1624 LQ_FIND_UNSET(ifp->if_link_queue, idx);
 1625
 1626 /*
 1627 * Ensure link_state doesn't match the last event in the queue.
 1628 * ifp->if_link_state is not checked and set here because
 1629 * that would present an inconsistent picture to the system.
 1630 */
 1631 if (idx != 0 &&
 1632 LQ_ITEM(ifp->if_link_queue, idx - 1) == (uint8_t)link_state)
 1633 goto out;
 1634
 1635 /* Handle queue overflow. */
 1636 if (idx == LQ_MAX(ifp->if_link_queue)) {
 1637 uint8_t lost;
 1638
 1639 /*
 1640 * The DOWN state must be protected from being pushed off
 1641 * the queue to ensure that userland will always be
 1642 * in a sane state.
 1643 * Because DOWN is protected, there is no need to protect
 1644 * UNKNOWN.
 1645 * It should be invalid to change from any other state to
 1646 * UNKNOWN anyway ...
 1647 */
 1648 lost = LQ_ITEM(ifp->if_link_queue, 0);
 1649 LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
 1650 if (lost == LINK_STATE_DOWN) {
 1651 lost = LQ_ITEM(ifp->if_link_queue, 0);
 1652 LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
 1653 }
 1654 printf("%s: lost link state change %s\n",
 1655 ifp->if_xname,
 1656 lost == LINK_STATE_UP ? "UP" :
 1657 lost == LINK_STATE_DOWN ? "DOWN" :
 1658 "UNKNOWN");
 1659 } else
 1660 LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);
 1661
1582 softint_schedule(ifp->if_link_si); 1662 softint_schedule(ifp->if_link_si);
1583 1663
 1664out:
1584 splx(s); 1665 splx(s);
1585} 1666}
1586 1667
1587 1668/*
 1669 * Handle interface link state change notifications.
 1670 * Must be called at splnet().
 1671 */
1588static void 1672static void
1589if_link_state_change_si(void *arg) 1673if_link_state_change0(struct ifnet *ifp, int link_state)
1590{ 1674{
1591 struct ifnet *ifp = arg; 
1592 int s; 
1593 int link_state, old_link_state; 
1594 struct domain *dp; 1675 struct domain *dp;
1595 1676
1596 s = splnet(); 1677 /* Ensure the change is still valid. */
1597 link_state = ifp->if_link_state; 1678 if (ifp->if_link_state == link_state)
1598 old_link_state = ifp->if_old_link_state; 1679 return;
1599 ifp->if_old_link_state = ifp->if_link_state; 
1600 1680
1601#ifdef DEBUG 1681#ifdef DEBUG
1602 log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname, 1682 log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
1603 link_state == LINK_STATE_UP ? "UP" : 1683 link_state == LINK_STATE_UP ? "UP" :
1604 link_state == LINK_STATE_DOWN ? "DOWN" : 1684 link_state == LINK_STATE_DOWN ? "DOWN" :
1605 "UNKNOWN", 1685 "UNKNOWN",
1606 old_link_state == LINK_STATE_UP ? "UP" : 1686 ifp->if_link_state == LINK_STATE_UP ? "UP" :
1607 old_link_state == LINK_STATE_DOWN ? "DOWN" : 1687 ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
1608 "UNKNOWN"); 1688 "UNKNOWN");
1609#endif 1689#endif
1610 1690
1611 /* 1691 /*
1612 * When going from UNKNOWN to UP, we need to mark existing 1692 * When going from UNKNOWN to UP, we need to mark existing
1613 * addresses as tentative and restart DAD as we may have 1693 * addresses as tentative and restart DAD as we may have
1614 * erroneously not found a duplicate. 1694 * erroneously not found a duplicate.
1615 * 1695 *
1616 * This needs to happen before rt_ifmsg to avoid a race where 1696 * This needs to happen before rt_ifmsg to avoid a race where
1617 * listeners would have an address and expect it to work right 1697 * listeners would have an address and expect it to work right
1618 * away. 1698 * away.
1619 */ 1699 */
1620 if (link_state == LINK_STATE_UP && 1700 if (link_state == LINK_STATE_UP &&
1621 old_link_state == LINK_STATE_UNKNOWN) 1701 ifp->if_link_state == LINK_STATE_UNKNOWN)
1622 { 1702 {
1623 DOMAIN_FOREACH(dp) { 1703 DOMAIN_FOREACH(dp) {
1624 if (dp->dom_if_link_state_change != NULL) 1704 if (dp->dom_if_link_state_change != NULL)
1625 dp->dom_if_link_state_change(ifp, 1705 dp->dom_if_link_state_change(ifp,
1626 LINK_STATE_DOWN); 1706 LINK_STATE_DOWN);
1627 } 1707 }
1628 } 1708 }
1629 1709
 1710 ifp->if_link_state = link_state;
 1711
1630 /* Notify that the link state has changed. */ 1712 /* Notify that the link state has changed. */
1631 rt_ifmsg(ifp); 1713 rt_ifmsg(ifp);
1632 1714
1633#if NCARP > 0 1715#if NCARP > 0
1634 if (ifp->if_carp) 1716 if (ifp->if_carp)
1635 carp_carpdev_state(ifp); 1717 carp_carpdev_state(ifp);
1636#endif 1718#endif
1637 1719
1638 DOMAIN_FOREACH(dp) { 1720 DOMAIN_FOREACH(dp) {
1639 if (dp->dom_if_link_state_change != NULL) 1721 if (dp->dom_if_link_state_change != NULL)
1640 dp->dom_if_link_state_change(ifp, link_state); 1722 dp->dom_if_link_state_change(ifp, link_state);
1641 } 1723 }
 1724}
 1725
 1726/*
 1727 * Process the interface link state change queue.
 1728 */
 1729static void
 1730if_link_state_change_si(void *arg)
 1731{
 1732 struct ifnet *ifp = arg;
 1733 int s;
 1734 uint8_t state;
 1735
 1736 s = splnet();
 1737
 1738 /* Pop a link state change from the queue and process it. */
 1739 LQ_POP(ifp->if_link_queue, state);
 1740 if_link_state_change0(ifp, state);
 1741
 1742 /* If there is a link state change to come, schedule it. */
 1743 if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET)
 1744 softint_schedule(ifp->if_link_si);
1642 1745
1643 splx(s); 1746 splx(s);
1644} 1747}
1645 1748
1646/* 1749/*
1647 * Default action when installing a local route on a point-to-point 1750 * Default action when installing a local route on a point-to-point
1648 * interface. 1751 * interface.
1649 */ 1752 */
1650void 1753void
1651p2p_rtrequest(int req, struct rtentry *rt, 1754p2p_rtrequest(int req, struct rtentry *rt,
1652 __unused const struct rt_addrinfo *info) 1755 __unused const struct rt_addrinfo *info)
1653{ 1756{
1654 struct ifnet *ifp = rt->rt_ifp; 1757 struct ifnet *ifp = rt->rt_ifp;

cvs diff -r1.197 -r1.198 src/sys/net/if.h (expand / switch to unified diff)

--- src/sys/net/if.h 2016/02/16 01:31:26 1.197
+++ src/sys/net/if.h 2016/02/19 20:05:43 1.198
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: if.h,v 1.197 2016/02/16 01:31:26 ozaki-r Exp $ */ 1/* $NetBSD: if.h,v 1.198 2016/02/19 20:05:43 roy Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by William Studenmund and Jason R. Thorpe. 8 * by William Studenmund and Jason R. Thorpe.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -341,27 +341,27 @@ typedef struct ifnet { @@ -341,27 +341,27 @@ typedef struct ifnet {
341 * same, they are the same ifnet. 341 * same, they are the same ifnet.
342 */ 342 */
343 struct sysctllog *if_sysctl_log; 343 struct sysctllog *if_sysctl_log;
344 int (*if_initaddr)(struct ifnet *, struct ifaddr *, bool); 344 int (*if_initaddr)(struct ifnet *, struct ifaddr *, bool);
345 int (*if_mcastop)(struct ifnet *, const unsigned long, 345 int (*if_mcastop)(struct ifnet *, const unsigned long,
346 const struct sockaddr *); 346 const struct sockaddr *);
347 int (*if_setflags)(struct ifnet *, const short); 347 int (*if_setflags)(struct ifnet *, const short);
348 struct ifnet_lock *if_ioctl_lock; 348 struct ifnet_lock *if_ioctl_lock;
349#ifdef _KERNEL /* XXX kvm(3) */ 349#ifdef _KERNEL /* XXX kvm(3) */
350 struct callout *if_slowtimo_ch; 350 struct callout *if_slowtimo_ch;
351 struct krwlock *if_afdata_lock; 351 struct krwlock *if_afdata_lock;
352 struct if_percpuq *if_percpuq; /* We should remove it in the future */ 352 struct if_percpuq *if_percpuq; /* We should remove it in the future */
353 void *if_link_si; /* softint to handle link state changes */ 353 void *if_link_si; /* softint to handle link state changes */
354 int if_old_link_state; /* previous link state */ 354 uint16_t if_link_queue; /* masked link state change queue */
355#endif 355#endif
356} ifnet_t; 356} ifnet_t;
357  357
358#define if_mtu if_data.ifi_mtu 358#define if_mtu if_data.ifi_mtu
359#define if_type if_data.ifi_type 359#define if_type if_data.ifi_type
360#define if_addrlen if_data.ifi_addrlen 360#define if_addrlen if_data.ifi_addrlen
361#define if_hdrlen if_data.ifi_hdrlen 361#define if_hdrlen if_data.ifi_hdrlen
362#define if_metric if_data.ifi_metric 362#define if_metric if_data.ifi_metric
363#define if_link_state if_data.ifi_link_state 363#define if_link_state if_data.ifi_link_state
364#define if_baudrate if_data.ifi_baudrate 364#define if_baudrate if_data.ifi_baudrate
365#define if_ipackets if_data.ifi_ipackets 365#define if_ipackets if_data.ifi_ipackets
366#define if_ierrors if_data.ifi_ierrors 366#define if_ierrors if_data.ifi_ierrors
367#define if_opackets if_data.ifi_opackets 367#define if_opackets if_data.ifi_opackets