Fri Dec 1 09:21:15 2017 UTC ()
Pull up following revision(s) (requested by christos in ticket #415):
	sys/netipsec/key.c: revision 1.244
	sys/netipsec/key.c: revision 1.245
Use KDASSERT for mutex_ownable
Because mutex_ownable is not cheap.
Fix a deadlock happening if !NET_MPSAFE
If NET_MPSAFE isn't set, key_timehandler_work is executed while holding
softnet_lock. This means that localcount_drain can be called with
softnet_lock held, resulting in a deadlock: localcount_drain waits for packet
processing to release a reference to an SP/SA, while network processing is
blocked by softnet_lock.
Fix the deadlock by not taking softnet_lock in key_timehandler_work. This is
safe because IPsec is MP-safe even if !NET_MPSAFE. Note that the change also
needs to enable pserialize_perform even if !NET_MPSAFE, because the IPsec code
can now run in parallel.
Reported by christos@


(martin)
diff -r1.163.2.4 -r1.163.2.5 src/sys/netipsec/key.c

cvs diff -r1.163.2.4 -r1.163.2.5 src/sys/netipsec/key.c (expand / switch to unified diff)

--- src/sys/netipsec/key.c 2017/11/30 15:57:37 1.163.2.4
+++ src/sys/netipsec/key.c 2017/12/01 09:21:15 1.163.2.5
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: key.c,v 1.163.2.4 2017/11/30 15:57:37 martin Exp $ */ 1/* $NetBSD: key.c,v 1.163.2.5 2017/12/01 09:21:15 martin Exp $ */
2/* $FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $ */ 2/* $FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $ */
3/* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ 3/* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */
4 4
5/* 5/*
6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * All rights reserved. 7 * All rights reserved.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions 10 * modification, are permitted provided that the following conditions
11 * are met: 11 * are met:
12 * 1. Redistributions of source code must retain the above copyright 12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer. 13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
@@ -22,27 +22,27 @@ @@ -22,27 +22,27 @@
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE. 31 * SUCH DAMAGE.
32 */ 32 */
33 33
34#include <sys/cdefs.h> 34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.163.2.4 2017/11/30 15:57:37 martin Exp $"); 35__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.163.2.5 2017/12/01 09:21:15 martin Exp $");
36 36
37/* 37/*
38 * This code is referred to RFC 2367 38 * This code is referred to RFC 2367
39 */ 39 */
40 40
41#if defined(_KERNEL_OPT) 41#if defined(_KERNEL_OPT)
42#include "opt_inet.h" 42#include "opt_inet.h"
43#include "opt_ipsec.h" 43#include "opt_ipsec.h"
44#include "opt_gateway.h" 44#include "opt_gateway.h"
45#include "opt_net_mpsafe.h" 45#include "opt_net_mpsafe.h"
46#endif 46#endif
47 47
48#include <sys/types.h> 48#include <sys/types.h>
@@ -790,66 +790,62 @@ static void key_timehandler(void *); @@ -790,66 +790,62 @@ static void key_timehandler(void *);
790static void key_timehandler_work(struct work *, void *); 790static void key_timehandler_work(struct work *, void *);
791static struct callout key_timehandler_ch; 791static struct callout key_timehandler_ch;
792static struct workqueue *key_timehandler_wq; 792static struct workqueue *key_timehandler_wq;
793static struct work key_timehandler_wk; 793static struct work key_timehandler_wk;
794 794
795u_int 795u_int
796key_sp_refcnt(const struct secpolicy *sp) 796key_sp_refcnt(const struct secpolicy *sp)
797{ 797{
798 798
799 /* FIXME */ 799 /* FIXME */
800 return 0; 800 return 0;
801} 801}
802 802
803#ifdef NET_MPSAFE 
804static void 803static void
805key_spd_pserialize_perform(void) 804key_spd_pserialize_perform(void)
806{ 805{
807 806
808 KASSERT(mutex_owned(&key_spd.lock)); 807 KASSERT(mutex_owned(&key_spd.lock));
809 808
810 while (key_spd.psz_performing) 809 while (key_spd.psz_performing)
811 cv_wait(&key_spd.cv_psz, &key_spd.lock); 810 cv_wait(&key_spd.cv_psz, &key_spd.lock);
812 key_spd.psz_performing = true; 811 key_spd.psz_performing = true;
813 mutex_exit(&key_spd.lock); 812 mutex_exit(&key_spd.lock);
814 813
815 pserialize_perform(key_spd.psz); 814 pserialize_perform(key_spd.psz);
816 815
817 mutex_enter(&key_spd.lock); 816 mutex_enter(&key_spd.lock);
818 key_spd.psz_performing = false; 817 key_spd.psz_performing = false;
819 cv_broadcast(&key_spd.cv_psz); 818 cv_broadcast(&key_spd.cv_psz);
820} 819}
821#endif 
822 820
823/* 821/*
824 * Remove the sp from the key_spd.splist and wait for references to the sp 822 * Remove the sp from the key_spd.splist and wait for references to the sp
825 * to be released. key_spd.lock must be held. 823 * to be released. key_spd.lock must be held.
826 */ 824 */
827static void 825static void
828key_unlink_sp(struct secpolicy *sp) 826key_unlink_sp(struct secpolicy *sp)
829{ 827{
830 828
831 KASSERT(mutex_owned(&key_spd.lock)); 829 KASSERT(mutex_owned(&key_spd.lock));
832 830
833 sp->state = IPSEC_SPSTATE_DEAD; 831 sp->state = IPSEC_SPSTATE_DEAD;
834 SPLIST_WRITER_REMOVE(sp); 832 SPLIST_WRITER_REMOVE(sp);
835 833
836 /* Invalidate all cached SPD pointers in the PCBs. */ 834 /* Invalidate all cached SPD pointers in the PCBs. */
837 ipsec_invalpcbcacheall(); 835 ipsec_invalpcbcacheall();
838 836
839#ifdef NET_MPSAFE 837 KDASSERT(mutex_ownable(softnet_lock));
840 KASSERT(mutex_ownable(softnet_lock)); 
841 key_spd_pserialize_perform(); 838 key_spd_pserialize_perform();
842#endif 
843 839
844 localcount_drain(&sp->localcount, &key_spd.cv_lc, &key_spd.lock); 840 localcount_drain(&sp->localcount, &key_spd.cv_lc, &key_spd.lock);
845} 841}
846 842
847/* 843/*
848 * Return 0 when there are known to be no SP's for the specified 844 * Return 0 when there are known to be no SP's for the specified
849 * direction. Otherwise return 1. This is used by IPsec code 845 * direction. Otherwise return 1. This is used by IPsec code
850 * to optimize performance. 846 * to optimize performance.
851 */ 847 */
852int 848int
853key_havesp(u_int dir) 849key_havesp(u_int dir)
854{ 850{
855 return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? 851 return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ?
@@ -1483,62 +1479,58 @@ key_freesp_so(struct secpolicy **sp) @@ -1483,62 +1479,58 @@ key_freesp_so(struct secpolicy **sp)
1483 KASSERT(sp != NULL); 1479 KASSERT(sp != NULL);
1484 KASSERT(*sp != NULL); 1480 KASSERT(*sp != NULL);
1485 1481
1486 if ((*sp)->policy == IPSEC_POLICY_ENTRUST || 1482 if ((*sp)->policy == IPSEC_POLICY_ENTRUST ||
1487 (*sp)->policy == IPSEC_POLICY_BYPASS) 1483 (*sp)->policy == IPSEC_POLICY_BYPASS)
1488 return; 1484 return;
1489 1485
1490 KASSERTMSG((*sp)->policy == IPSEC_POLICY_IPSEC, 1486 KASSERTMSG((*sp)->policy == IPSEC_POLICY_IPSEC,
1491 "invalid policy %u", (*sp)->policy); 1487 "invalid policy %u", (*sp)->policy);
1492 KEY_SP_UNREF(&sp); 1488 KEY_SP_UNREF(&sp);
1493} 1489}
1494#endif 1490#endif
1495 1491
1496#ifdef NET_MPSAFE 
1497static void 1492static void
1498key_sad_pserialize_perform(void) 1493key_sad_pserialize_perform(void)
1499{ 1494{
1500 1495
1501 KASSERT(mutex_owned(&key_sad.lock)); 1496 KASSERT(mutex_owned(&key_sad.lock));
1502 1497
1503 while (key_sad.psz_performing) 1498 while (key_sad.psz_performing)
1504 cv_wait(&key_sad.cv_psz, &key_sad.lock); 1499 cv_wait(&key_sad.cv_psz, &key_sad.lock);
1505 key_sad.psz_performing = true; 1500 key_sad.psz_performing = true;
1506 mutex_exit(&key_sad.lock); 1501 mutex_exit(&key_sad.lock);
1507 1502
1508 pserialize_perform(key_sad.psz); 1503 pserialize_perform(key_sad.psz);
1509 1504
1510 mutex_enter(&key_sad.lock); 1505 mutex_enter(&key_sad.lock);
1511 key_sad.psz_performing = false; 1506 key_sad.psz_performing = false;
1512 cv_broadcast(&key_sad.cv_psz); 1507 cv_broadcast(&key_sad.cv_psz);
1513} 1508}
1514#endif 
1515 1509
1516/* 1510/*
1517 * Remove the sav from the savlist of its sah and wait for references to the sav 1511 * Remove the sav from the savlist of its sah and wait for references to the sav
1518 * to be released. key_sad.lock must be held. 1512 * to be released. key_sad.lock must be held.
1519 */ 1513 */
1520static void 1514static void
1521key_unlink_sav(struct secasvar *sav) 1515key_unlink_sav(struct secasvar *sav)
1522{ 1516{
1523 1517
1524 KASSERT(mutex_owned(&key_sad.lock)); 1518 KASSERT(mutex_owned(&key_sad.lock));
1525 1519
1526 SAVLIST_WRITER_REMOVE(sav); 1520 SAVLIST_WRITER_REMOVE(sav);
1527 1521
1528#ifdef NET_MPSAFE 1522 KDASSERT(mutex_ownable(softnet_lock));
1529 KASSERT(mutex_ownable(softnet_lock)); 
1530 key_sad_pserialize_perform(); 1523 key_sad_pserialize_perform();
1531#endif 
1532 1524
1533 localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock); 1525 localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock);
1534} 1526}
1535 1527
1536/* 1528/*
1537 * Destroy an sav where the sav must be unlinked from an sah 1529 * Destroy an sav where the sav must be unlinked from an sah
1538 * by say key_unlink_sav. 1530 * by say key_unlink_sav.
1539 */ 1531 */
1540static void 1532static void
1541key_destroy_sav(struct secasvar *sav) 1533key_destroy_sav(struct secasvar *sav)
1542{ 1534{
1543 1535
1544 ASSERT_SLEEPABLE(); 1536 ASSERT_SLEEPABLE();
@@ -1557,30 +1549,28 @@ key_destroy_sav_with_ref(struct secasvar @@ -1557,30 +1549,28 @@ key_destroy_sav_with_ref(struct secasvar
1557{ 1549{
1558 1550
1559 ASSERT_SLEEPABLE(); 1551 ASSERT_SLEEPABLE();
1560 1552
1561 mutex_enter(&key_sad.lock); 1553 mutex_enter(&key_sad.lock);
1562 sav->state = SADB_SASTATE_DEAD; 1554 sav->state = SADB_SASTATE_DEAD;
1563 SAVLIST_WRITER_REMOVE(sav); 1555 SAVLIST_WRITER_REMOVE(sav);
1564 mutex_exit(&key_sad.lock); 1556 mutex_exit(&key_sad.lock);
1565 1557
1566 /* We cannot unref with holding key_sad.lock */ 1558 /* We cannot unref with holding key_sad.lock */
1567 KEY_SA_UNREF(&sav); 1559 KEY_SA_UNREF(&sav);
1568 1560
1569 mutex_enter(&key_sad.lock); 1561 mutex_enter(&key_sad.lock);
1570#ifdef NET_MPSAFE 1562 KDASSERT(mutex_ownable(softnet_lock));
1571 KASSERT(mutex_ownable(softnet_lock)); 
1572 key_sad_pserialize_perform(); 1563 key_sad_pserialize_perform();
1573#endif 
1574 localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock); 1564 localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock);
1575 mutex_exit(&key_sad.lock); 1565 mutex_exit(&key_sad.lock);
1576 1566
1577 key_destroy_sav(sav); 1567 key_destroy_sav(sav);
1578} 1568}
1579 1569
1580/* %%% SPD management */ 1570/* %%% SPD management */
1581/* 1571/*
1582 * free security policy entry. 1572 * free security policy entry.
1583 */ 1573 */
1584static void 1574static void
1585key_destroy_sp(struct secpolicy *sp) 1575key_destroy_sp(struct secpolicy *sp)
1586{ 1576{
@@ -3038,30 +3028,28 @@ key_sah_has_sav(struct secashead *sah) @@ -3038,30 +3028,28 @@ key_sah_has_sav(struct secashead *sah)
3038} 3028}
3039 3029
3040static void 3030static void
3041key_unlink_sah(struct secashead *sah) 3031key_unlink_sah(struct secashead *sah)
3042{ 3032{
3043 3033
3044 KASSERT(!cpu_softintr_p()); 3034 KASSERT(!cpu_softintr_p());
3045 KASSERT(mutex_owned(&key_sad.lock)); 3035 KASSERT(mutex_owned(&key_sad.lock));
3046 KASSERT(sah->state == SADB_SASTATE_DEAD); 3036 KASSERT(sah->state == SADB_SASTATE_DEAD);
3047 3037
3048 /* Remove from the sah list */ 3038 /* Remove from the sah list */
3049 SAHLIST_WRITER_REMOVE(sah); 3039 SAHLIST_WRITER_REMOVE(sah);
3050 3040
3051#ifdef NET_MPSAFE 3041 KDASSERT(mutex_ownable(softnet_lock));
3052 KASSERT(mutex_ownable(softnet_lock)); 
3053 key_sad_pserialize_perform(); 3042 key_sad_pserialize_perform();
3054#endif 
3055 3043
3056 localcount_drain(&sah->localcount, &key_sad.cv_lc, &key_sad.lock); 3044 localcount_drain(&sah->localcount, &key_sad.cv_lc, &key_sad.lock);
3057} 3045}
3058 3046
3059static void 3047static void
3060key_destroy_sah(struct secashead *sah) 3048key_destroy_sah(struct secashead *sah)
3061{ 3049{
3062 3050
3063 rtcache_free(&sah->sa_route); 3051 rtcache_free(&sah->sa_route);
3064 3052
3065 SAHLIST_ENTRY_DESTROY(sah); 3053 SAHLIST_ENTRY_DESTROY(sah);
3066 localcount_fini(&sah->localcount); 3054 localcount_fini(&sah->localcount);
3067 3055
@@ -4852,44 +4840,40 @@ key_timehandler_spacq(time_t now) @@ -4852,44 +4840,40 @@ key_timehandler_spacq(time_t now)
4852} 4840}
4853 4841
4854static unsigned int key_timehandler_work_enqueued = 0; 4842static unsigned int key_timehandler_work_enqueued = 0;
4855 4843
4856/* 4844/*
4857 * time handler. 4845 * time handler.
4858 * scanning SPD and SAD to check status for each entries, 4846 * scanning SPD and SAD to check status for each entries,
4859 * and do to remove or to expire. 4847 * and do to remove or to expire.
4860 */ 4848 */
4861static void 4849static void
4862key_timehandler_work(struct work *wk, void *arg) 4850key_timehandler_work(struct work *wk, void *arg)
4863{ 4851{
4864 time_t now = time_uptime; 4852 time_t now = time_uptime;
4865 IPSEC_DECLARE_LOCK_VARIABLE; 
4866 4853
4867 /* We can allow enqueuing another work at this point */ 4854 /* We can allow enqueuing another work at this point */
4868 atomic_swap_uint(&key_timehandler_work_enqueued, 0); 4855 atomic_swap_uint(&key_timehandler_work_enqueued, 0);
4869 4856
4870 IPSEC_ACQUIRE_GLOBAL_LOCKS(); 
4871 
4872 key_timehandler_spd(now); 4857 key_timehandler_spd(now);
4873 key_timehandler_sad(now); 4858 key_timehandler_sad(now);
4874 key_timehandler_acq(now); 4859 key_timehandler_acq(now);
4875 key_timehandler_spacq(now); 4860 key_timehandler_spacq(now);
4876 4861
4877 key_acquire_sendup_pending_mbuf(); 4862 key_acquire_sendup_pending_mbuf();
4878 4863
4879 /* do exchange to tick time !! */ 4864 /* do exchange to tick time !! */
4880 callout_reset(&key_timehandler_ch, hz, key_timehandler, NULL); 4865 callout_reset(&key_timehandler_ch, hz, key_timehandler, NULL);
4881 4866
4882 IPSEC_RELEASE_GLOBAL_LOCKS(); 
4883 return; 4867 return;
4884} 4868}
4885 4869
4886static void 4870static void
4887key_timehandler(void *arg) 4871key_timehandler(void *arg)
4888{ 4872{
4889 4873
4890 /* Avoid enqueuing another work when one is already enqueued */ 4874 /* Avoid enqueuing another work when one is already enqueued */
4891 if (atomic_swap_uint(&key_timehandler_work_enqueued, 1) == 1) 4875 if (atomic_swap_uint(&key_timehandler_work_enqueued, 1) == 1)
4892 return; 4876 return;
4893 4877
4894 workqueue_enqueue(key_timehandler_wq, &key_timehandler_wk, NULL); 4878 workqueue_enqueue(key_timehandler_wq, &key_timehandler_wk, NULL);
4895} 4879}