Thu Jun 8 11:15:26 2023 UTC ()
Pull up following revision(s) (requested by ozaki-r in ticket #195):

	sys/net/route.c: revision 1.237

route: run workqueue kthreads with KERNEL_LOCK unless NET_MPSAFE

Without KERNEL_LOCK, rt_timer_work and rt_free_work can run in parallel
with other LWPs running in the network stack, which can eventually result
in, for example, a use-after-free of a deleted route.


(martin)
diff -r1.235.2.1 -r1.235.2.2 src/sys/net/route.c

cvs diff -r1.235.2.1 -r1.235.2.2 src/sys/net/route.c (expand / switch to unified diff)

--- src/sys/net/route.c 2023/02/22 18:52:45 1.235.2.1
+++ src/sys/net/route.c 2023/06/08 11:15:26 1.235.2.2
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: route.c,v 1.235.2.1 2023/02/22 18:52:45 martin Exp $ */ 1/* $NetBSD: route.c,v 1.235.2.2 2023/06/08 11:15:26 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center. 9 * NASA Ames Research Center.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
@@ -87,27 +87,27 @@ @@ -87,27 +87,27 @@
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE. 88 * SUCH DAMAGE.
89 * 89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95 90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */ 91 */
92 92
93#ifdef _KERNEL_OPT 93#ifdef _KERNEL_OPT
94#include "opt_inet.h" 94#include "opt_inet.h"
95#include "opt_route.h" 95#include "opt_route.h"
96#include "opt_net_mpsafe.h" 96#include "opt_net_mpsafe.h"
97#endif 97#endif
98 98
99#include <sys/cdefs.h> 99#include <sys/cdefs.h>
100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.235.2.1 2023/02/22 18:52:45 martin Exp $"); 100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.235.2.2 2023/06/08 11:15:26 martin Exp $");
101 101
102#include <sys/param.h> 102#include <sys/param.h>
103#ifdef RTFLUSH_DEBUG 103#ifdef RTFLUSH_DEBUG
104#include <sys/sysctl.h> 104#include <sys/sysctl.h>
105#endif 105#endif
106#include <sys/systm.h> 106#include <sys/systm.h>
107#include <sys/callout.h> 107#include <sys/callout.h>
108#include <sys/proc.h> 108#include <sys/proc.h>
109#include <sys/mbuf.h> 109#include <sys/mbuf.h>
110#include <sys/socket.h> 110#include <sys/socket.h>
111#include <sys/socketvar.h> 111#include <sys/socketvar.h>
112#include <sys/domain.h> 112#include <sys/domain.h>
113#include <sys/kernel.h> 113#include <sys/kernel.h>
@@ -219,32 +219,34 @@ static void rt_timer_timer(void *); @@ -219,32 +219,34 @@ static void rt_timer_timer(void *);
219 * otherwise the cache is invalidated 219 * otherwise the cache is invalidated
220 */ 220 */
221 221
222/* 222/*
223 * Global lock for the routing table. 223 * Global lock for the routing table.
224 */ 224 */
225static krwlock_t rt_lock __cacheline_aligned; 225static krwlock_t rt_lock __cacheline_aligned;
226#ifdef NET_MPSAFE 226#ifdef NET_MPSAFE
227#define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 227#define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
228#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 228#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
229#define RT_UNLOCK() rw_exit(&rt_lock) 229#define RT_UNLOCK() rw_exit(&rt_lock)
230#define RT_WLOCKED() rw_write_held(&rt_lock) 230#define RT_WLOCKED() rw_write_held(&rt_lock)
231#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 231#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
 232#define RT_WQ_FLAGS WQ_MPSAFE
232#else 233#else
233#define RT_RLOCK() do {} while (0) 234#define RT_RLOCK() do {} while (0)
234#define RT_WLOCK() do {} while (0) 235#define RT_WLOCK() do {} while (0)
235#define RT_UNLOCK() do {} while (0) 236#define RT_UNLOCK() do {} while (0)
236#define RT_WLOCKED() true 237#define RT_WLOCKED() true
237#define RT_ASSERT_WLOCK() do {} while (0) 238#define RT_ASSERT_WLOCK() do {} while (0)
 239#define RT_WQ_FLAGS 0
238#endif 240#endif
239 241
240static uint64_t rtcache_generation; 242static uint64_t rtcache_generation;
241 243
242/* 244/*
243 * mutex and cv that are used to wait for references to a rtentry left 245 * mutex and cv that are used to wait for references to a rtentry left
244 * before updating the rtentry. 246 * before updating the rtentry.
245 */ 247 */
246static struct { 248static struct {
247 kmutex_t lock; 249 kmutex_t lock;
248 kcondvar_t cv; 250 kcondvar_t cv;
249 bool ongoing; 251 bool ongoing;
250 const struct lwp *lwp; 252 const struct lwp *lwp;
@@ -467,27 +469,27 @@ rt_init(void) @@ -467,27 +469,27 @@ rt_init(void)
467 int error; 469 int error;
468 470
469#ifdef RTFLUSH_DEBUG 471#ifdef RTFLUSH_DEBUG
470 sysctl_net_rtcache_setup(NULL); 472 sysctl_net_rtcache_setup(NULL);
471#endif 473#endif
472 474
473 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 475 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
474 SLIST_INIT(&rt_free_global.queue); 476 SLIST_INIT(&rt_free_global.queue);
475 rt_free_global.enqueued = false; 477 rt_free_global.enqueued = false;
476 478
477 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 479 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);
478 480
479 error = workqueue_create(&rt_free_global.wq, "rt_free", 481 error = workqueue_create(&rt_free_global.wq, "rt_free",
480 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 482 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS);
481 if (error) 483 if (error)
482 panic("%s: workqueue_create failed (%d)\n", __func__, error); 484 panic("%s: workqueue_create failed (%d)\n", __func__, error);
483 485
484 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 486 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
485 cv_init(&rt_update_global.cv, "rt_update"); 487 cv_init(&rt_update_global.cv, "rt_update");
486 488
487 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 489 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
488 NULL, IPL_SOFTNET); 490 NULL, IPL_SOFTNET);
489 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 491 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
490 NULL, IPL_SOFTNET); 492 NULL, IPL_SOFTNET);
491 493
492 rn_init(); /* initialize all zeroes, all ones, mask table */ 494 rn_init(); /* initialize all zeroes, all ones, mask table */
493 rtbl_init(); 495 rtbl_init();
@@ -1812,27 +1814,27 @@ static void rt_timer_work(struct work *, @@ -1812,27 +1814,27 @@ static void rt_timer_work(struct work *,
1812static void 1814static void
1813rt_timer_init(void) 1815rt_timer_init(void)
1814{ 1816{
1815 int error; 1817 int error;
1816 1818
1817 assert(rt_init_done == 0); 1819 assert(rt_init_done == 0);
1818 1820
1819 /* XXX should be in rt_init */ 1821 /* XXX should be in rt_init */
1820 rw_init(&rt_lock); 1822 rw_init(&rt_lock);
1821 1823
1822 LIST_INIT(&rttimer_queue_head); 1824 LIST_INIT(&rttimer_queue_head);
1823 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1825 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1824 error = workqueue_create(&rt_timer_wq, "rt_timer", 1826 error = workqueue_create(&rt_timer_wq, "rt_timer",
1825 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1827 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS);
1826 if (error) 1828 if (error)
1827 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1829 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1828 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1830 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1829 rt_init_done = 1; 1831 rt_init_done = 1;
1830} 1832}
1831 1833
1832struct rttimer_queue * 1834struct rttimer_queue *
1833rt_timer_queue_create(u_int timeout) 1835rt_timer_queue_create(u_int timeout)
1834{ 1836{
1835 struct rttimer_queue *rtq; 1837 struct rttimer_queue *rtq;
1836 1838
1837 if (rt_init_done == 0) 1839 if (rt_init_done == 0)
1838 rt_timer_init(); 1840 rt_timer_init();