Thu Jun 8 11:15:26 2023 UTC
Pull up following revision(s) (requested by ozaki-r in ticket #195):

	sys/net/route.c: revision 1.237

route: run workqueue kthreads with KERNEL_LOCK unless NET_MPSAFE

Without KERNEL_LOCK, rt_timer_work and rt_free_work can run in parallel
with other LWPs running in the network stack, which can eventually
result in, for example, a use-after-free of a deleted route.


(martin)
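
The fix, in essence, is a compile-time choice of workqueue flags: pass
WQ_MPSAFE only on NET_MPSAFE kernels so that, otherwise, the handlers
(rt_free_work, and per the log message rt_timer_work as well) keep
running under KERNEL_LOCK.  A condensed sketch of the pattern as it
appears in the hunks below, with the error handling kept for
completeness:

    #ifdef NET_MPSAFE
    #define RT_WQ_FLAGS	WQ_MPSAFE	/* work handlers run without KERNEL_LOCK */
    #else
    #define RT_WQ_FLAGS	0		/* work handlers run with KERNEL_LOCK held */
    #endif

    error = workqueue_create(&rt_free_global.wq, "rt_free",
        rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS);
    if (error)
        panic("%s: workqueue_create failed (%d)\n", __func__, error);
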
cvs diff -r1.235.2.1 -r1.235.2.2 src/sys/net/route.c

--- src/sys/net/route.c 2023/02/22 18:52:45 1.235.2.1
+++ src/sys/net/route.c 2023/06/08 11:15:26 1.235.2.2
@@ -1,2510 +1,2512 @@
1/* $NetBSD: route.c,v 1.235.2.1 2023/02/22 18:52:45 martin Exp $ */ 1/* $NetBSD: route.c,v 1.235.2.2 2023/06/08 11:15:26 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center. 9 * NASA Ames Research Center.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved. 35 * All rights reserved.
36 * 36 *
37 * Redistribution and use in source and binary forms, with or without 37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions 38 * modification, are permitted provided that the following conditions
39 * are met: 39 * are met:
40 * 1. Redistributions of source code must retain the above copyright 40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer. 41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright 42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the 43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution. 44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors 45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software 46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission. 47 * without specific prior written permission.
48 * 48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE. 59 * SUCH DAMAGE.
60 */ 60 */
61 61
62/* 62/*
63 * Copyright (c) 1980, 1986, 1991, 1993 63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved. 64 * The Regents of the University of California. All rights reserved.
65 * 65 *
66 * Redistribution and use in source and binary forms, with or without 66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions 67 * modification, are permitted provided that the following conditions
68 * are met: 68 * are met:
69 * 1. Redistributions of source code must retain the above copyright 69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer. 70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright 71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the 72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution. 73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors 74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software 75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission. 76 * without specific prior written permission.
77 * 77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE. 88 * SUCH DAMAGE.
89 * 89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95 90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */ 91 */
92 92
93#ifdef _KERNEL_OPT 93#ifdef _KERNEL_OPT
94#include "opt_inet.h" 94#include "opt_inet.h"
95#include "opt_route.h" 95#include "opt_route.h"
96#include "opt_net_mpsafe.h" 96#include "opt_net_mpsafe.h"
97#endif 97#endif
98 98
99#include <sys/cdefs.h> 99#include <sys/cdefs.h>
100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.235.2.1 2023/02/22 18:52:45 martin Exp $"); 100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.235.2.2 2023/06/08 11:15:26 martin Exp $");
101 101
102#include <sys/param.h> 102#include <sys/param.h>
103#ifdef RTFLUSH_DEBUG 103#ifdef RTFLUSH_DEBUG
104#include <sys/sysctl.h> 104#include <sys/sysctl.h>
105#endif 105#endif
106#include <sys/systm.h> 106#include <sys/systm.h>
107#include <sys/callout.h> 107#include <sys/callout.h>
108#include <sys/proc.h> 108#include <sys/proc.h>
109#include <sys/mbuf.h> 109#include <sys/mbuf.h>
110#include <sys/socket.h> 110#include <sys/socket.h>
111#include <sys/socketvar.h> 111#include <sys/socketvar.h>
112#include <sys/domain.h> 112#include <sys/domain.h>
113#include <sys/kernel.h> 113#include <sys/kernel.h>
114#include <sys/ioctl.h> 114#include <sys/ioctl.h>
115#include <sys/pool.h> 115#include <sys/pool.h>
116#include <sys/kauth.h> 116#include <sys/kauth.h>
117#include <sys/workqueue.h> 117#include <sys/workqueue.h>
118#include <sys/syslog.h> 118#include <sys/syslog.h>
119#include <sys/rwlock.h> 119#include <sys/rwlock.h>
120#include <sys/mutex.h> 120#include <sys/mutex.h>
121#include <sys/cpu.h> 121#include <sys/cpu.h>
122#include <sys/kmem.h> 122#include <sys/kmem.h>
123 123
124#include <net/if.h> 124#include <net/if.h>
125#include <net/if_dl.h> 125#include <net/if_dl.h>
126#include <net/route.h> 126#include <net/route.h>
127#if defined(INET) || defined(INET6) 127#if defined(INET) || defined(INET6)
128#include <net/if_llatbl.h> 128#include <net/if_llatbl.h>
129#endif 129#endif
130 130
131#include <netinet/in.h> 131#include <netinet/in.h>
132#include <netinet/in_var.h> 132#include <netinet/in_var.h>
133 133
134#define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 134#define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK)
135 135
136#ifdef RTFLUSH_DEBUG 136#ifdef RTFLUSH_DEBUG
137#define rtcache_debug() __predict_false(_rtcache_debug) 137#define rtcache_debug() __predict_false(_rtcache_debug)
138#else /* RTFLUSH_DEBUG */ 138#else /* RTFLUSH_DEBUG */
139#define rtcache_debug() 0 139#define rtcache_debug() 0
140#endif /* RTFLUSH_DEBUG */ 140#endif /* RTFLUSH_DEBUG */
141 141
142#ifdef RT_DEBUG 142#ifdef RT_DEBUG
143#define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 143#define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \
144 __func__, __LINE__, (rt), (rt)->rt_refcnt) 144 __func__, __LINE__, (rt), (rt)->rt_refcnt)
145#else 145#else
146#define RT_REFCNT_TRACE(rt) do {} while (0) 146#define RT_REFCNT_TRACE(rt) do {} while (0)
147#endif 147#endif
148 148
149#ifdef RT_DEBUG 149#ifdef RT_DEBUG
150#define dlog(level, fmt, args...) log(level, fmt, ##args) 150#define dlog(level, fmt, args...) log(level, fmt, ##args)
151#else 151#else
152#define dlog(level, fmt, args...) do {} while (0) 152#define dlog(level, fmt, args...) do {} while (0)
153#endif 153#endif
154 154
155struct rtstat rtstat; 155struct rtstat rtstat;
156 156
157static int rttrash; /* routes not in table but not freed */ 157static int rttrash; /* routes not in table but not freed */
158 158
159static struct pool rtentry_pool; 159static struct pool rtentry_pool;
160static struct pool rttimer_pool; 160static struct pool rttimer_pool;
161 161
162static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 162static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
163static struct workqueue *rt_timer_wq; 163static struct workqueue *rt_timer_wq;
164static struct work rt_timer_wk; 164static struct work rt_timer_wk;
165 165
166static void rt_timer_init(void); 166static void rt_timer_init(void);
167static void rt_timer_queue_remove_all(struct rttimer_queue *); 167static void rt_timer_queue_remove_all(struct rttimer_queue *);
168static void rt_timer_remove_all(struct rtentry *); 168static void rt_timer_remove_all(struct rtentry *);
169static void rt_timer_timer(void *); 169static void rt_timer_timer(void *);
170 170
171/* 171/*
172 * Locking notes: 172 * Locking notes:
173 * - The routing table is protected by a global rwlock 173 * - The routing table is protected by a global rwlock
174 * - API: RT_RLOCK and friends 174 * - API: RT_RLOCK and friends
175 * - rtcaches are NOT protected by the framework 175 * - rtcaches are NOT protected by the framework
176 * - Callers must guarantee a rtcache isn't accessed simultaneously 176 * - Callers must guarantee a rtcache isn't accessed simultaneously
177 * - How the constraint is guaranteed in the wild 177 * - How the constraint is guaranteed in the wild
178 * - Protect a rtcache by a mutex (e.g., inp_route) 178 * - Protect a rtcache by a mutex (e.g., inp_route)
179 * - Make rtcache per-CPU and allow only accesses from softint 179 * - Make rtcache per-CPU and allow only accesses from softint
180 * (e.g., ipforward_rt_percpu) 180 * (e.g., ipforward_rt_percpu)
181 * - References to a rtentry is managed by reference counting and psref 181 * - References to a rtentry is managed by reference counting and psref
182 * - Reference counting is used for temporal reference when a rtentry 182 * - Reference counting is used for temporal reference when a rtentry
183 * is fetched from the routing table 183 * is fetched from the routing table
184 * - psref is used for temporal reference when a rtentry is fetched 184 * - psref is used for temporal reference when a rtentry is fetched
185 * from a rtcache 185 * from a rtcache
186 * - struct route (rtcache) has struct psref, so we cannot obtain 186 * - struct route (rtcache) has struct psref, so we cannot obtain
187 * a reference twice on the same struct route 187 * a reference twice on the same struct route
188 * - Before destroying or updating a rtentry, we have to wait for 188 * - Before destroying or updating a rtentry, we have to wait for
189 * all references left (see below for details) 189 * all references left (see below for details)
190 * - APIs 190 * - APIs
191 * - An obtained rtentry via rtalloc1 or rtrequest* must be 191 * - An obtained rtentry via rtalloc1 or rtrequest* must be
192 * unreferenced by rt_unref 192 * unreferenced by rt_unref
193 * - An obtained rtentry via rtcache_* must be unreferenced by 193 * - An obtained rtentry via rtcache_* must be unreferenced by
194 * rtcache_unref 194 * rtcache_unref
195 * - TODO: once we get a lockless routing table, we should use only 195 * - TODO: once we get a lockless routing table, we should use only
196 * psref for rtentries 196 * psref for rtentries
197 * - rtentry destruction 197 * - rtentry destruction
198 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 198 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
199 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 199 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
200 * has a responsibility to destroy the rtentry by itself by calling 200 * has a responsibility to destroy the rtentry by itself by calling
201 * rt_free 201 * rt_free
202 * - If not, rtrequest itself does that 202 * - If not, rtrequest itself does that
203 * - If rt_free is called in softint, the actual destruction routine is 203 * - If rt_free is called in softint, the actual destruction routine is
204 * deferred to a workqueue 204 * deferred to a workqueue
205 * - rtentry update 205 * - rtentry update
206 * - When updating a rtentry, RTF_UPDATING flag is set 206 * - When updating a rtentry, RTF_UPDATING flag is set
207 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 207 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
208 * the routing table or a rtcache results in either of the following 208 * the routing table or a rtcache results in either of the following
209 * cases: 209 * cases:
210 * - if the caller runs in softint, the caller fails to fetch 210 * - if the caller runs in softint, the caller fails to fetch
211 * - otherwise, the caller waits for the update completed and retries 211 * - otherwise, the caller waits for the update completed and retries
212 * to fetch (probably succeed to fetch for the second time) 212 * to fetch (probably succeed to fetch for the second time)
213 * - rtcache invalidation 213 * - rtcache invalidation
214 * - There is a global generation counter that is incremented when 214 * - There is a global generation counter that is incremented when
215 * any routes have been added or deleted 215 * any routes have been added or deleted
216 * - When a rtcache caches a rtentry into itself, it also stores 216 * - When a rtcache caches a rtentry into itself, it also stores
217 * a snapshot of the generation counter 217 * a snapshot of the generation counter
218 * - If the snapshot equals to the global counter, the cache is valid, 218 * - If the snapshot equals to the global counter, the cache is valid,
219 * otherwise the cache is invalidated 219 * otherwise the cache is invalidated
220 */ 220 */
221 221
222/* 222/*
223 * Global lock for the routing table. 223 * Global lock for the routing table.
224 */ 224 */
225static krwlock_t rt_lock __cacheline_aligned; 225static krwlock_t rt_lock __cacheline_aligned;
226#ifdef NET_MPSAFE 226#ifdef NET_MPSAFE
227#define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 227#define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
228#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 228#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
229#define RT_UNLOCK() rw_exit(&rt_lock) 229#define RT_UNLOCK() rw_exit(&rt_lock)
230#define RT_WLOCKED() rw_write_held(&rt_lock) 230#define RT_WLOCKED() rw_write_held(&rt_lock)
231#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 231#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
 232#define RT_WQ_FLAGS WQ_MPSAFE
232#else 233#else
233#define RT_RLOCK() do {} while (0) 234#define RT_RLOCK() do {} while (0)
234#define RT_WLOCK() do {} while (0) 235#define RT_WLOCK() do {} while (0)
235#define RT_UNLOCK() do {} while (0) 236#define RT_UNLOCK() do {} while (0)
236#define RT_WLOCKED() true 237#define RT_WLOCKED() true
237#define RT_ASSERT_WLOCK() do {} while (0) 238#define RT_ASSERT_WLOCK() do {} while (0)
 239#define RT_WQ_FLAGS 0
238#endif 240#endif
239 241
240static uint64_t rtcache_generation; 242static uint64_t rtcache_generation;
241 243
242/* 244/*
243 * mutex and cv that are used to wait for references to a rtentry left 245 * mutex and cv that are used to wait for references to a rtentry left
244 * before updating the rtentry. 246 * before updating the rtentry.
245 */ 247 */
246static struct { 248static struct {
247 kmutex_t lock; 249 kmutex_t lock;
248 kcondvar_t cv; 250 kcondvar_t cv;
249 bool ongoing; 251 bool ongoing;
250 const struct lwp *lwp; 252 const struct lwp *lwp;
251} rt_update_global __cacheline_aligned; 253} rt_update_global __cacheline_aligned;
252 254
253/* 255/*
254 * A workqueue and stuff that are used to defer the destruction routine 256 * A workqueue and stuff that are used to defer the destruction routine
255 * of rtentries. 257 * of rtentries.
256 */ 258 */
257static struct { 259static struct {
258 struct workqueue *wq; 260 struct workqueue *wq;
259 struct work wk; 261 struct work wk;
260 kmutex_t lock; 262 kmutex_t lock;
261 SLIST_HEAD(, rtentry) queue; 263 SLIST_HEAD(, rtentry) queue;
262 bool enqueued; 264 bool enqueued;
263} rt_free_global __cacheline_aligned; 265} rt_free_global __cacheline_aligned;
264 266
265/* psref for rtentry */ 267/* psref for rtentry */
266static struct psref_class *rt_psref_class __read_mostly; 268static struct psref_class *rt_psref_class __read_mostly;
267 269
268#ifdef RTFLUSH_DEBUG 270#ifdef RTFLUSH_DEBUG
269static int _rtcache_debug = 0; 271static int _rtcache_debug = 0;
270#endif /* RTFLUSH_DEBUG */ 272#endif /* RTFLUSH_DEBUG */
271 273
272static kauth_listener_t route_listener; 274static kauth_listener_t route_listener;
273 275
274static int rtdeletemsg(struct rtentry *); 276static int rtdeletemsg(struct rtentry *);
275 277
276static void rt_maskedcopy(const struct sockaddr *, 278static void rt_maskedcopy(const struct sockaddr *,
277 struct sockaddr *, const struct sockaddr *); 279 struct sockaddr *, const struct sockaddr *);
278 280
279static void rtcache_invalidate(void); 281static void rtcache_invalidate(void);
280 282
281static void rt_ref(struct rtentry *); 283static void rt_ref(struct rtentry *);
282 284
283static struct rtentry * 285static struct rtentry *
284 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 286 rtalloc1_locked(const struct sockaddr *, int, bool, bool);
285 287
286static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 288static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *);
287static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 289static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *);
288static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 290static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *,
289 const struct sockaddr *, struct psref *); 291 const struct sockaddr *, struct psref *);
290 292
291static void rtcache_ref(struct rtentry *, struct route *); 293static void rtcache_ref(struct rtentry *, struct route *);
292 294
293#ifdef NET_MPSAFE 295#ifdef NET_MPSAFE
294static void rt_update_wait(void); 296static void rt_update_wait(void);
295#endif 297#endif
296 298
297static bool rt_wait_ok(void); 299static bool rt_wait_ok(void);
298static void rt_wait_refcnt(const char *, struct rtentry *, int); 300static void rt_wait_refcnt(const char *, struct rtentry *, int);
299static void rt_wait_psref(struct rtentry *); 301static void rt_wait_psref(struct rtentry *);
300 302
301#ifdef DDB 303#ifdef DDB
302static void db_print_sa(const struct sockaddr *); 304static void db_print_sa(const struct sockaddr *);
303static void db_print_ifa(struct ifaddr *); 305static void db_print_ifa(struct ifaddr *);
304static int db_show_rtentry(struct rtentry *, void *); 306static int db_show_rtentry(struct rtentry *, void *);
305#endif 307#endif
306 308
307#ifdef RTFLUSH_DEBUG 309#ifdef RTFLUSH_DEBUG
308static void sysctl_net_rtcache_setup(struct sysctllog **); 310static void sysctl_net_rtcache_setup(struct sysctllog **);
309static void 311static void
310sysctl_net_rtcache_setup(struct sysctllog **clog) 312sysctl_net_rtcache_setup(struct sysctllog **clog)
311{ 313{
312 const struct sysctlnode *rnode; 314 const struct sysctlnode *rnode;
313 315
314 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 316 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
315 CTLTYPE_NODE, 317 CTLTYPE_NODE,
316 "rtcache", SYSCTL_DESCR("Route cache related settings"), 318 "rtcache", SYSCTL_DESCR("Route cache related settings"),
317 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 319 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
318 return; 320 return;
319 if (sysctl_createv(clog, 0, &rnode, &rnode, 321 if (sysctl_createv(clog, 0, &rnode, &rnode,
320 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 322 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
321 "debug", SYSCTL_DESCR("Debug route caches"), 323 "debug", SYSCTL_DESCR("Debug route caches"),
322 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 324 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
323 return; 325 return;
324} 326}
325#endif /* RTFLUSH_DEBUG */ 327#endif /* RTFLUSH_DEBUG */
326 328
327static inline void 329static inline void
328rt_destroy(struct rtentry *rt) 330rt_destroy(struct rtentry *rt)
329{ 331{
330 if (rt->_rt_key != NULL) 332 if (rt->_rt_key != NULL)
331 sockaddr_free(rt->_rt_key); 333 sockaddr_free(rt->_rt_key);
332 if (rt->rt_gateway != NULL) 334 if (rt->rt_gateway != NULL)
333 sockaddr_free(rt->rt_gateway); 335 sockaddr_free(rt->rt_gateway);
334 if (rt_gettag(rt) != NULL) 336 if (rt_gettag(rt) != NULL)
335 sockaddr_free(rt_gettag(rt)); 337 sockaddr_free(rt_gettag(rt));
336 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 338 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
337} 339}
338 340
339static inline const struct sockaddr * 341static inline const struct sockaddr *
340rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 342rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
341{ 343{
342 if (rt->_rt_key == key) 344 if (rt->_rt_key == key)
343 goto out; 345 goto out;
344 346
345 if (rt->_rt_key != NULL) 347 if (rt->_rt_key != NULL)
346 sockaddr_free(rt->_rt_key); 348 sockaddr_free(rt->_rt_key);
347 rt->_rt_key = sockaddr_dup(key, flags); 349 rt->_rt_key = sockaddr_dup(key, flags);
348out: 350out:
349 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 351 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
350 return rt->_rt_key; 352 return rt->_rt_key;
351} 353}
352 354
353struct ifaddr * 355struct ifaddr *
354rt_get_ifa(struct rtentry *rt) 356rt_get_ifa(struct rtentry *rt)
355{ 357{
356 struct ifaddr *ifa; 358 struct ifaddr *ifa;
357 359
358 ifa = rt->rt_ifa; 360 ifa = rt->rt_ifa;
359 if (ifa->ifa_getifa == NULL) 361 if (ifa->ifa_getifa == NULL)
360 return ifa; 362 return ifa;
361#if 0 363#if 0
362 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 364 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
363 return ifa; 365 return ifa;
364#endif 366#endif
365 else { 367 else {
366 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 368 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
367 if (ifa == NULL) 369 if (ifa == NULL)
368 return NULL; 370 return NULL;
369 rt_replace_ifa(rt, ifa); 371 rt_replace_ifa(rt, ifa);
370 return ifa; 372 return ifa;
371 } 373 }
372} 374}
373 375
374static void 376static void
375rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 377rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
376{ 378{
377 rt->rt_ifa = ifa; 379 rt->rt_ifa = ifa;
378 if (ifa->ifa_seqno != NULL) 380 if (ifa->ifa_seqno != NULL)
379 rt->rt_ifa_seqno = *ifa->ifa_seqno; 381 rt->rt_ifa_seqno = *ifa->ifa_seqno;
380} 382}
381 383
382/* 384/*
383 * Is this route the connected route for the ifa? 385 * Is this route the connected route for the ifa?
384 */ 386 */
385static int 387static int
386rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 388rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
387{ 389{
388 const struct sockaddr *key, *dst, *odst; 390 const struct sockaddr *key, *dst, *odst;
389 struct sockaddr_storage maskeddst; 391 struct sockaddr_storage maskeddst;
390 392
391 key = rt_getkey(rt); 393 key = rt_getkey(rt);
392 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 394 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
393 if (dst == NULL || 395 if (dst == NULL ||
394 dst->sa_family != key->sa_family || 396 dst->sa_family != key->sa_family ||
395 dst->sa_len != key->sa_len) 397 dst->sa_len != key->sa_len)
396 return 0; 398 return 0;
397 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 399 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
398 odst = dst; 400 odst = dst;
399 dst = (struct sockaddr *)&maskeddst; 401 dst = (struct sockaddr *)&maskeddst;
400 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 402 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
401 ifa->ifa_netmask); 403 ifa->ifa_netmask);
402 } 404 }
403 return (memcmp(dst, key, dst->sa_len) == 0); 405 return (memcmp(dst, key, dst->sa_len) == 0);
404} 406}
405 407
406void 408void
407rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 409rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
408{ 410{
409 struct ifaddr *old; 411 struct ifaddr *old;
410 412
411 if (rt->rt_ifa == ifa) 413 if (rt->rt_ifa == ifa)
412 return; 414 return;
413 415
414 if (rt->rt_ifa != ifa && 416 if (rt->rt_ifa != ifa &&
415 rt->rt_ifa->ifa_flags & IFA_ROUTE && 417 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
416 rt_ifa_connected(rt, rt->rt_ifa)) 418 rt_ifa_connected(rt, rt->rt_ifa))
417 { 419 {
418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 420 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
419 "replace deleted IFA_ROUTE\n", 421 "replace deleted IFA_ROUTE\n",
420 (void *)rt->_rt_key, (void *)rt->rt_ifa); 422 (void *)rt->_rt_key, (void *)rt->rt_ifa);
421 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 423 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
422 if (rt_ifa_connected(rt, ifa)) { 424 if (rt_ifa_connected(rt, ifa)) {
423 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 425 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
424 "replace added IFA_ROUTE\n", 426 "replace added IFA_ROUTE\n",
425 (void *)rt->_rt_key, (void *)ifa); 427 (void *)rt->_rt_key, (void *)ifa);
426 ifa->ifa_flags |= IFA_ROUTE; 428 ifa->ifa_flags |= IFA_ROUTE;
427 } 429 }
428 } 430 }
429 431
430 ifaref(ifa); 432 ifaref(ifa);
431 old = rt->rt_ifa; 433 old = rt->rt_ifa;
432 rt_set_ifa1(rt, ifa); 434 rt_set_ifa1(rt, ifa);
433 ifafree(old); 435 ifafree(old);
434} 436}
435 437
436static void 438static void
437rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 439rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
438{ 440{
439 ifaref(ifa); 441 ifaref(ifa);
440 rt_set_ifa1(rt, ifa); 442 rt_set_ifa1(rt, ifa);
441} 443}
442 444
443static int 445static int
444route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 446route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
445 void *arg0, void *arg1, void *arg2, void *arg3) 447 void *arg0, void *arg1, void *arg2, void *arg3)
446{ 448{
447 struct rt_msghdr *rtm; 449 struct rt_msghdr *rtm;
448 int result; 450 int result;
449 451
450 result = KAUTH_RESULT_DEFER; 452 result = KAUTH_RESULT_DEFER;
451 rtm = arg1; 453 rtm = arg1;
452 454
453 if (action != KAUTH_NETWORK_ROUTE) 455 if (action != KAUTH_NETWORK_ROUTE)
454 return result; 456 return result;
455 457
456 if (rtm->rtm_type == RTM_GET) 458 if (rtm->rtm_type == RTM_GET)
457 result = KAUTH_RESULT_ALLOW; 459 result = KAUTH_RESULT_ALLOW;
458 460
459 return result; 461 return result;
460} 462}
461 463
462static void rt_free_work(struct work *, void *); 464static void rt_free_work(struct work *, void *);
463 465
464void 466void
465rt_init(void) 467rt_init(void)
466{ 468{
467 int error; 469 int error;
468 470
469#ifdef RTFLUSH_DEBUG 471#ifdef RTFLUSH_DEBUG
470 sysctl_net_rtcache_setup(NULL); 472 sysctl_net_rtcache_setup(NULL);
471#endif 473#endif
472 474
473 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 475 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
474 SLIST_INIT(&rt_free_global.queue); 476 SLIST_INIT(&rt_free_global.queue);
475 rt_free_global.enqueued = false; 477 rt_free_global.enqueued = false;
476 478
477 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 479 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);
478 480
479 error = workqueue_create(&rt_free_global.wq, "rt_free", 481 error = workqueue_create(&rt_free_global.wq, "rt_free",
480 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 482 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS);
481 if (error) 483 if (error)
482 panic("%s: workqueue_create failed (%d)\n", __func__, error); 484 panic("%s: workqueue_create failed (%d)\n", __func__, error);
483 485
484 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 486 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
485 cv_init(&rt_update_global.cv, "rt_update"); 487 cv_init(&rt_update_global.cv, "rt_update");
486 488
487 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 489 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
488 NULL, IPL_SOFTNET); 490 NULL, IPL_SOFTNET);
489 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 491 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
490 NULL, IPL_SOFTNET); 492 NULL, IPL_SOFTNET);
491 493
492 rn_init(); /* initialize all zeroes, all ones, mask table */ 494 rn_init(); /* initialize all zeroes, all ones, mask table */
493 rtbl_init(); 495 rtbl_init();
494 496
495 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 497 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
496 route_listener_cb, NULL); 498 route_listener_cb, NULL);
497} 499}
498 500
499static void 501static void
500rtcache_invalidate(void) 502rtcache_invalidate(void)
501{ 503{
502 504
503 RT_ASSERT_WLOCK(); 505 RT_ASSERT_WLOCK();
504 506
505 if (rtcache_debug()) 507 if (rtcache_debug())
506 printf("%s: enter\n", __func__); 508 printf("%s: enter\n", __func__);
507 509
508 rtcache_generation++; 510 rtcache_generation++;
509} 511}
510 512
511#ifdef RT_DEBUG 513#ifdef RT_DEBUG
512static void 514static void
513dump_rt(const struct rtentry *rt) 515dump_rt(const struct rtentry *rt)
514{ 516{
515 char buf[512]; 517 char buf[512];
516 518
517 log(LOG_DEBUG, "rt: "); 519 log(LOG_DEBUG, "rt: ");
518 log(LOG_DEBUG, "p=%p ", rt); 520 log(LOG_DEBUG, "p=%p ", rt);
519 if (rt->_rt_key == NULL) { 521 if (rt->_rt_key == NULL) {
520 log(LOG_DEBUG, "dst=(NULL) "); 522 log(LOG_DEBUG, "dst=(NULL) ");
521 } else { 523 } else {
522 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 524 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
523 log(LOG_DEBUG, "dst=%s ", buf); 525 log(LOG_DEBUG, "dst=%s ", buf);
524 } 526 }
525 if (rt->rt_gateway == NULL) { 527 if (rt->rt_gateway == NULL) {
526 log(LOG_DEBUG, "gw=(NULL) "); 528 log(LOG_DEBUG, "gw=(NULL) ");
527 } else { 529 } else {
528 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 530 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
529 log(LOG_DEBUG, "gw=%s ", buf); 531 log(LOG_DEBUG, "gw=%s ", buf);
530 } 532 }
531 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 533 log(LOG_DEBUG, "flags=%x ", rt->rt_flags);
532 if (rt->rt_ifp == NULL) { 534 if (rt->rt_ifp == NULL) {
533 log(LOG_DEBUG, "if=(NULL) "); 535 log(LOG_DEBUG, "if=(NULL) ");
534 } else { 536 } else {
535 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 537 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname);
536 } 538 }
537 log(LOG_DEBUG, "\n"); 539 log(LOG_DEBUG, "\n");
538} 540}
539#endif /* RT_DEBUG */ 541#endif /* RT_DEBUG */
540 542
541/* 543/*
542 * Packet routing routines. If success, refcnt of a returned rtentry 544 * Packet routing routines. If success, refcnt of a returned rtentry
543 * will be incremented. The caller has to rtfree it by itself. 545 * will be incremented. The caller has to rtfree it by itself.
544 */ 546 */
545struct rtentry * 547struct rtentry *
546rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 548rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
547 bool wlock) 549 bool wlock)
548{ 550{
549 rtbl_t *rtbl; 551 rtbl_t *rtbl;
550 struct rtentry *rt; 552 struct rtentry *rt;
551 int s; 553 int s;
552 554
553#ifdef NET_MPSAFE 555#ifdef NET_MPSAFE
554retry: 556retry:
555#endif 557#endif
556 s = splsoftnet(); 558 s = splsoftnet();
557 rtbl = rt_gettable(dst->sa_family); 559 rtbl = rt_gettable(dst->sa_family);
558 if (rtbl == NULL) 560 if (rtbl == NULL)
559 goto miss; 561 goto miss;
560 562
561 rt = rt_matchaddr(rtbl, dst); 563 rt = rt_matchaddr(rtbl, dst);
562 if (rt == NULL) 564 if (rt == NULL)
563 goto miss; 565 goto miss;
564 566
565 if (!ISSET(rt->rt_flags, RTF_UP)) 567 if (!ISSET(rt->rt_flags, RTF_UP))
566 goto miss; 568 goto miss;
567 569
568#ifdef NET_MPSAFE 570#ifdef NET_MPSAFE
569 if (ISSET(rt->rt_flags, RTF_UPDATING) && 571 if (ISSET(rt->rt_flags, RTF_UPDATING) &&
570 /* XXX updater should be always able to acquire */ 572 /* XXX updater should be always able to acquire */
571 curlwp != rt_update_global.lwp) { 573 curlwp != rt_update_global.lwp) {
572 if (!wait_ok || !rt_wait_ok()) 574 if (!wait_ok || !rt_wait_ok())
573 goto miss; 575 goto miss;
574 RT_UNLOCK(); 576 RT_UNLOCK();
575 splx(s); 577 splx(s);
576 578
577 /* We can wait until the update is complete */ 579 /* We can wait until the update is complete */
578 rt_update_wait(); 580 rt_update_wait();
579 581
580 if (wlock) 582 if (wlock)
581 RT_WLOCK(); 583 RT_WLOCK();
582 else 584 else
583 RT_RLOCK(); 585 RT_RLOCK();
584 goto retry; 586 goto retry;
585 } 587 }
586#endif /* NET_MPSAFE */ 588#endif /* NET_MPSAFE */
587 589
588 rt_ref(rt); 590 rt_ref(rt);
589 RT_REFCNT_TRACE(rt); 591 RT_REFCNT_TRACE(rt);
590 592
591 splx(s); 593 splx(s);
592 return rt; 594 return rt;
593miss: 595miss:
594 rtstat.rts_unreach++; 596 rtstat.rts_unreach++;
595 if (report) { 597 if (report) {
596 struct rt_addrinfo info; 598 struct rt_addrinfo info;
597 599
598 memset(&info, 0, sizeof(info)); 600 memset(&info, 0, sizeof(info));
599 info.rti_info[RTAX_DST] = dst; 601 info.rti_info[RTAX_DST] = dst;
600 rt_missmsg(RTM_MISS, &info, 0, 0); 602 rt_missmsg(RTM_MISS, &info, 0, 0);
601 } 603 }
602 splx(s); 604 splx(s);
603 return NULL; 605 return NULL;
604} 606}
605 607
606struct rtentry * 608struct rtentry *
607rtalloc1(const struct sockaddr *dst, int report) 609rtalloc1(const struct sockaddr *dst, int report)
608{ 610{
609 struct rtentry *rt; 611 struct rtentry *rt;
610 612
611 RT_RLOCK(); 613 RT_RLOCK();
612 rt = rtalloc1_locked(dst, report, true, false); 614 rt = rtalloc1_locked(dst, report, true, false);
613 RT_UNLOCK(); 615 RT_UNLOCK();
614 616
615 return rt; 617 return rt;
616} 618}
617 619
618static void 620static void
619rt_ref(struct rtentry *rt) 621rt_ref(struct rtentry *rt)
620{ 622{
621 623
622 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt); 624 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt);
623 atomic_inc_uint(&rt->rt_refcnt); 625 atomic_inc_uint(&rt->rt_refcnt);
624} 626}
625 627
626void 628void
627rt_unref(struct rtentry *rt) 629rt_unref(struct rtentry *rt)
628{ 630{
629 631
630 KASSERT(rt != NULL); 632 KASSERT(rt != NULL);
631 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 633 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);
632 634
633 atomic_dec_uint(&rt->rt_refcnt); 635 atomic_dec_uint(&rt->rt_refcnt);
634 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 636 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
635 mutex_enter(&rt_free_global.lock); 637 mutex_enter(&rt_free_global.lock);
636 cv_broadcast(&rt->rt_cv); 638 cv_broadcast(&rt->rt_cv);
637 mutex_exit(&rt_free_global.lock); 639 mutex_exit(&rt_free_global.lock);
638 } 640 }
639} 641}
640 642
641static bool 643static bool
642rt_wait_ok(void) 644rt_wait_ok(void)
643{ 645{
644 646
645 /* 647 /*
646 * This originally returned !cpu_softintr_p(), but that doesn't 648 * This originally returned !cpu_softintr_p(), but that doesn't
647 * work: the caller may hold a lock (probably softnet lock) 649 * work: the caller may hold a lock (probably softnet lock)
648 * that a softint is waiting for, in which case waiting here 650 * that a softint is waiting for, in which case waiting here
649 * would cause a deadlock. See https://gnats.netbsd.org/56844 651 * would cause a deadlock. See https://gnats.netbsd.org/56844
650 * for details. For now, until the locking paths are sorted 652 * for details. For now, until the locking paths are sorted
651 * out, we just disable the waiting option altogether and 653 * out, we just disable the waiting option altogether and
652 * always defer to workqueue. 654 * always defer to workqueue.
653 */ 655 */
654 KASSERT(!cpu_intr_p()); 656 KASSERT(!cpu_intr_p());
655 return false; 657 return false;
656} 658}
657 659
658void 660void
659rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 661rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
660{ 662{
661 mutex_enter(&rt_free_global.lock); 663 mutex_enter(&rt_free_global.lock);
662 while (rt->rt_refcnt > cnt) { 664 while (rt->rt_refcnt > cnt) {
663 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 665 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
664 __func__, title, rt->rt_refcnt); 666 __func__, title, rt->rt_refcnt);
665 cv_wait(&rt->rt_cv, &rt_free_global.lock); 667 cv_wait(&rt->rt_cv, &rt_free_global.lock);
666 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 668 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
667 __func__, title, rt->rt_refcnt); 669 __func__, title, rt->rt_refcnt);
668 } 670 }
669 mutex_exit(&rt_free_global.lock); 671 mutex_exit(&rt_free_global.lock);
670} 672}
671 673
672void 674void
673rt_wait_psref(struct rtentry *rt) 675rt_wait_psref(struct rtentry *rt)
674{ 676{
675 677
676 psref_target_destroy(&rt->rt_psref, rt_psref_class); 678 psref_target_destroy(&rt->rt_psref, rt_psref_class);
677 psref_target_init(&rt->rt_psref, rt_psref_class); 679 psref_target_init(&rt->rt_psref, rt_psref_class);
678} 680}
679 681
680static void 682static void
681_rt_free(struct rtentry *rt) 683_rt_free(struct rtentry *rt)
682{ 684{
683 struct ifaddr *ifa; 685 struct ifaddr *ifa;
684 686
685 /* 687 /*
686 * Need to avoid a deadlock on rt_wait_refcnt of update 688 * Need to avoid a deadlock on rt_wait_refcnt of update
687 * and a conflict on psref_target_destroy of update. 689 * and a conflict on psref_target_destroy of update.
688 */ 690 */
689#ifdef NET_MPSAFE 691#ifdef NET_MPSAFE
690 rt_update_wait(); 692 rt_update_wait();
691#endif 693#endif
692 694
693 RT_REFCNT_TRACE(rt); 695 RT_REFCNT_TRACE(rt);
694 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 696 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
695 rt_wait_refcnt("free", rt, 0); 697 rt_wait_refcnt("free", rt, 0);
696#ifdef NET_MPSAFE 698#ifdef NET_MPSAFE
697 psref_target_destroy(&rt->rt_psref, rt_psref_class); 699 psref_target_destroy(&rt->rt_psref, rt_psref_class);
698#endif 700#endif
699 701
700 rt_assert_inactive(rt); 702 rt_assert_inactive(rt);
701 rttrash--; 703 rttrash--;
702 ifa = rt->rt_ifa; 704 ifa = rt->rt_ifa;
703 rt->rt_ifa = NULL; 705 rt->rt_ifa = NULL;
704 ifafree(ifa); 706 ifafree(ifa);
705 rt->rt_ifp = NULL; 707 rt->rt_ifp = NULL;
706 cv_destroy(&rt->rt_cv); 708 cv_destroy(&rt->rt_cv);
707 rt_destroy(rt); 709 rt_destroy(rt);
708 pool_put(&rtentry_pool, rt); 710 pool_put(&rtentry_pool, rt);
709} 711}
710 712
711static void 713static void
712rt_free_work(struct work *wk, void *arg) 714rt_free_work(struct work *wk, void *arg)
713{ 715{
714 716
715 for (;;) { 717 for (;;) {
716 struct rtentry *rt; 718 struct rtentry *rt;
717 719
718 mutex_enter(&rt_free_global.lock); 720 mutex_enter(&rt_free_global.lock);
719 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 721 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) {
720 rt_free_global.enqueued = false; 722 rt_free_global.enqueued = false;
721 mutex_exit(&rt_free_global.lock); 723 mutex_exit(&rt_free_global.lock);
722 return; 724 return;
723 } 725 }
724 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 726 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free);
725 mutex_exit(&rt_free_global.lock); 727 mutex_exit(&rt_free_global.lock);
726 atomic_dec_uint(&rt->rt_refcnt); 728 atomic_dec_uint(&rt->rt_refcnt);
727 _rt_free(rt); 729 _rt_free(rt);
728 } 730 }
729} 731}
730 732
731void 733void
732rt_free(struct rtentry *rt) 734rt_free(struct rtentry *rt)
733{ 735{
734 736
735 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt); 737 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt);
736 if (rt_wait_ok()) { 738 if (rt_wait_ok()) {
737 atomic_dec_uint(&rt->rt_refcnt); 739 atomic_dec_uint(&rt->rt_refcnt);
738 _rt_free(rt); 740 _rt_free(rt);
739 return; 741 return;
740 } 742 }
741 743
742 mutex_enter(&rt_free_global.lock); 744 mutex_enter(&rt_free_global.lock);
743 /* No need to add a reference here. */ 745 /* No need to add a reference here. */
744 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 746 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free);
745 if (!rt_free_global.enqueued) { 747 if (!rt_free_global.enqueued) {
746 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 748 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
747 rt_free_global.enqueued = true; 749 rt_free_global.enqueued = true;
748 } 750 }
749 mutex_exit(&rt_free_global.lock); 751 mutex_exit(&rt_free_global.lock);
750} 752}
751 753
752#ifdef NET_MPSAFE 754#ifdef NET_MPSAFE
753static void 755static void
754rt_update_wait(void) 756rt_update_wait(void)
755{ 757{
756 758
757 mutex_enter(&rt_update_global.lock); 759 mutex_enter(&rt_update_global.lock);
758 while (rt_update_global.ongoing) { 760 while (rt_update_global.ongoing) {
759 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 761 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
760 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 762 cv_wait(&rt_update_global.cv, &rt_update_global.lock);
761 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 763 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
762 } 764 }
763 mutex_exit(&rt_update_global.lock); 765 mutex_exit(&rt_update_global.lock);
764} 766}
765#endif 767#endif
766 768
767int 769int
768rt_update_prepare(struct rtentry *rt) 770rt_update_prepare(struct rtentry *rt)
769{ 771{
770 772
771 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 773 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);
772 774
773 RT_WLOCK(); 775 RT_WLOCK();
774 /* If the entry is being destroyed, don't proceed the update. */ 776 /* If the entry is being destroyed, don't proceed the update. */
775 if (!ISSET(rt->rt_flags, RTF_UP)) { 777 if (!ISSET(rt->rt_flags, RTF_UP)) {
776 RT_UNLOCK(); 778 RT_UNLOCK();
777 return ESRCH; 779 return ESRCH;
778 } 780 }
779 rt->rt_flags |= RTF_UPDATING; 781 rt->rt_flags |= RTF_UPDATING;
780 RT_UNLOCK(); 782 RT_UNLOCK();
781 783
782 mutex_enter(&rt_update_global.lock); 784 mutex_enter(&rt_update_global.lock);
783 while (rt_update_global.ongoing) { 785 while (rt_update_global.ongoing) {
784 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 786 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
785 __func__, rt, curlwp); 787 __func__, rt, curlwp);
786 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 788 cv_wait(&rt_update_global.cv, &rt_update_global.lock);
787 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 789 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
788 __func__, rt, curlwp); 790 __func__, rt, curlwp);
789 } 791 }
790 rt_update_global.ongoing = true; 792 rt_update_global.ongoing = true;
791 /* XXX need it to avoid rt_update_wait by updater itself. */ 793 /* XXX need it to avoid rt_update_wait by updater itself. */
792 rt_update_global.lwp = curlwp; 794 rt_update_global.lwp = curlwp;
793 mutex_exit(&rt_update_global.lock); 795 mutex_exit(&rt_update_global.lock);
794 796
795 rt_wait_refcnt("update", rt, 1); 797 rt_wait_refcnt("update", rt, 1);
796 rt_wait_psref(rt); 798 rt_wait_psref(rt);
797 799
798 return 0; 800 return 0;
799} 801}
800 802
801void 803void
802rt_update_finish(struct rtentry *rt) 804rt_update_finish(struct rtentry *rt)
803{ 805{
804 806
805 RT_WLOCK(); 807 RT_WLOCK();
806 rt->rt_flags &= ~RTF_UPDATING; 808 rt->rt_flags &= ~RTF_UPDATING;
807 RT_UNLOCK(); 809 RT_UNLOCK();
808 810
809 mutex_enter(&rt_update_global.lock); 811 mutex_enter(&rt_update_global.lock);
810 rt_update_global.ongoing = false; 812 rt_update_global.ongoing = false;
811 rt_update_global.lwp = NULL; 813 rt_update_global.lwp = NULL;
812 cv_broadcast(&rt_update_global.cv); 814 cv_broadcast(&rt_update_global.cv);
813 mutex_exit(&rt_update_global.lock); 815 mutex_exit(&rt_update_global.lock);
814 816
815 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 817 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
816} 818}
817 819
818/* 820/*
819 * Force a routing table entry to the specified 821 * Force a routing table entry to the specified
820 * destination to go through the given gateway. 822 * destination to go through the given gateway.
821 * Normally called as a result of a routing redirect 823 * Normally called as a result of a routing redirect
822 * message from the network layer. 824 * message from the network layer.
823 * 825 *
824 * N.B.: must be called at splsoftnet 826 * N.B.: must be called at splsoftnet
825 */ 827 */
826void 828void
827rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 829rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
828 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 830 const struct sockaddr *netmask, int flags, const struct sockaddr *src,
829 struct rtentry **rtp) 831 struct rtentry **rtp)
830{ 832{
831 struct rtentry *rt; 833 struct rtentry *rt;
832 int error = 0; 834 int error = 0;
833 uint64_t *stat = NULL; 835 uint64_t *stat = NULL;
834 struct rt_addrinfo info; 836 struct rt_addrinfo info;
835 struct ifaddr *ifa; 837 struct ifaddr *ifa;
836 struct psref psref; 838 struct psref psref;
837 839
838 /* verify the gateway is directly reachable */ 840 /* verify the gateway is directly reachable */
839 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 841 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
840 error = ENETUNREACH; 842 error = ENETUNREACH;
841 goto out; 843 goto out;
842 } 844 }
843 rt = rtalloc1(dst, 0); 845 rt = rtalloc1(dst, 0);
844 /* 846 /*
845 * If the redirect isn't from our current router for this dst, 847 * If the redirect isn't from our current router for this dst,
846 * it's either old or wrong. If it redirects us to ourselves, 848 * it's either old or wrong. If it redirects us to ourselves,
847 * we have a routing loop, perhaps as a result of an interface 849 * we have a routing loop, perhaps as a result of an interface
848 * going down recently. 850 * going down recently.
849 */ 851 */
850 if (!(flags & RTF_DONE) && rt && 852 if (!(flags & RTF_DONE) && rt &&
851 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 853 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
852 error = EINVAL; 854 error = EINVAL;
853 else { 855 else {
854 int s = pserialize_read_enter(); 856 int s = pserialize_read_enter();
855 struct ifaddr *_ifa; 857 struct ifaddr *_ifa;
856 858
857 _ifa = ifa_ifwithaddr(gateway); 859 _ifa = ifa_ifwithaddr(gateway);
858 if (_ifa != NULL) 860 if (_ifa != NULL)
859 error = EHOSTUNREACH; 861 error = EHOSTUNREACH;
860 pserialize_read_exit(s); 862 pserialize_read_exit(s);
861 } 863 }
862 if (error) 864 if (error)
863 goto done; 865 goto done;
864 /* 866 /*
865 * Create a new entry if we just got back a wildcard entry 867 * Create a new entry if we just got back a wildcard entry
866 * or the lookup failed. This is necessary for hosts 868 * or the lookup failed. This is necessary for hosts
867 * which use routing redirects generated by smart gateways 869 * which use routing redirects generated by smart gateways
868 * to dynamically build the routing tables. 870 * to dynamically build the routing tables.
869 */ 871 */
870 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 872 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
871 goto create; 873 goto create;
872 /* 874 /*
873 * Don't listen to the redirect if it's 875 * Don't listen to the redirect if it's
874 * for a route to an interface. 876 * for a route to an interface.
875 */ 877 */
876 if (rt->rt_flags & RTF_GATEWAY) { 878 if (rt->rt_flags & RTF_GATEWAY) {
877 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 879 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
878 /* 880 /*
879 * Changing from route to net => route to host. 881 * Changing from route to net => route to host.
880 * Create new route, rather than smashing route to net. 882 * Create new route, rather than smashing route to net.
881 */ 883 */
882 create: 884 create:
883 if (rt != NULL) 885 if (rt != NULL)
884 rt_unref(rt); 886 rt_unref(rt);
885 flags |= RTF_GATEWAY | RTF_DYNAMIC; 887 flags |= RTF_GATEWAY | RTF_DYNAMIC;
886 memset(&info, 0, sizeof(info)); 888 memset(&info, 0, sizeof(info));
887 info.rti_info[RTAX_DST] = dst; 889 info.rti_info[RTAX_DST] = dst;
888 info.rti_info[RTAX_GATEWAY] = gateway; 890 info.rti_info[RTAX_GATEWAY] = gateway;
889 info.rti_info[RTAX_NETMASK] = netmask; 891 info.rti_info[RTAX_NETMASK] = netmask;
890 info.rti_ifa = ifa; 892 info.rti_ifa = ifa;
891 info.rti_flags = flags; 893 info.rti_flags = flags;
892 rt = NULL; 894 rt = NULL;
893 error = rtrequest1(RTM_ADD, &info, &rt); 895 error = rtrequest1(RTM_ADD, &info, &rt);
894 if (rt != NULL) 896 if (rt != NULL)
895 flags = rt->rt_flags; 897 flags = rt->rt_flags;
896 if (error == 0) 898 if (error == 0)
897 rt_newmsg_dynamic(RTM_ADD, rt); 899 rt_newmsg_dynamic(RTM_ADD, rt);
898 stat = &rtstat.rts_dynamic; 900 stat = &rtstat.rts_dynamic;
899 } else { 901 } else {
900 /* 902 /*
901 * Smash the current notion of the gateway to 903 * Smash the current notion of the gateway to
902 * this destination. Should check about netmask!!! 904 * this destination. Should check about netmask!!!
903 */ 905 */
904#ifdef NET_MPSAFE 906#ifdef NET_MPSAFE
905 KASSERT(!cpu_softintr_p()); 907 KASSERT(!cpu_softintr_p());
906 908
907 error = rt_update_prepare(rt); 909 error = rt_update_prepare(rt);
908 if (error == 0) { 910 if (error == 0) {
909#endif 911#endif
910 RT_WLOCK(); 912 RT_WLOCK();
911 error = rt_setgate(rt, gateway); 913 error = rt_setgate(rt, gateway);
912 if (error == 0) { 914 if (error == 0) {
913 rt->rt_flags |= RTF_MODIFIED; 915 rt->rt_flags |= RTF_MODIFIED;
914 flags |= RTF_MODIFIED; 916 flags |= RTF_MODIFIED;
915 } 917 }
916 RT_UNLOCK(); 918 RT_UNLOCK();
917#ifdef NET_MPSAFE 919#ifdef NET_MPSAFE
918 rt_update_finish(rt); 920 rt_update_finish(rt);
919 } else { 921 } else {
920 /* 922 /*
921 * If error != 0, the rtentry is being 923 * If error != 0, the rtentry is being
922 * destroyed, so doing nothing doesn't 924 * destroyed, so doing nothing doesn't
923 * matter. 925 * matter.
924 */ 926 */
925 } 927 }
926#endif 928#endif
927 stat = &rtstat.rts_newgateway; 929 stat = &rtstat.rts_newgateway;
928 } 930 }
929 } else 931 } else
930 error = EHOSTUNREACH; 932 error = EHOSTUNREACH;
931done: 933done:
932 if (rt) { 934 if (rt) {
933 if (rtp != NULL && !error) 935 if (rtp != NULL && !error)
934 *rtp = rt; 936 *rtp = rt;
935 else 937 else
936 rt_unref(rt); 938 rt_unref(rt);
937 } 939 }
938out: 940out:
939 if (error) 941 if (error)
940 rtstat.rts_badredirect++; 942 rtstat.rts_badredirect++;
941 else if (stat != NULL) 943 else if (stat != NULL)
942 (*stat)++; 944 (*stat)++;
943 memset(&info, 0, sizeof(info)); 945 memset(&info, 0, sizeof(info));
944 info.rti_info[RTAX_DST] = dst; 946 info.rti_info[RTAX_DST] = dst;
945 info.rti_info[RTAX_GATEWAY] = gateway; 947 info.rti_info[RTAX_GATEWAY] = gateway;
946 info.rti_info[RTAX_NETMASK] = netmask; 948 info.rti_info[RTAX_NETMASK] = netmask;
947 info.rti_info[RTAX_AUTHOR] = src; 949 info.rti_info[RTAX_AUTHOR] = src;
948 rt_missmsg(RTM_REDIRECT, &info, flags, error); 950 rt_missmsg(RTM_REDIRECT, &info, flags, error);
949 ifa_release(ifa, &psref); 951 ifa_release(ifa, &psref);
950} 952}
951 953
952/* 954/*
953 * Delete a route and generate a message. 955 * Delete a route and generate a message.
954 * It doesn't free a passed rt. 956 * It doesn't free a passed rt.
955 */ 957 */
956static int 958static int
957rtdeletemsg(struct rtentry *rt) 959rtdeletemsg(struct rtentry *rt)
958{ 960{
959 int error; 961 int error;
960 struct rt_addrinfo info; 962 struct rt_addrinfo info;
961 struct rtentry *retrt; 963 struct rtentry *retrt;
962 964
963 /* 965 /*
964 * Request the new route so that the entry is not actually 966 * Request the new route so that the entry is not actually
965 * deleted. That will allow the information being reported to 967 * deleted. That will allow the information being reported to
966 * be accurate (and consistent with route_output()). 968 * be accurate (and consistent with route_output()).
967 */ 969 */
968 memset(&info, 0, sizeof(info)); 970 memset(&info, 0, sizeof(info));
969 info.rti_info[RTAX_DST] = rt_getkey(rt); 971 info.rti_info[RTAX_DST] = rt_getkey(rt);
970 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 972 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
971 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 973 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
972 info.rti_flags = rt->rt_flags; 974 info.rti_flags = rt->rt_flags;
973 error = rtrequest1(RTM_DELETE, &info, &retrt); 975 error = rtrequest1(RTM_DELETE, &info, &retrt);
974 976
975 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 977 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
976 978
977 return error; 979 return error;
978} 980}
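
The note above that rtdeletemsg() does not free the passed rt matters to its callers; the caller-side pattern used later in this file (see rt_ifa_remlocal()) boils down to the following sketch:

	if (rtdeletemsg(rt) == 0) {
		/* The entry is out of the table; drop our reference ... */
		rt_unref(rt);
		/* ... and schedule the rtentry itself for destruction. */
		rt_free(rt);
		rt = NULL;
	}
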
979 981
980static struct ifaddr * 982static struct ifaddr *
981ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 983ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
982 const struct sockaddr *gateway, struct psref *psref) 984 const struct sockaddr *gateway, struct psref *psref)
983{ 985{
984 struct ifaddr *ifa = NULL; 986 struct ifaddr *ifa = NULL;
985 987
986 if ((flags & RTF_GATEWAY) == 0) { 988 if ((flags & RTF_GATEWAY) == 0) {
987 /* 989 /*
988 * If we are adding a route to an interface, 990 * If we are adding a route to an interface,
989 * and the interface is a pt to pt link 991 * and the interface is a pt to pt link
990 * we should search for the destination 992 * we should search for the destination
991 * as our clue to the interface. Otherwise 993 * as our clue to the interface. Otherwise
992 * we can use the local address. 994 * we can use the local address.
993 */ 995 */
994 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 996 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
995 ifa = ifa_ifwithdstaddr_psref(dst, psref); 997 ifa = ifa_ifwithdstaddr_psref(dst, psref);
996 if (ifa == NULL) 998 if (ifa == NULL)
997 ifa = ifa_ifwithaddr_psref(gateway, psref); 999 ifa = ifa_ifwithaddr_psref(gateway, psref);
998 } else { 1000 } else {
999 /* 1001 /*
1000 * If we are adding a route to a remote net 1002 * If we are adding a route to a remote net
1001 * or host, the gateway may still be on the 1003 * or host, the gateway may still be on the
1002 * other end of a pt to pt link. 1004 * other end of a pt to pt link.
1003 */ 1005 */
1004 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 1006 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
1005 } 1007 }
1006 if (ifa == NULL) 1008 if (ifa == NULL)
1007 ifa = ifa_ifwithnet_psref(gateway, psref); 1009 ifa = ifa_ifwithnet_psref(gateway, psref);
1008 if (ifa == NULL) { 1010 if (ifa == NULL) {
1009 int s; 1011 int s;
1010 struct rtentry *rt; 1012 struct rtentry *rt;
1011 1013
1012 rt = rtalloc1_locked(gateway, 0, true, true); 1014 rt = rtalloc1_locked(gateway, 0, true, true);
1013 if (rt == NULL) 1015 if (rt == NULL)
1014 return NULL; 1016 return NULL;
1015 if (rt->rt_flags & RTF_GATEWAY) { 1017 if (rt->rt_flags & RTF_GATEWAY) {
1016 rt_unref(rt); 1018 rt_unref(rt);
1017 return NULL; 1019 return NULL;
1018 } 1020 }
1019 /* 1021 /*
1020 * Just in case. May not need to do this workaround. 1022 * Just in case. May not need to do this workaround.
1021 * Revisit when working on rtentry MP-ification. 1023 * Revisit when working on rtentry MP-ification.
1022 */ 1024 */
1023 s = pserialize_read_enter(); 1025 s = pserialize_read_enter();
1024 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1026 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
1025 if (ifa == rt->rt_ifa) 1027 if (ifa == rt->rt_ifa)
1026 break; 1028 break;
1027 } 1029 }
1028 if (ifa != NULL) 1030 if (ifa != NULL)
1029 ifa_acquire(ifa, psref); 1031 ifa_acquire(ifa, psref);
1030 pserialize_read_exit(s); 1032 pserialize_read_exit(s);
1031 rt_unref(rt); 1033 rt_unref(rt);
1032 if (ifa == NULL) 1034 if (ifa == NULL)
1033 return NULL; 1035 return NULL;
1034 } 1036 }
1035 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1037 if (ifa->ifa_addr->sa_family != dst->sa_family) {
1036 struct ifaddr *nifa; 1038 struct ifaddr *nifa;
1037 int s; 1039 int s;
1038 1040
1039 s = pserialize_read_enter(); 1041 s = pserialize_read_enter();
1040 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1042 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1041 if (nifa != NULL) { 1043 if (nifa != NULL) {
1042 ifa_release(ifa, psref); 1044 ifa_release(ifa, psref);
1043 ifa_acquire(nifa, psref); 1045 ifa_acquire(nifa, psref);
1044 ifa = nifa; 1046 ifa = nifa;
1045 } 1047 }
1046 pserialize_read_exit(s); 1048 pserialize_read_exit(s);
1047 } 1049 }
1048 return ifa; 1050 return ifa;
1049} 1051}
1050 1052
1051/* 1053/*
1052 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1054 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1053 * The caller has to rtfree it by itself. 1055 * The caller has to rtfree it by itself.
1054 */ 1056 */
1055int 1057int
1056rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1058rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1057 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1059 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1058{ 1060{
1059 struct rt_addrinfo info; 1061 struct rt_addrinfo info;
1060 1062
1061 memset(&info, 0, sizeof(info)); 1063 memset(&info, 0, sizeof(info));
1062 info.rti_flags = flags; 1064 info.rti_flags = flags;
1063 info.rti_info[RTAX_DST] = dst; 1065 info.rti_info[RTAX_DST] = dst;
1064 info.rti_info[RTAX_GATEWAY] = gateway; 1066 info.rti_info[RTAX_GATEWAY] = gateway;
1065 info.rti_info[RTAX_NETMASK] = netmask; 1067 info.rti_info[RTAX_NETMASK] = netmask;
1066 return rtrequest1(req, &info, ret_nrt); 1068 return rtrequest1(req, &info, ret_nrt);
1067} 1069}
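
As a usage illustration only (not taken from this change), a minimal in-kernel caller of rtrequest() honouring the reference-count contract stated above (release is rt_unref() in this file) might look like the sketch below; dst, gateway, netmask and the consumer use_route() are hypothetical placeholders:

	struct rtentry *nrt = NULL;
	int error;

	error = rtrequest(RTM_ADD, dst, gateway, netmask, RTF_GATEWAY, &nrt);
	if (error == 0 && nrt != NULL) {
		use_route(nrt);		/* hypothetical consumer */
		rt_unref(nrt);		/* refcnt was bumped for us, release it */
	}
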
1068 1070
1069static struct ifnet * 1071static struct ifnet *
1070rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1072rt_getifp(struct rt_addrinfo *info, struct psref *psref)
1071{ 1073{
1072 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1074 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
1073 1075
1074 if (info->rti_ifp != NULL) 1076 if (info->rti_ifp != NULL)
1075 return NULL; 1077 return NULL;
1076 /* 1078 /*
1077 * ifp may be specified by sockaddr_dl when protocol address 1079 * ifp may be specified by sockaddr_dl when protocol address
1078 * is ambiguous 1080 * is ambiguous
1079 */ 1081 */
1080 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1082 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
1081 struct ifaddr *ifa; 1083 struct ifaddr *ifa;
1082 int s = pserialize_read_enter(); 1084 int s = pserialize_read_enter();
1083 1085
1084 ifa = ifa_ifwithnet(ifpaddr); 1086 ifa = ifa_ifwithnet(ifpaddr);
1085 if (ifa != NULL) 1087 if (ifa != NULL)
1086 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1088 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
1087 psref); 1089 psref);
1088 pserialize_read_exit(s); 1090 pserialize_read_exit(s);
1089 } 1091 }
1090 1092
1091 return info->rti_ifp; 1093 return info->rti_ifp;
1092} 1094}
1093 1095
1094static struct ifaddr * 1096static struct ifaddr *
1095rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1097rt_getifa(struct rt_addrinfo *info, struct psref *psref)
1096{ 1098{
1097 struct ifaddr *ifa = NULL; 1099 struct ifaddr *ifa = NULL;
1098 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1100 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1099 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1101 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1100 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1102 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
1101 int flags = info->rti_flags; 1103 int flags = info->rti_flags;
1102 const struct sockaddr *sa; 1104 const struct sockaddr *sa;
1103 1105
1104 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1106 if (info->rti_ifa == NULL && ifaaddr != NULL) {
1105 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1107 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
1106 if (ifa != NULL) 1108 if (ifa != NULL)
1107 goto got; 1109 goto got;
1108 } 1110 }
1109 1111
1110 sa = ifaaddr != NULL ? ifaaddr : 1112 sa = ifaaddr != NULL ? ifaaddr :
1111 (gateway != NULL ? gateway : dst); 1113 (gateway != NULL ? gateway : dst);
1112 if (sa != NULL && info->rti_ifp != NULL) 1114 if (sa != NULL && info->rti_ifp != NULL)
1113 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1115 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
1114 else if (dst != NULL && gateway != NULL) 1116 else if (dst != NULL && gateway != NULL)
1115 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1117 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
1116 else if (sa != NULL) 1118 else if (sa != NULL)
1117 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1119 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
1118 if (ifa == NULL) 1120 if (ifa == NULL)
1119 return NULL; 1121 return NULL;
1120got: 1122got:
1121 if (ifa->ifa_getifa != NULL) { 1123 if (ifa->ifa_getifa != NULL) {
1122 /* FIXME ifa_getifa is NOMPSAFE */ 1124 /* FIXME ifa_getifa is NOMPSAFE */
1123 ifa = (*ifa->ifa_getifa)(ifa, dst); 1125 ifa = (*ifa->ifa_getifa)(ifa, dst);
1124 if (ifa == NULL) 1126 if (ifa == NULL)
1125 return NULL; 1127 return NULL;
1126 ifa_acquire(ifa, psref); 1128 ifa_acquire(ifa, psref);
1127 } 1129 }
1128 info->rti_ifa = ifa; 1130 info->rti_ifa = ifa;
1129 if (info->rti_ifp == NULL) 1131 if (info->rti_ifp == NULL)
1130 info->rti_ifp = ifa->ifa_ifp; 1132 info->rti_ifp = ifa->ifa_ifp;
1131 return ifa; 1133 return ifa;
1132} 1134}
1133 1135
1134/* 1136/*
1135 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1137 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1136 * The caller has to rtfree it by itself. 1138 * The caller has to rtfree it by itself.
1137 */ 1139 */
1138int 1140int
1139rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1141rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1140{ 1142{
1141 int s = splsoftnet(), ss; 1143 int s = splsoftnet(), ss;
1142 int error = 0, rc; 1144 int error = 0, rc;
1143 struct rtentry *rt; 1145 struct rtentry *rt;
1144 rtbl_t *rtbl; 1146 rtbl_t *rtbl;
1145 struct ifaddr *ifa = NULL; 1147 struct ifaddr *ifa = NULL;
1146 struct sockaddr_storage maskeddst; 1148 struct sockaddr_storage maskeddst;
1147 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1149 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1148 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1150 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1149 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1151 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1150 int flags = info->rti_flags; 1152 int flags = info->rti_flags;
1151 struct psref psref_ifp, psref_ifa; 1153 struct psref psref_ifp, psref_ifa;
1152 int bound = 0; 1154 int bound = 0;
1153 struct ifnet *ifp = NULL; 1155 struct ifnet *ifp = NULL;
1154 bool need_to_release_ifa = true; 1156 bool need_to_release_ifa = true;
1155 bool need_unlock = true; 1157 bool need_unlock = true;
1156#define senderr(x) { error = x ; goto bad; } 1158#define senderr(x) { error = x ; goto bad; }
1157 1159
1158 RT_WLOCK(); 1160 RT_WLOCK();
1159 1161
1160 bound = curlwp_bind(); 1162 bound = curlwp_bind();
1161 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1163 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1162 senderr(ESRCH); 1164 senderr(ESRCH);
1163 if (flags & RTF_HOST) 1165 if (flags & RTF_HOST)
1164 netmask = NULL; 1166 netmask = NULL;
1165 switch (req) { 1167 switch (req) {
1166 case RTM_DELETE: 1168 case RTM_DELETE:
1167 if (netmask) { 1169 if (netmask) {
1168 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1170 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1169 netmask); 1171 netmask);
1170 dst = (struct sockaddr *)&maskeddst; 1172 dst = (struct sockaddr *)&maskeddst;
1171 } 1173 }
1172 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1174 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1173 senderr(ESRCH); 1175 senderr(ESRCH);
1174 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL) 1176 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1175 senderr(ESRCH); 1177 senderr(ESRCH);
1176 rt->rt_flags &= ~RTF_UP; 1178 rt->rt_flags &= ~RTF_UP;
1177 ifa = rt->rt_ifa; 1179 ifa = rt->rt_ifa;
1178 if (ifa->ifa_flags & IFA_ROUTE && 1180 if (ifa->ifa_flags & IFA_ROUTE &&
1179 rt_ifa_connected(rt, ifa)) { 1181 rt_ifa_connected(rt, ifa)) {
1180 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1182 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1181 "deleted IFA_ROUTE\n", 1183 "deleted IFA_ROUTE\n",
1182 (void *)rt->_rt_key, (void *)ifa); 1184 (void *)rt->_rt_key, (void *)ifa);
1183 ifa->ifa_flags &= ~IFA_ROUTE; 1185 ifa->ifa_flags &= ~IFA_ROUTE;
1184 } 1186 }
1185 if (ifa->ifa_rtrequest) 1187 if (ifa->ifa_rtrequest)
1186 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1188 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1187 ifa = NULL; 1189 ifa = NULL;
1188 rttrash++; 1190 rttrash++;
1189 if (ret_nrt) { 1191 if (ret_nrt) {
1190 *ret_nrt = rt; 1192 *ret_nrt = rt;
1191 rt_ref(rt); 1193 rt_ref(rt);
1192 RT_REFCNT_TRACE(rt); 1194 RT_REFCNT_TRACE(rt);
1193 } 1195 }
1194 rtcache_invalidate(); 1196 rtcache_invalidate();
1195 RT_UNLOCK(); 1197 RT_UNLOCK();
1196 need_unlock = false; 1198 need_unlock = false;
1197 rt_timer_remove_all(rt); 1199 rt_timer_remove_all(rt);
1198#if defined(INET) || defined(INET6) 1200#if defined(INET) || defined(INET6)
1199 if (netmask != NULL) 1201 if (netmask != NULL)
1200 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1202 lltable_prefix_free(dst->sa_family, dst, netmask, 0);
1201#endif 1203#endif
1202 if (ret_nrt == NULL) { 1204 if (ret_nrt == NULL) {
1203 /* Adjust the refcount */ 1205 /* Adjust the refcount */
1204 rt_ref(rt); 1206 rt_ref(rt);
1205 RT_REFCNT_TRACE(rt); 1207 RT_REFCNT_TRACE(rt);
1206 rt_free(rt); 1208 rt_free(rt);
1207 } 1209 }
1208 break; 1210 break;
1209 1211
1210 case RTM_ADD: 1212 case RTM_ADD:
1211 if (info->rti_ifa == NULL) { 1213 if (info->rti_ifa == NULL) {
1212 ifp = rt_getifp(info, &psref_ifp); 1214 ifp = rt_getifp(info, &psref_ifp);
1213 ifa = rt_getifa(info, &psref_ifa); 1215 ifa = rt_getifa(info, &psref_ifa);
1214 if (ifa == NULL) 1216 if (ifa == NULL)
1215 senderr(ENETUNREACH); 1217 senderr(ENETUNREACH);
1216 } else { 1218 } else {
1217 /* Caller should have a reference to ifa */ 1219 /* Caller should have a reference to ifa */
1218 ifa = info->rti_ifa; 1220 ifa = info->rti_ifa;
1219 need_to_release_ifa = false; 1221 need_to_release_ifa = false;
1220 } 1222 }
1221 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1223 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1222 if (rt == NULL) 1224 if (rt == NULL)
1223 senderr(ENOBUFS); 1225 senderr(ENOBUFS);
1224 memset(rt, 0, sizeof(*rt)); 1226 memset(rt, 0, sizeof(*rt));
1225 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA); 1227 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA);
1226 LIST_INIT(&rt->rt_timer); 1228 LIST_INIT(&rt->rt_timer);
1227 1229
1228 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1230 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1229 if (netmask) { 1231 if (netmask) {
1230 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1232 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1231 netmask); 1233 netmask);
1232 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1234 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1233 } else { 1235 } else {
1234 rt_setkey(rt, dst, M_NOWAIT); 1236 rt_setkey(rt, dst, M_NOWAIT);
1235 } 1237 }
1236 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1238 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1237 if (rt_getkey(rt) == NULL || 1239 if (rt_getkey(rt) == NULL ||
1238 rt_setgate(rt, gateway) != 0) { 1240 rt_setgate(rt, gateway) != 0) {
1239 pool_put(&rtentry_pool, rt); 1241 pool_put(&rtentry_pool, rt);
1240 senderr(ENOBUFS); 1242 senderr(ENOBUFS);
1241 } 1243 }
1242 1244
1243 rt_set_ifa(rt, ifa); 1245 rt_set_ifa(rt, ifa);
1244 if (info->rti_info[RTAX_TAG] != NULL) { 1246 if (info->rti_info[RTAX_TAG] != NULL) {
1245 const struct sockaddr *tag; 1247 const struct sockaddr *tag;
1246 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1248 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1247 if (tag == NULL) 1249 if (tag == NULL)
1248 senderr(ENOBUFS); 1250 senderr(ENOBUFS);
1249 } 1251 }
1250 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1252 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1251 1253
1252 ss = pserialize_read_enter(); 1254 ss = pserialize_read_enter();
1253 if (info->rti_info[RTAX_IFP] != NULL) { 1255 if (info->rti_info[RTAX_IFP] != NULL) {
1254 struct ifaddr *ifa2; 1256 struct ifaddr *ifa2;
1255 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1257 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1256 if (ifa2 != NULL) 1258 if (ifa2 != NULL)
1257 rt->rt_ifp = ifa2->ifa_ifp; 1259 rt->rt_ifp = ifa2->ifa_ifp;
1258 else 1260 else
1259 rt->rt_ifp = ifa->ifa_ifp; 1261 rt->rt_ifp = ifa->ifa_ifp;
1260 } else 1262 } else
1261 rt->rt_ifp = ifa->ifa_ifp; 1263 rt->rt_ifp = ifa->ifa_ifp;
1262 pserialize_read_exit(ss); 1264 pserialize_read_exit(ss);
1263 cv_init(&rt->rt_cv, "rtentry"); 1265 cv_init(&rt->rt_cv, "rtentry");
1264 psref_target_init(&rt->rt_psref, rt_psref_class); 1266 psref_target_init(&rt->rt_psref, rt_psref_class);
1265 1267
1266 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1268 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1267 rc = rt_addaddr(rtbl, rt, netmask); 1269 rc = rt_addaddr(rtbl, rt, netmask);
1268 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1270 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1269 if (rc != 0) { 1271 if (rc != 0) {
1270 ifafree(ifa); /* for rt_set_ifa above */ 1272 ifafree(ifa); /* for rt_set_ifa above */
1271 cv_destroy(&rt->rt_cv); 1273 cv_destroy(&rt->rt_cv);
1272 rt_destroy(rt); 1274 rt_destroy(rt);
1273 pool_put(&rtentry_pool, rt); 1275 pool_put(&rtentry_pool, rt);
1274 senderr(rc); 1276 senderr(rc);
1275 } 1277 }
1276 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1278 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1277 if (ifa->ifa_rtrequest) 1279 if (ifa->ifa_rtrequest)
1278 ifa->ifa_rtrequest(req, rt, info); 1280 ifa->ifa_rtrequest(req, rt, info);
1279 if (need_to_release_ifa) 1281 if (need_to_release_ifa)
1280 ifa_release(ifa, &psref_ifa); 1282 ifa_release(ifa, &psref_ifa);
1281 ifa = NULL; 1283 ifa = NULL;
1282 if_put(ifp, &psref_ifp); 1284 if_put(ifp, &psref_ifp);
1283 ifp = NULL; 1285 ifp = NULL;
1284 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1286 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1285 if (ret_nrt) { 1287 if (ret_nrt) {
1286 *ret_nrt = rt; 1288 *ret_nrt = rt;
1287 rt_ref(rt); 1289 rt_ref(rt);
1288 RT_REFCNT_TRACE(rt); 1290 RT_REFCNT_TRACE(rt);
1289 } 1291 }
1290 rtcache_invalidate(); 1292 rtcache_invalidate();
1291 RT_UNLOCK(); 1293 RT_UNLOCK();
1292 need_unlock = false; 1294 need_unlock = false;
1293 break; 1295 break;
1294 case RTM_GET: 1296 case RTM_GET:
1295 if (netmask != NULL) { 1297 if (netmask != NULL) {
1296 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1298 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1297 netmask); 1299 netmask);
1298 dst = (struct sockaddr *)&maskeddst; 1300 dst = (struct sockaddr *)&maskeddst;
1299 } 1301 }
1300 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1302 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1301 senderr(ESRCH); 1303 senderr(ESRCH);
1302 if (ret_nrt != NULL) { 1304 if (ret_nrt != NULL) {
1303 *ret_nrt = rt; 1305 *ret_nrt = rt;
1304 rt_ref(rt); 1306 rt_ref(rt);
1305 RT_REFCNT_TRACE(rt); 1307 RT_REFCNT_TRACE(rt);
1306 } 1308 }
1307 break; 1309 break;
1308 } 1310 }
1309bad: 1311bad:
1310 if (need_to_release_ifa) 1312 if (need_to_release_ifa)
1311 ifa_release(ifa, &psref_ifa); 1313 ifa_release(ifa, &psref_ifa);
1312 if_put(ifp, &psref_ifp); 1314 if_put(ifp, &psref_ifp);
1313 curlwp_bindx(bound); 1315 curlwp_bindx(bound);
1314 if (need_unlock) 1316 if (need_unlock)
1315 RT_UNLOCK(); 1317 RT_UNLOCK();
1316 splx(s); 1318 splx(s);
1317 return error; 1319 return error;
1318} 1320}
1319 1321
1320int 1322int
1321rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1323rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
1322{ 1324{
1323 struct sockaddr *new, *old; 1325 struct sockaddr *new, *old;
1324 1326
1325 KASSERT(RT_WLOCKED()); 1327 KASSERT(RT_WLOCKED());
1326 KASSERT(rt->_rt_key != NULL); 1328 KASSERT(rt->_rt_key != NULL);
1327 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1329 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1328 1330
1329 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT); 1331 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
1330 if (new == NULL) 1332 if (new == NULL)
1331 return ENOMEM; 1333 return ENOMEM;
1332 1334
1333 old = rt->rt_gateway; 1335 old = rt->rt_gateway;
1334 rt->rt_gateway = new; 1336 rt->rt_gateway = new;
1335 if (old != NULL) 1337 if (old != NULL)
1336 sockaddr_free(old); 1338 sockaddr_free(old);
1337 1339
1338 KASSERT(rt->_rt_key != NULL); 1340 KASSERT(rt->_rt_key != NULL);
1339 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1341 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1340 1342
1341 if (rt->rt_flags & RTF_GATEWAY) { 1343 if (rt->rt_flags & RTF_GATEWAY) {
1342 struct rtentry *gwrt; 1344 struct rtentry *gwrt;
1343 1345
1344 gwrt = rtalloc1_locked(gate, 1, false, true); 1346 gwrt = rtalloc1_locked(gate, 1, false, true);
1345 /* 1347 /*
1346 * If we switched gateways, grab the MTU from the new 1348 * If we switched gateways, grab the MTU from the new
1347 * gateway route if the current MTU is greater than the 1349 * gateway route if the current MTU is greater than the
1348 * MTU of the gateway. 1350 * MTU of the gateway.
1349 * Note that, if the MTU of gateway is 0, we will reset the 1351 * Note that, if the MTU of gateway is 0, we will reset the
1350 * MTU of the route to run PMTUD again from scratch. XXX 1352 * MTU of the route to run PMTUD again from scratch. XXX
1351 */ 1353 */
1352 if (gwrt != NULL) { 1354 if (gwrt != NULL) {
1353 KASSERT(gwrt->_rt_key != NULL); 1355 KASSERT(gwrt->_rt_key != NULL);
1354 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key); 1356 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
1355 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 && 1357 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1356 rt->rt_rmx.rmx_mtu && 1358 rt->rt_rmx.rmx_mtu &&
1357 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { 1359 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1358 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; 1360 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1359 } 1361 }
1360 rt_unref(gwrt); 1362 rt_unref(gwrt);
1361 } 1363 }
1362 } 1364 }
1363 KASSERT(rt->_rt_key != NULL); 1365 KASSERT(rt->_rt_key != NULL);
1364 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1366 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1365 return 0; 1367 return 0;
1366} 1368}
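
Concretely (illustrative numbers, not from the source): if the route already carries a nonzero rmx_mtu of 9000 and the new gateway route reports 1500, the route's MTU is clamped to 1500, provided RTV_MTU is not locked; if the gateway route reports an MTU of 0, the comparison still succeeds and the route's MTU is reset to 0, which restarts path MTU discovery as the XXX comment notes.
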
1367 1369
1368static struct ifaddr * 1370static struct ifaddr *
1369rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt, 1371rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt,
1370 struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref) 1372 struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref)
1371{ 1373{
1372 struct ifaddr *ifa = NULL; 1374 struct ifaddr *ifa = NULL;
1373 1375
1374 *ifp = NULL; 1376 *ifp = NULL;
1375 if (info->rti_info[RTAX_IFP] != NULL) { 1377 if (info->rti_info[RTAX_IFP] != NULL) {
1376 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref); 1378 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref);
1377 if (ifa == NULL) 1379 if (ifa == NULL)
1378 goto next; 1380 goto next;
1379 if (ifa->ifa_ifp->if_flags & IFF_UNNUMBERED) { 1381 if (ifa->ifa_ifp->if_flags & IFF_UNNUMBERED) {
1380 ifa_release(ifa, psref); 1382 ifa_release(ifa, psref);
1381 ifa = NULL; 1383 ifa = NULL;
1382 goto next; 1384 goto next;
1383 } 1385 }
1384 *ifp = ifa->ifa_ifp; 1386 *ifp = ifa->ifa_ifp;
1385 if_acquire(*ifp, psref_ifp); 1387 if_acquire(*ifp, psref_ifp);
1386 if (info->rti_info[RTAX_IFA] == NULL && 1388 if (info->rti_info[RTAX_IFA] == NULL &&
1387 info->rti_info[RTAX_GATEWAY] == NULL) 1389 info->rti_info[RTAX_GATEWAY] == NULL)
1388 goto out; 1390 goto out;
1389 ifa_release(ifa, psref); 1391 ifa_release(ifa, psref);
1390 if (info->rti_info[RTAX_IFA] == NULL) { 1392 if (info->rti_info[RTAX_IFA] == NULL) {
1391 /* route change <dst> <gw> -ifp <if> */ 1393 /* route change <dst> <gw> -ifp <if> */
1392 ifa = ifaof_ifpforaddr_psref( 1394 ifa = ifaof_ifpforaddr_psref(
1393 info->rti_info[RTAX_GATEWAY], *ifp, psref); 1395 info->rti_info[RTAX_GATEWAY], *ifp, psref);
1394 } else { 1396 } else {
1395 /* route change <dst> -ifp <if> -ifa <addr> */ 1397 /* route change <dst> -ifp <if> -ifa <addr> */
1396 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], 1398 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA],
1397 psref); 1399 psref);
1398 if (ifa != NULL) 1400 if (ifa != NULL)
1399 goto out; 1401 goto out;
1400 ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA], 1402 ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA],
1401 *ifp, psref); 1403 *ifp, psref);
1402 } 1404 }
1403 goto out; 1405 goto out;
1404 } 1406 }
1405next: 1407next:
1406 if (info->rti_info[RTAX_IFA] != NULL) { 1408 if (info->rti_info[RTAX_IFA] != NULL) {
1407 /* route change <dst> <gw> -ifa <addr> */ 1409 /* route change <dst> <gw> -ifa <addr> */
1408 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref); 1410 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref);
1409 if (ifa != NULL) 1411 if (ifa != NULL)
1410 goto out; 1412 goto out;
1411 } 1413 }
1412 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1414 if (info->rti_info[RTAX_GATEWAY] != NULL) {
1413 /* route change <dst> <gw> */ 1415 /* route change <dst> <gw> */
1414 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt), 1416 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt),
1415 info->rti_info[RTAX_GATEWAY], psref); 1417 info->rti_info[RTAX_GATEWAY], psref);
1416 } 1418 }
1417out: 1419out:
1418 if (ifa != NULL && *ifp == NULL) { 1420 if (ifa != NULL && *ifp == NULL) {
1419 *ifp = ifa->ifa_ifp; 1421 *ifp = ifa->ifa_ifp;
1420 if_acquire(*ifp, psref_ifp); 1422 if_acquire(*ifp, psref_ifp);
1421 } 1423 }
1422 if (ifa == NULL && *ifp != NULL) { 1424 if (ifa == NULL && *ifp != NULL) {
1423 if_put(*ifp, psref_ifp); 1425 if_put(*ifp, psref_ifp);
1424 *ifp = NULL; 1426 *ifp = NULL;
1425 } 1427 }
1426 return ifa; 1428 return ifa;
1427} 1429}
1428 1430
1429int 1431int
1430rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm) 1432rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm)
1431{ 1433{
1432 int error = 0; 1434 int error = 0;
1433 struct ifnet *ifp = NULL, *new_ifp = NULL; 1435 struct ifnet *ifp = NULL, *new_ifp = NULL;
1434 struct ifaddr *ifa = NULL, *new_ifa; 1436 struct ifaddr *ifa = NULL, *new_ifa;
1435 struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp; 1437 struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp;
1436 bool newgw, ifp_changed = false; 1438 bool newgw, ifp_changed = false;
1437 1439
1438 RT_WLOCK(); 1440 RT_WLOCK();
1439 /* 1441 /*
1440 * New gateway could require new ifaddr, ifp; 1442 * New gateway could require new ifaddr, ifp;
1441 * flags may also be different; ifp may be specified 1443 * flags may also be different; ifp may be specified
1442 * by ll sockaddr when protocol address is ambiguous 1444 * by ll sockaddr when protocol address is ambiguous
1443 */ 1445 */
1444 newgw = info->rti_info[RTAX_GATEWAY] != NULL && 1446 newgw = info->rti_info[RTAX_GATEWAY] != NULL &&
1445 sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0; 1447 sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0;
1446 1448
1447 if (newgw || info->rti_info[RTAX_IFP] != NULL || 1449 if (newgw || info->rti_info[RTAX_IFP] != NULL ||
1448 info->rti_info[RTAX_IFA] != NULL) { 1450 info->rti_info[RTAX_IFA] != NULL) {
1449 ifp = rt_getifp(info, &psref_ifp); 1451 ifp = rt_getifp(info, &psref_ifp);
1450 /* info refers to ifp, so we need to keep a reference */ 1452 /* info refers to ifp, so we need to keep a reference */
1451 ifa = rt_getifa(info, &psref_ifa); 1453 ifa = rt_getifa(info, &psref_ifa);
1452 if (ifa == NULL) { 1454 if (ifa == NULL) {
1453 error = ENETUNREACH; 1455 error = ENETUNREACH;
1454 goto out; 1456 goto out;
1455 } 1457 }
1456 } 1458 }
1457 if (newgw) { 1459 if (newgw) {
1458 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]); 1460 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]);
1459 if (error != 0) 1461 if (error != 0)
1460 goto out; 1462 goto out;
1461 } 1463 }
1462 if (info->rti_info[RTAX_TAG]) { 1464 if (info->rti_info[RTAX_TAG]) {
1463 const struct sockaddr *tag; 1465 const struct sockaddr *tag;
1464 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1466 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1465 if (tag == NULL) { 1467 if (tag == NULL) {
1466 error = ENOBUFS; 1468 error = ENOBUFS;
1467 goto out; 1469 goto out;
1468 } 1470 }
1469 } 1471 }
1470 /* 1472 /*
1471 * New gateway could require new ifaddr, ifp; 1473 * New gateway could require new ifaddr, ifp;
1472 * flags may also be different; ifp may be specified 1474 * flags may also be different; ifp may be specified
1473 * by ll sockaddr when protocol address is ambiguous 1475 * by ll sockaddr when protocol address is ambiguous
1474 */ 1476 */
1475 new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp, 1477 new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp,
1476 &psref_new_ifa); 1478 &psref_new_ifa);
1477 if (new_ifa != NULL) { 1479 if (new_ifa != NULL) {
1478 ifa_release(ifa, &psref_ifa); 1480 ifa_release(ifa, &psref_ifa);
1479 ifa = new_ifa; 1481 ifa = new_ifa;
1480 } 1482 }
1481 if (ifa) { 1483 if (ifa) {
1482 struct ifaddr *oifa = rt->rt_ifa; 1484 struct ifaddr *oifa = rt->rt_ifa;
1483 if (oifa != ifa && !ifa_is_destroying(ifa) && 1485 if (oifa != ifa && !ifa_is_destroying(ifa) &&
1484 new_ifp != NULL && !if_is_deactivated(new_ifp)) { 1486 new_ifp != NULL && !if_is_deactivated(new_ifp)) {
1485 if (oifa && oifa->ifa_rtrequest) 1487 if (oifa && oifa->ifa_rtrequest)
1486 oifa->ifa_rtrequest(RTM_DELETE, rt, info); 1488 oifa->ifa_rtrequest(RTM_DELETE, rt, info);
1487 rt_replace_ifa(rt, ifa); 1489 rt_replace_ifa(rt, ifa);
1488 rt->rt_ifp = new_ifp; 1490 rt->rt_ifp = new_ifp;
1489 ifp_changed = true; 1491 ifp_changed = true;
1490 } 1492 }
1491 if (new_ifa == NULL) 1493 if (new_ifa == NULL)
1492 ifa_release(ifa, &psref_ifa); 1494 ifa_release(ifa, &psref_ifa);
1493 /* To avoid ifa_release below */ 1495 /* To avoid ifa_release below */
1494 ifa = NULL; 1496 ifa = NULL;
1495 } 1497 }
1496 ifa_release(new_ifa, &psref_new_ifa); 1498 ifa_release(new_ifa, &psref_new_ifa);
1497 if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) { 1499 if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) {
1498 rt->rt_ifp = new_ifp; 1500 rt->rt_ifp = new_ifp;
1499 ifp_changed = true; 1501 ifp_changed = true;
1500 } 1502 }
1501 rt_setmetrics(rtm, rt); 1503 rt_setmetrics(rtm, rt);
1502 if (rt->rt_flags != info->rti_flags) { 1504 if (rt->rt_flags != info->rti_flags) {
1503 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) | 1505 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) |
1504 (rt->rt_flags & PRESERVED_RTF); 1506 (rt->rt_flags & PRESERVED_RTF);
1505 } 1507 }
1506 if (rt->rt_ifa->ifa_rtrequest) 1508 if (rt->rt_ifa->ifa_rtrequest)
1507 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); 1509 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
1508#if defined(INET) || defined(INET6) 1510#if defined(INET) || defined(INET6)
1509 if (ifp_changed && rt_mask(rt) != NULL) 1511 if (ifp_changed && rt_mask(rt) != NULL)
1510 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt), 1512 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt),
1511 rt_mask(rt), 0); 1513 rt_mask(rt), 0);
1512#else 1514#else
1513 (void)ifp_changed; /* XXX gcc */ 1515 (void)ifp_changed; /* XXX gcc */
1514#endif 1516#endif
1515out: 1517out:
1516 ifa_release(ifa, &psref_ifa); 1518 ifa_release(ifa, &psref_ifa);
1517 if_put(new_ifp, &psref_new_ifp); 1519 if_put(new_ifp, &psref_new_ifp);
1518 if_put(ifp, &psref_ifp); 1520 if_put(ifp, &psref_ifp);
1519 1521
1520 RT_UNLOCK(); 1522 RT_UNLOCK();
1521 1523
1522 return error; 1524 return error;
1523} 1525}
1524 1526
1525static void 1527static void
1526rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1528rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1527 const struct sockaddr *netmask) 1529 const struct sockaddr *netmask)
1528{ 1530{
1529 const char *netmaskp = &netmask->sa_data[0], 1531 const char *netmaskp = &netmask->sa_data[0],
1530 *srcp = &src->sa_data[0]; 1532 *srcp = &src->sa_data[0];
1531 char *dstp = &dst->sa_data[0]; 1533 char *dstp = &dst->sa_data[0];
1532 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1534 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1533 const char *srcend = (char *)dst + src->sa_len; 1535 const char *srcend = (char *)dst + src->sa_len;
1534 1536
1535 dst->sa_len = src->sa_len; 1537 dst->sa_len = src->sa_len;
1536 dst->sa_family = src->sa_family; 1538 dst->sa_family = src->sa_family;
1537 1539
1538 while (dstp < maskend) 1540 while (dstp < maskend)
1539 *dstp++ = *srcp++ & *netmaskp++; 1541 *dstp++ = *srcp++ & *netmaskp++;
1540 if (dstp < srcend) 1542 if (dstp < srcend)
1541 memset(dstp, 0, (size_t)(srcend - dstp)); 1543 memset(dstp, 0, (size_t)(srcend - dstp));
1542} 1544}
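
For illustration (a hypothetical, self-contained sketch, not part of the source): applied to AF_INET sockaddrs, the byte-wise AND above turns 192.168.5.7 masked with 255.255.255.0 into 192.168.5.0, and any bytes beyond the shorter of the two sockaddrs are zeroed:

	struct sockaddr_in src, mask, dst;

	memset(&src, 0, sizeof(src));
	src.sin_len = sizeof(src);
	src.sin_family = AF_INET;
	src.sin_addr.s_addr = htonl(0xc0a80507);	/* 192.168.5.7 */

	memset(&mask, 0, sizeof(mask));
	mask.sin_len = sizeof(mask);
	mask.sin_addr.s_addr = htonl(0xffffff00);	/* 255.255.255.0 */

	rt_maskedcopy((const struct sockaddr *)&src, (struct sockaddr *)&dst,
	    (const struct sockaddr *)&mask);
	/* dst.sin_addr.s_addr now holds 192.168.5.0; sin_port and sin_zero are 0. */
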
1543 1545
1544/* 1546/*
1545 * Inform the routing socket of a route change. 1547 * Inform the routing socket of a route change.
1546 */ 1548 */
1547void 1549void
1548rt_newmsg(const int cmd, const struct rtentry *rt) 1550rt_newmsg(const int cmd, const struct rtentry *rt)
1549{ 1551{
1550 struct rt_addrinfo info; 1552 struct rt_addrinfo info;
1551 1553
1552 memset((void *)&info, 0, sizeof(info)); 1554 memset((void *)&info, 0, sizeof(info));
1553 info.rti_info[RTAX_DST] = rt_getkey(rt); 1555 info.rti_info[RTAX_DST] = rt_getkey(rt);
1554 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1556 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1555 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1557 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1556 if (rt->rt_ifp) { 1558 if (rt->rt_ifp) {
1557 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1559 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1558 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1560 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1559 } 1561 }
1560 1562
1561 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1563 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1562} 1564}
1563 1565
1564/* 1566/*
1565 * Inform the routing socket of a route change for RTF_DYNAMIC. 1567 * Inform the routing socket of a route change for RTF_DYNAMIC.
1566 */ 1568 */
1567void 1569void
1568rt_newmsg_dynamic(const int cmd, const struct rtentry *rt) 1570rt_newmsg_dynamic(const int cmd, const struct rtentry *rt)
1569{ 1571{
1570 struct rt_addrinfo info; 1572 struct rt_addrinfo info;
1571 struct sockaddr *gateway = rt->rt_gateway; 1573 struct sockaddr *gateway = rt->rt_gateway;
1572 1574
1573 if (gateway == NULL) 1575 if (gateway == NULL)
1574 return; 1576 return;
1575 1577
1576 switch(gateway->sa_family) { 1578 switch(gateway->sa_family) {
1577#ifdef INET 1579#ifdef INET
1578 case AF_INET: { 1580 case AF_INET: {
1579 extern bool icmp_dynamic_rt_msg; 1581 extern bool icmp_dynamic_rt_msg;
1580 if (!icmp_dynamic_rt_msg) 1582 if (!icmp_dynamic_rt_msg)
1581 return; 1583 return;
1582 break; 1584 break;
1583 } 1585 }
1584#endif 1586#endif
1585#ifdef INET6 1587#ifdef INET6
1586 case AF_INET6: { 1588 case AF_INET6: {
1587 extern bool icmp6_dynamic_rt_msg; 1589 extern bool icmp6_dynamic_rt_msg;
1588 if (!icmp6_dynamic_rt_msg) 1590 if (!icmp6_dynamic_rt_msg)
1589 return; 1591 return;
1590 break; 1592 break;
1591 } 1593 }
1592#endif 1594#endif
1593 default: 1595 default:
1594 return; 1596 return;
1595 } 1597 }
1596 1598
1597 memset((void *)&info, 0, sizeof(info)); 1599 memset((void *)&info, 0, sizeof(info));
1598 info.rti_info[RTAX_DST] = rt_getkey(rt); 1600 info.rti_info[RTAX_DST] = rt_getkey(rt);
1599 info.rti_info[RTAX_GATEWAY] = gateway; 1601 info.rti_info[RTAX_GATEWAY] = gateway;
1600 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1602 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1601 if (rt->rt_ifp) { 1603 if (rt->rt_ifp) {
1602 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1604 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1603 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1605 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1604 } 1606 }
1605 1607
1606 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1608 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1607} 1609}
1608 1610
1609/* 1611/*
1610 * Set up or tear down a routing table entry, normally 1612 * Set up or tear down a routing table entry, normally
1611 * for an interface. 1613 * for an interface.
1612 */ 1614 */
1613int 1615int
1614rtinit(struct ifaddr *ifa, int cmd, int flags) 1616rtinit(struct ifaddr *ifa, int cmd, int flags)
1615{ 1617{
1616 struct rtentry *rt; 1618 struct rtentry *rt;
1617 struct sockaddr *dst, *odst; 1619 struct sockaddr *dst, *odst;
1618 struct sockaddr_storage maskeddst; 1620 struct sockaddr_storage maskeddst;
1619 struct rtentry *nrt = NULL; 1621 struct rtentry *nrt = NULL;
1620 int error; 1622 int error;
1621 struct rt_addrinfo info; 1623 struct rt_addrinfo info;
1622 1624
1623 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1625 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1624 if (cmd == RTM_DELETE) { 1626 if (cmd == RTM_DELETE) {
1625 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1627 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1626 /* Delete subnet route for this interface */ 1628 /* Delete subnet route for this interface */
1627 odst = dst; 1629 odst = dst;
1628 dst = (struct sockaddr *)&maskeddst; 1630 dst = (struct sockaddr *)&maskeddst;
1629 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1631 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1630 } 1632 }
1631 if ((rt = rtalloc1(dst, 0)) != NULL) { 1633 if ((rt = rtalloc1(dst, 0)) != NULL) {
1632 if (rt->rt_ifa != ifa) { 1634 if (rt->rt_ifa != ifa) {
1633 rt_unref(rt); 1635 rt_unref(rt);
1634 return (flags & RTF_HOST) ? EHOSTUNREACH 1636 return (flags & RTF_HOST) ? EHOSTUNREACH
1635 : ENETUNREACH; 1637 : ENETUNREACH;
1636 } 1638 }
1637 rt_unref(rt); 1639 rt_unref(rt);
1638 } 1640 }
1639 } 1641 }
1640 memset(&info, 0, sizeof(info)); 1642 memset(&info, 0, sizeof(info));
1641 info.rti_ifa = ifa; 1643 info.rti_ifa = ifa;
1642 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA; 1644 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA;
1643 info.rti_info[RTAX_DST] = dst; 1645 info.rti_info[RTAX_DST] = dst;
1644 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1646 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1645 1647
1646 /* 1648 /*
1647 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1649 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1648 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1650 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1649 * variable) when RTF_HOST is 1. still not sure if i can safely 1651 * variable) when RTF_HOST is 1. still not sure if i can safely
1650 * change it to meet bsdi4 behavior. 1652 * change it to meet bsdi4 behavior.
1651 */ 1653 */
1652 if (cmd != RTM_LLINFO_UPD) 1654 if (cmd != RTM_LLINFO_UPD)
1653 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1655 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1654 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1656 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1655 &nrt); 1657 &nrt);
1656 if (error != 0) 1658 if (error != 0)
1657 return error; 1659 return error;
1658 1660
1659 rt = nrt; 1661 rt = nrt;
1660 RT_REFCNT_TRACE(rt); 1662 RT_REFCNT_TRACE(rt);
1661 switch (cmd) { 1663 switch (cmd) {
1662 case RTM_DELETE: 1664 case RTM_DELETE:
1663 rt_newmsg(cmd, rt); 1665 rt_newmsg(cmd, rt);
1664 rt_free(rt); 1666 rt_free(rt);
1665 break; 1667 break;
1666 case RTM_LLINFO_UPD: 1668 case RTM_LLINFO_UPD:
1667 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1669 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1668 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1670 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1669 rt_newmsg(RTM_CHANGE, rt); 1671 rt_newmsg(RTM_CHANGE, rt);
1670 rt_unref(rt); 1672 rt_unref(rt);
1671 break; 1673 break;
1672 case RTM_ADD: 1674 case RTM_ADD:
1673 KASSERT(rt->rt_ifa == ifa); 1675 KASSERT(rt->rt_ifa == ifa);
1674 rt_newmsg(cmd, rt); 1676 rt_newmsg(cmd, rt);
1675 rt_unref(rt); 1677 rt_unref(rt);
1676 RT_REFCNT_TRACE(rt); 1678 RT_REFCNT_TRACE(rt);
1677 break; 1679 break;
1678 } 1680 }
1679 return error; 1681 return error;
1680} 1682}
1681 1683
1682/* 1684/*
1683 * Create a local route entry for the address. 1685 * Create a local route entry for the address.
1684 * Announce the addition of the address and the route to the routing socket. 1686 * Announce the addition of the address and the route to the routing socket.
1685 */ 1687 */
1686int 1688int
1687rt_ifa_addlocal(struct ifaddr *ifa) 1689rt_ifa_addlocal(struct ifaddr *ifa)
1688{ 1690{
1689 struct rtentry *rt; 1691 struct rtentry *rt;
1690 int e; 1692 int e;
1691 1693
1692 /* If there is no loopback entry, allocate one. */ 1694 /* If there is no loopback entry, allocate one. */
1693 rt = rtalloc1(ifa->ifa_addr, 0); 1695 rt = rtalloc1(ifa->ifa_addr, 0);
1694#ifdef RT_DEBUG 1696#ifdef RT_DEBUG
1695 if (rt != NULL) 1697 if (rt != NULL)
1696 dump_rt(rt); 1698 dump_rt(rt);
1697#endif 1699#endif
1698 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1700 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1699 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1701 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1700 { 1702 {
1701 struct rt_addrinfo info; 1703 struct rt_addrinfo info;
1702 struct rtentry *nrt; 1704 struct rtentry *nrt;
1703 1705
1704 memset(&info, 0, sizeof(info)); 1706 memset(&info, 0, sizeof(info));
1705 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA; 1707 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA;
1706 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1708 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1707 info.rti_info[RTAX_GATEWAY] = 1709 info.rti_info[RTAX_GATEWAY] =
1708 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1710 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1709 info.rti_ifa = ifa; 1711 info.rti_ifa = ifa;
1710 nrt = NULL; 1712 nrt = NULL;
1711 e = rtrequest1(RTM_ADD, &info, &nrt); 1713 e = rtrequest1(RTM_ADD, &info, &nrt);
1712 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt); 1714 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt);
1713 if (nrt != NULL) { 1715 if (nrt != NULL) {
1714 KASSERT(nrt->rt_ifa == ifa); 1716 KASSERT(nrt->rt_ifa == ifa);
1715#ifdef RT_DEBUG 1717#ifdef RT_DEBUG
1716 dump_rt(nrt); 1718 dump_rt(nrt);
1717#endif 1719#endif
1718 rt_unref(nrt); 1720 rt_unref(nrt);
1719 RT_REFCNT_TRACE(nrt); 1721 RT_REFCNT_TRACE(nrt);
1720 } 1722 }
1721 } else { 1723 } else {
1722 e = 0; 1724 e = 0;
1723 rt_addrmsg(RTM_NEWADDR, ifa); 1725 rt_addrmsg(RTM_NEWADDR, ifa);
1724 } 1726 }
1725 if (rt != NULL) 1727 if (rt != NULL)
1726 rt_unref(rt); 1728 rt_unref(rt);
1727 return e; 1729 return e;
1728} 1730}
1729 1731
1730/* 1732/*
1731 * Remove the local route entry for the address. 1733 * Remove the local route entry for the address.
1732 * Announce the removal of the address and the route to the routing socket. 1734 * Announce the removal of the address and the route to the routing socket.
1733 */ 1735 */
1734int 1736int
1735rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1737rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1736{ 1738{
1737 struct rtentry *rt; 1739 struct rtentry *rt;
1738 int e = 0; 1740 int e = 0;
1739 1741
1740 rt = rtalloc1(ifa->ifa_addr, 0); 1742 rt = rtalloc1(ifa->ifa_addr, 0);
1741 1743
1742 /* 1744 /*
1743 * Before deleting, check if a corresponding loopbacked 1745 * Before deleting, check if a corresponding loopbacked
1744 * host route actually exists. With this check, we can avoid 1746 * host route actually exists. With this check, we can avoid
1745 * deleting an interface direct route whose destination is 1747 * deleting an interface direct route whose destination is
1746 * the same as the address being removed. This can happen 1748 * the same as the address being removed. This can happen
1747 * when removing a subnet-router anycast address on an 1749 * when removing a subnet-router anycast address on an
1748 * interface attached to a shared medium. 1750 * interface attached to a shared medium.
1749 */ 1751 */
1750 if (rt != NULL && 1752 if (rt != NULL &&
1751 (rt->rt_flags & RTF_HOST) && 1753 (rt->rt_flags & RTF_HOST) &&
1752 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1754 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1753 { 1755 {
1754 /* If we cannot replace the route's ifaddr with the equivalent 1756 /* If we cannot replace the route's ifaddr with the equivalent
1755 * ifaddr of another interface, I believe it is safest to 1757 * ifaddr of another interface, I believe it is safest to
1756 * delete the route. 1758 * delete the route.
1757 */ 1759 */
1758 if (alt_ifa == NULL) { 1760 if (alt_ifa == NULL) {
1759 e = rtdeletemsg(rt); 1761 e = rtdeletemsg(rt);
1760 if (e == 0) { 1762 if (e == 0) {
1761 rt_unref(rt); 1763 rt_unref(rt);
1762 rt_free(rt); 1764 rt_free(rt);
1763 rt = NULL; 1765 rt = NULL;
1764 } 1766 }
1765 rt_addrmsg(RTM_DELADDR, ifa); 1767 rt_addrmsg(RTM_DELADDR, ifa);
1766 } else { 1768 } else {
1767#ifdef NET_MPSAFE 1769#ifdef NET_MPSAFE
1768 int error = rt_update_prepare(rt); 1770 int error = rt_update_prepare(rt);
1769 if (error == 0) { 1771 if (error == 0) {
1770 rt_replace_ifa(rt, alt_ifa); 1772 rt_replace_ifa(rt, alt_ifa);
1771 rt_update_finish(rt); 1773 rt_update_finish(rt);
1772 } else { 1774 } else {
1773 /* 1775 /*
1774 * If error != 0, the rtentry is being 1776 * If error != 0, the rtentry is being
1775 * destroyed, so doing nothing doesn't 1777 * destroyed, so doing nothing doesn't
1776 * matter. 1778 * matter.
1777 */ 1779 */
1778 } 1780 }
1779#else 1781#else
1780 rt_replace_ifa(rt, alt_ifa); 1782 rt_replace_ifa(rt, alt_ifa);
1781#endif 1783#endif
1782 rt_newmsg(RTM_CHANGE, rt); 1784 rt_newmsg(RTM_CHANGE, rt);
1783 } 1785 }
1784 } else 1786 } else
1785 rt_addrmsg(RTM_DELADDR, ifa); 1787 rt_addrmsg(RTM_DELADDR, ifa);
1786 if (rt != NULL) 1788 if (rt != NULL)
1787 rt_unref(rt); 1789 rt_unref(rt);
1788 return e; 1790 return e;
1789} 1791}
1790 1792
1791/* 1793/*
1792 * Route timer routines. These routines allow functions to be called 1794 * Route timer routines. These routines allow functions to be called
1793 * for various routes at any time. This is useful in supporting 1795 * for various routes at any time. This is useful in supporting
1794 * path MTU discovery and redirect route deletion. 1796 * path MTU discovery and redirect route deletion.
1795 * 1797 *
1796 * This is similar to some BSDI internal functions, but it provides 1798 * This is similar to some BSDI internal functions, but it provides
1797 * for multiple queues for efficiency's sake... 1799 * for multiple queues for efficiency's sake...
1798 */ 1800 */
1799 1801
1800LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1802LIST_HEAD(, rttimer_queue) rttimer_queue_head;
1801static int rt_init_done = 0; 1803static int rt_init_done = 0;
1802 1804
1803/* 1805/*
1804 * Some subtle order problems with domain initialization mean that 1806 * Some subtle order problems with domain initialization mean that
1805 * we cannot count on this being run from rt_init before various 1807 * we cannot count on this being run from rt_init before various
1806 * protocol initializations are done. Therefore, we make sure 1808 * protocol initializations are done. Therefore, we make sure
1807 * that this is run when the first queue is added... 1809 * that this is run when the first queue is added...
1808 */ 1810 */
1809 1811
1810static void rt_timer_work(struct work *, void *); 1812static void rt_timer_work(struct work *, void *);
1811 1813
1812static void 1814static void
1813rt_timer_init(void) 1815rt_timer_init(void)
1814{ 1816{
1815 int error; 1817 int error;
1816 1818
1817 assert(rt_init_done == 0); 1819 assert(rt_init_done == 0);
1818 1820
1819 /* XXX should be in rt_init */ 1821 /* XXX should be in rt_init */
1820 rw_init(&rt_lock); 1822 rw_init(&rt_lock);
1821 1823
1822 LIST_INIT(&rttimer_queue_head); 1824 LIST_INIT(&rttimer_queue_head);
1823 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1825 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1824 error = workqueue_create(&rt_timer_wq, "rt_timer", 1826 error = workqueue_create(&rt_timer_wq, "rt_timer",
1825 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1827 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS);
1826 if (error) 1828 if (error)
1827 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1829 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1828 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1830 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1829 rt_init_done = 1; 1831 rt_init_done = 1;
1830} 1832}
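
RT_WQ_FLAGS in the workqueue_create() call above is the substantive change in this hunk (the previous revision passed WQ_MPSAFE unconditionally). Its definition is not visible here; presumably this revision introduces it earlier in route.c along the lines of the sketch below, so that in kernels built without NET_MPSAFE the rt_timer worker keeps running under KERNEL_LOCK, since workqueue(9) only drops KERNEL_LOCK for workers created with WQ_MPSAFE:

	/* Assumed definition, earlier in route.c and not shown in this hunk. */
	#ifdef NET_MPSAFE
	#define	RT_WQ_FLAGS	WQ_MPSAFE
	#else
	#define	RT_WQ_FLAGS	0
	#endif
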
1831 1833
1832struct rttimer_queue * 1834struct rttimer_queue *
1833rt_timer_queue_create(u_int timeout) 1835rt_timer_queue_create(u_int timeout)
1834{ 1836{
1835 struct rttimer_queue *rtq; 1837 struct rttimer_queue *rtq;
1836 1838
1837 if (rt_init_done == 0) 1839 if (rt_init_done == 0)
1838 rt_timer_init(); 1840 rt_timer_init();
1839 1841
1840 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1842 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1841 if (rtq == NULL) 1843 if (rtq == NULL)
1842 return NULL; 1844 return NULL;
1843 memset(rtq, 0, sizeof(*rtq)); 1845 memset(rtq, 0, sizeof(*rtq));
1844 1846
1845 rtq->rtq_timeout = timeout; 1847 rtq->rtq_timeout = timeout;
1846 TAILQ_INIT(&rtq->rtq_head); 1848 TAILQ_INIT(&rtq->rtq_head);
1847 RT_WLOCK(); 1849 RT_WLOCK();
1848 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1850 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1849 RT_UNLOCK(); 1851 RT_UNLOCK();
1850 1852
1851 return rtq; 1853 return rtq;
1852} 1854}
1853 1855
1854void 1856void
1855rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1857rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1856{ 1858{
1857 1859
1858 rtq->rtq_timeout = timeout; 1860 rtq->rtq_timeout = timeout;
1859} 1861}
1860 1862
1861static void 1863static void
1862rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1864rt_timer_queue_remove_all(struct rttimer_queue *rtq)
1863{ 1865{
1864 struct rttimer *r; 1866 struct rttimer *r;
1865 1867
1866 RT_ASSERT_WLOCK(); 1868 RT_ASSERT_WLOCK();
1867 1869
1868 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1870 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1869 LIST_REMOVE(r, rtt_link); 1871 LIST_REMOVE(r, rtt_link);
1870 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1872 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1871 rt_ref(r->rtt_rt); /* XXX */ 1873 rt_ref(r->rtt_rt); /* XXX */
1872 RT_REFCNT_TRACE(r->rtt_rt); 1874 RT_REFCNT_TRACE(r->rtt_rt);
1873 RT_UNLOCK(); 1875 RT_UNLOCK();
1874 (*r->rtt_func)(r->rtt_rt, r); 1876 (*r->rtt_func)(r->rtt_rt, r);
1875 pool_put(&rttimer_pool, r); 1877 pool_put(&rttimer_pool, r);
1876 RT_WLOCK(); 1878 RT_WLOCK();
1877 if (rtq->rtq_count > 0) 1879 if (rtq->rtq_count > 0)
1878 rtq->rtq_count--; 1880 rtq->rtq_count--;
1879 else 1881 else
1880 printf("rt_timer_queue_remove_all: " 1882 printf("rt_timer_queue_remove_all: "
1881 "rtq_count reached 0\n"); 1883 "rtq_count reached 0\n");
1882 } 1884 }
1883} 1885}
1884 1886
1885void 1887void
1886rt_timer_queue_destroy(struct rttimer_queue *rtq) 1888rt_timer_queue_destroy(struct rttimer_queue *rtq)
1887{ 1889{
1888 1890
1889 RT_WLOCK(); 1891 RT_WLOCK();
1890 rt_timer_queue_remove_all(rtq); 1892 rt_timer_queue_remove_all(rtq);
1891 LIST_REMOVE(rtq, rtq_link); 1893 LIST_REMOVE(rtq, rtq_link);
1892 RT_UNLOCK(); 1894 RT_UNLOCK();
1893 1895
1894 /* 1896 /*
1895 * Caller is responsible for freeing the rttimer_queue structure. 1897 * Caller is responsible for freeing the rttimer_queue structure.
1896 */ 1898 */
1897} 1899}
1898 1900
1899unsigned long 1901unsigned long
1900rt_timer_count(struct rttimer_queue *rtq) 1902rt_timer_count(struct rttimer_queue *rtq)
1901{ 1903{
1902 return rtq->rtq_count; 1904 return rtq->rtq_count;
1903} 1905}
1904 1906
1905static void 1907static void
1906rt_timer_remove_all(struct rtentry *rt) 1908rt_timer_remove_all(struct rtentry *rt)
1907{ 1909{
1908 struct rttimer *r; 1910 struct rttimer *r;
1909 1911
1910 RT_WLOCK(); 1912 RT_WLOCK();
1911 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1913 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1912 LIST_REMOVE(r, rtt_link); 1914 LIST_REMOVE(r, rtt_link);
1913 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1915 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1914 if (r->rtt_queue->rtq_count > 0) 1916 if (r->rtt_queue->rtq_count > 0)
1915 r->rtt_queue->rtq_count--; 1917 r->rtt_queue->rtq_count--;
1916 else 1918 else
1917 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1919 printf("rt_timer_remove_all: rtq_count reached 0\n");
1918 pool_put(&rttimer_pool, r); 1920 pool_put(&rttimer_pool, r);
1919 } 1921 }
1920 RT_UNLOCK(); 1922 RT_UNLOCK();
1921} 1923}
1922 1924
1923int 1925int
1924rt_timer_add(struct rtentry *rt, 1926rt_timer_add(struct rtentry *rt,
1925 void (*func)(struct rtentry *, struct rttimer *), 1927 void (*func)(struct rtentry *, struct rttimer *),
1926 struct rttimer_queue *queue) 1928 struct rttimer_queue *queue)
1927{ 1929{
1928 struct rttimer *r; 1930 struct rttimer *r;
1929 1931
1930 KASSERT(func != NULL); 1932 KASSERT(func != NULL);
1931 RT_WLOCK(); 1933 RT_WLOCK();
1932 /* 1934 /*
1933 * If there's already a timer with this action, destroy it before 1935 * If there's already a timer with this action, destroy it before
1934 * we add a new one. 1936 * we add a new one.
1935 */ 1937 */
1936 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1938 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1937 if (r->rtt_func == func) 1939 if (r->rtt_func == func)
1938 break; 1940 break;
1939 } 1941 }
1940 if (r != NULL) { 1942 if (r != NULL) {
1941 LIST_REMOVE(r, rtt_link); 1943 LIST_REMOVE(r, rtt_link);
1942 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1944 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1943 if (r->rtt_queue->rtq_count > 0) 1945 if (r->rtt_queue->rtq_count > 0)
1944 r->rtt_queue->rtq_count--; 1946 r->rtt_queue->rtq_count--;
1945 else 1947 else
1946 printf("rt_timer_add: rtq_count reached 0\n"); 1948 printf("rt_timer_add: rtq_count reached 0\n");
1947 } else { 1949 } else {
1948 r = pool_get(&rttimer_pool, PR_NOWAIT); 1950 r = pool_get(&rttimer_pool, PR_NOWAIT);
1949 if (r == NULL) { 1951 if (r == NULL) {
1950 RT_UNLOCK(); 1952 RT_UNLOCK();
1951 return ENOBUFS; 1953 return ENOBUFS;
1952 } 1954 }
1953 } 1955 }
1954 1956
1955 memset(r, 0, sizeof(*r)); 1957 memset(r, 0, sizeof(*r));
1956 1958
1957 r->rtt_rt = rt; 1959 r->rtt_rt = rt;
1958 r->rtt_time = time_uptime; 1960 r->rtt_time = time_uptime;
1959 r->rtt_func = func; 1961 r->rtt_func = func;
1960 r->rtt_queue = queue; 1962 r->rtt_queue = queue;
1961 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1963 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1962 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1964 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1963 r->rtt_queue->rtq_count++; 1965 r->rtt_queue->rtq_count++;
1964 1966
1965 RT_UNLOCK(); 1967 RT_UNLOCK();
1966 1968
1967 return 0; 1969 return 0;
1968} 1970}
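
rt_timer_add() attaches a per-route timer to a queue and reuses any existing timer on the route that has the same callback. A minimal sketch of a caller, with hypothetical example_* names, assuming the queue was created earlier (e.g. with rt_timer_queue_create()); per the contract documented in rt_timer_work() below, the callback must drop the reference it is handed with rt_unref():

	#include <sys/param.h>
	#include <sys/socket.h>
	#include <net/route.h>

	/* Assumed to be created elsewhere, e.g. via rt_timer_queue_create(). */
	static struct rttimer_queue *example_timer_queue;

	static void
	example_route_expire(struct rtentry *rt, struct rttimer *r)
	{

		/* ... per-protocol expiry work on rt ... */
		rt_unref(rt);	/* required: drop the reference taken by rt_timer_work() */
	}

	static int
	example_arm_route_timer(struct rtentry *rt)
	{

		/* Replaces any existing timer on rt that uses the same callback. */
		return rt_timer_add(rt, example_route_expire, example_timer_queue);
	}
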
1969 1971
1970static void 1972static void
1971rt_timer_work(struct work *wk, void *arg) 1973rt_timer_work(struct work *wk, void *arg)
1972{ 1974{
1973 struct rttimer_queue *rtq; 1975 struct rttimer_queue *rtq;
1974 struct rttimer *r; 1976 struct rttimer *r;
1975 1977
1976 RT_WLOCK(); 1978 RT_WLOCK();
1977 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1979 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1978 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1980 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1979 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1981 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1980 LIST_REMOVE(r, rtt_link); 1982 LIST_REMOVE(r, rtt_link);
1981 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1983 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1982 /* 1984 /*
1983 * Take a reference so the rtentry is not freed 1985 * Take a reference so the rtentry is not freed
1984 * out from under us after RT_UNLOCK. The callback 1986 * out from under us after RT_UNLOCK. The callback
1985 * (rtt_func) must drop the reference with rt_unref. 1987 * (rtt_func) must drop the reference with rt_unref.
1986 */ 1988 */
1987 rt_ref(r->rtt_rt); 1989 rt_ref(r->rtt_rt);
1988 RT_REFCNT_TRACE(r->rtt_rt); 1990 RT_REFCNT_TRACE(r->rtt_rt);
1989 RT_UNLOCK(); 1991 RT_UNLOCK();
1990 (*r->rtt_func)(r->rtt_rt, r); 1992 (*r->rtt_func)(r->rtt_rt, r);
1991 pool_put(&rttimer_pool, r); 1993 pool_put(&rttimer_pool, r);
1992 RT_WLOCK(); 1994 RT_WLOCK();
1993 if (rtq->rtq_count > 0) 1995 if (rtq->rtq_count > 0)
1994 rtq->rtq_count--; 1996 rtq->rtq_count--;
1995 else 1997 else
1996 printf("rt_timer_timer: rtq_count reached 0\n"); 1998 printf("rt_timer_timer: rtq_count reached 0\n");
1997 } 1999 }
1998 } 2000 }
1999 RT_UNLOCK(); 2001 RT_UNLOCK();
2000 2002
2001 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 2003 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
2002} 2004}
2003 2005
2004static void 2006static void
2005rt_timer_timer(void *arg) 2007rt_timer_timer(void *arg)
2006{ 2008{
2007 2009
2008 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL); 2010 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
2009} 2011}
2010 2012
2011static struct rtentry * 2013static struct rtentry *
2012_rtcache_init(struct route *ro, int flag) 2014_rtcache_init(struct route *ro, int flag)
2013{ 2015{
2014 struct rtentry *rt; 2016 struct rtentry *rt;
2015 2017
2016 rtcache_invariants(ro); 2018 rtcache_invariants(ro);
2017 KASSERT(ro->_ro_rt == NULL); 2019 KASSERT(ro->_ro_rt == NULL);
2018 2020
2019 if (rtcache_getdst(ro) == NULL) 2021 if (rtcache_getdst(ro) == NULL)
2020 return NULL; 2022 return NULL;
2021 rt = rtalloc1(rtcache_getdst(ro), flag); 2023 rt = rtalloc1(rtcache_getdst(ro), flag);
2022 if (rt != NULL) { 2024 if (rt != NULL) {
2023 RT_RLOCK(); 2025 RT_RLOCK();
2024 if (ISSET(rt->rt_flags, RTF_UP)) { 2026 if (ISSET(rt->rt_flags, RTF_UP)) {
2025 ro->_ro_rt = rt; 2027 ro->_ro_rt = rt;
2026 ro->ro_rtcache_generation = rtcache_generation; 2028 ro->ro_rtcache_generation = rtcache_generation;
2027 rtcache_ref(rt, ro); 2029 rtcache_ref(rt, ro);
2028 } 2030 }
2029 RT_UNLOCK(); 2031 RT_UNLOCK();
2030 rt_unref(rt); 2032 rt_unref(rt);
2031 } 2033 }
2032 2034
2033 rtcache_invariants(ro); 2035 rtcache_invariants(ro);
2034 return ro->_ro_rt; 2036 return ro->_ro_rt;
2035} 2037}
2036 2038
2037struct rtentry * 2039struct rtentry *
2038rtcache_init(struct route *ro) 2040rtcache_init(struct route *ro)
2039{ 2041{
2040 2042
2041 return _rtcache_init(ro, 1); 2043 return _rtcache_init(ro, 1);
2042} 2044}
2043 2045
2044struct rtentry * 2046struct rtentry *
2045rtcache_init_noclone(struct route *ro) 2047rtcache_init_noclone(struct route *ro)
2046{ 2048{
2047 2049
2048 return _rtcache_init(ro, 0); 2050 return _rtcache_init(ro, 0);
2049} 2051}
2050 2052
2051struct rtentry * 2053struct rtentry *
2052rtcache_update(struct route *ro, int clone) 2054rtcache_update(struct route *ro, int clone)
2053{ 2055{
2054 2056
2055 ro->_ro_rt = NULL; 2057 ro->_ro_rt = NULL;
2056 return _rtcache_init(ro, clone); 2058 return _rtcache_init(ro, clone);
2057} 2059}
2058 2060
2059void 2061void
2060rtcache_copy(struct route *new_ro, struct route *old_ro) 2062rtcache_copy(struct route *new_ro, struct route *old_ro)
2061{ 2063{
2062 struct rtentry *rt; 2064 struct rtentry *rt;
2063 int ret; 2065 int ret;
2064 2066
2065 KASSERT(new_ro != old_ro); 2067 KASSERT(new_ro != old_ro);
2066 rtcache_invariants(new_ro); 2068 rtcache_invariants(new_ro);
2067 rtcache_invariants(old_ro); 2069 rtcache_invariants(old_ro);
2068 2070
2069 rt = rtcache_validate(old_ro); 2071 rt = rtcache_validate(old_ro);
2070 2072
2071 if (rtcache_getdst(old_ro) == NULL) 2073 if (rtcache_getdst(old_ro) == NULL)
2072 goto out; 2074 goto out;
2073 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); 2075 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
2074 if (ret != 0) 2076 if (ret != 0)
2075 goto out; 2077 goto out;
2076 2078
2077 RT_RLOCK(); 2079 RT_RLOCK();
2078 new_ro->_ro_rt = rt; 2080 new_ro->_ro_rt = rt;
2079 new_ro->ro_rtcache_generation = rtcache_generation; 2081 new_ro->ro_rtcache_generation = rtcache_generation;
2080 RT_UNLOCK(); 2082 RT_UNLOCK();
2081 rtcache_invariants(new_ro); 2083 rtcache_invariants(new_ro);
2082out: 2084out:
2083 rtcache_unref(rt, old_ro); 2085 rtcache_unref(rt, old_ro);
2084 return; 2086 return;
2085} 2087}
2086 2088
2087#if defined(RT_DEBUG) && defined(NET_MPSAFE) 2089#if defined(RT_DEBUG) && defined(NET_MPSAFE)
2088static void 2090static void
2089rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) 2091rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
2090{ 2092{
2091 char dst[64]; 2093 char dst[64];
2092 2094
2093 sockaddr_format(ro->ro_sa, dst, 64); 2095 sockaddr_format(ro->ro_sa, dst, 64);
2094 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, 2096 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
2095 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); 2097 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
2096} 2098}
2097#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) 2099#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro))
2098#else 2100#else
2099#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) 2101#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0)
2100#endif 2102#endif
2101 2103
2102static void 2104static void
2103rtcache_ref(struct rtentry *rt, struct route *ro) 2105rtcache_ref(struct rtentry *rt, struct route *ro)
2104{ 2106{
2105 2107
2106 KASSERT(rt != NULL); 2108 KASSERT(rt != NULL);
2107 2109
2108#ifdef NET_MPSAFE 2110#ifdef NET_MPSAFE
2109 RTCACHE_PSREF_TRACE(rt, ro); 2111 RTCACHE_PSREF_TRACE(rt, ro);
2110 ro->ro_bound = curlwp_bind(); 2112 ro->ro_bound = curlwp_bind();
2111 /* XXX Use a real caller's address */ 2113 /* XXX Use a real caller's address */
2112 PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref); 2114 PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref);
2113 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2115 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
2114#endif 2116#endif
2115} 2117}
2116 2118
2117void 2119void
2118rtcache_unref(struct rtentry *rt, struct route *ro) 2120rtcache_unref(struct rtentry *rt, struct route *ro)
2119{ 2121{
2120 2122
2121 if (rt == NULL) 2123 if (rt == NULL)
2122 return; 2124 return;
2123 2125
2124#ifdef NET_MPSAFE 2126#ifdef NET_MPSAFE
2125 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2127 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
2126 curlwp_bindx(ro->ro_bound); 2128 curlwp_bindx(ro->ro_bound);
2127 RTCACHE_PSREF_TRACE(rt, ro); 2129 RTCACHE_PSREF_TRACE(rt, ro);
2128#endif 2130#endif
2129} 2131}
2130 2132
2131struct rtentry * 2133struct rtentry *
2132rtcache_validate(struct route *ro) 2134rtcache_validate(struct route *ro)
2133{ 2135{
2134 struct rtentry *rt = NULL; 2136 struct rtentry *rt = NULL;
2135 2137
2136#ifdef NET_MPSAFE 2138#ifdef NET_MPSAFE
2137retry: 2139retry:
2138#endif 2140#endif
2139 rtcache_invariants(ro); 2141 rtcache_invariants(ro);
2140 RT_RLOCK(); 2142 RT_RLOCK();
2141 if (ro->ro_rtcache_generation != rtcache_generation) { 2143 if (ro->ro_rtcache_generation != rtcache_generation) {
2142 /* The cache is invalidated */ 2144 /* The cache is invalidated */
2143 rt = NULL; 2145 rt = NULL;
2144 goto out; 2146 goto out;
2145 } 2147 }
2146 2148
2147 rt = ro->_ro_rt; 2149 rt = ro->_ro_rt;
2148 if (rt == NULL) 2150 if (rt == NULL)
2149 goto out; 2151 goto out;
2150 2152
2151 if ((rt->rt_flags & RTF_UP) == 0) { 2153 if ((rt->rt_flags & RTF_UP) == 0) {
2152 rt = NULL; 2154 rt = NULL;
2153 goto out; 2155 goto out;
2154 } 2156 }
2155#ifdef NET_MPSAFE 2157#ifdef NET_MPSAFE
2156 if (ISSET(rt->rt_flags, RTF_UPDATING)) { 2158 if (ISSET(rt->rt_flags, RTF_UPDATING)) {
2157 if (rt_wait_ok()) { 2159 if (rt_wait_ok()) {
2158 RT_UNLOCK(); 2160 RT_UNLOCK();
2159 2161
2160 /* We can wait until the update is complete */ 2162 /* We can wait until the update is complete */
2161 rt_update_wait(); 2163 rt_update_wait();
2162 goto retry; 2164 goto retry;
2163 } else { 2165 } else {
2164 rt = NULL; 2166 rt = NULL;
2165 } 2167 }
2166 } else 2168 } else
2167#endif 2169#endif
2168 rtcache_ref(rt, ro); 2170 rtcache_ref(rt, ro);
2169out: 2171out:
2170 RT_UNLOCK(); 2172 RT_UNLOCK();
2171 return rt; 2173 return rt;
2172} 2174}
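
rtcache_validate() returns the cached route only when the cache generation matches and the entry is still RTF_UP; under NET_MPSAFE it also acquires a psref that the caller must release with rtcache_unref(). A minimal sketch of the usual consumer pattern, assuming a caller-owned struct route whose destination has already been set (example_* names are hypothetical):

	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/socket.h>
	#include <net/route.h>

	static int
	example_use_cached_route(struct route *ro)
	{
		struct rtentry *rt;

		rt = rtcache_validate(ro);
		if (rt == NULL) {
			/* Cache is empty or stale; look the route up again. */
			rt = rtcache_update(ro, 1);
			if (rt == NULL)
				return EHOSTUNREACH;
		}

		/* ... use rt->rt_ifp, rt->rt_gateway, ... */

		rtcache_unref(rt, ro);	/* releases the psref taken under NET_MPSAFE */
		return 0;
	}
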
2173 2175
2174struct rtentry * 2176struct rtentry *
2175rtcache_lookup2(struct route *ro, const struct sockaddr *dst, 2177rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
2176 int clone, int *hitp) 2178 int clone, int *hitp)
2177{ 2179{
2178 const struct sockaddr *odst; 2180 const struct sockaddr *odst;
2179 struct rtentry *rt = NULL; 2181 struct rtentry *rt = NULL;
2180 2182
2181 odst = rtcache_getdst(ro); 2183 odst = rtcache_getdst(ro);
2182 if (odst == NULL) 2184 if (odst == NULL)
2183 goto miss; 2185 goto miss;
2184 2186
2185 if (sockaddr_cmp(odst, dst) != 0) { 2187 if (sockaddr_cmp(odst, dst) != 0) {
2186 rtcache_free(ro); 2188 rtcache_free(ro);
2187 goto miss; 2189 goto miss;
2188 } 2190 }
2189 2191
2190 rt = rtcache_validate(ro); 2192 rt = rtcache_validate(ro);
2191 if (rt == NULL) { 2193 if (rt == NULL) {
2192 ro->_ro_rt = NULL; 2194 ro->_ro_rt = NULL;
2193 goto miss; 2195 goto miss;
2194 } 2196 }
2195 2197
2196 rtcache_invariants(ro); 2198 rtcache_invariants(ro);
2197 2199
2198 if (hitp != NULL) 2200 if (hitp != NULL)
2199 *hitp = 1; 2201 *hitp = 1;
2200 return rt; 2202 return rt;
2201miss: 2203miss:
2202 if (hitp != NULL) 2204 if (hitp != NULL)
2203 *hitp = 0; 2205 *hitp = 0;
2204 if (rtcache_setdst(ro, dst) == 0) 2206 if (rtcache_setdst(ro, dst) == 0)
2205 rt = _rtcache_init(ro, clone); 2207 rt = _rtcache_init(ro, clone);
2206 2208
2207 rtcache_invariants(ro); 2209 rtcache_invariants(ro);
2208 2210
2209 return rt; 2211 return rt;
2210} 2212}
2211 2213
2212void 2214void
2213rtcache_free(struct route *ro) 2215rtcache_free(struct route *ro)
2214{ 2216{
2215 2217
2216 ro->_ro_rt = NULL; 2218 ro->_ro_rt = NULL;
2217 if (ro->ro_sa != NULL) { 2219 if (ro->ro_sa != NULL) {
2218 sockaddr_free(ro->ro_sa); 2220 sockaddr_free(ro->ro_sa);
2219 ro->ro_sa = NULL; 2221 ro->ro_sa = NULL;
2220 } 2222 }
2221 rtcache_invariants(ro); 2223 rtcache_invariants(ro);
2222} 2224}
2223 2225
2224int 2226int
2225rtcache_setdst(struct route *ro, const struct sockaddr *sa) 2227rtcache_setdst(struct route *ro, const struct sockaddr *sa)
2226{ 2228{
2227 KASSERT(sa != NULL); 2229 KASSERT(sa != NULL);
2228 2230
2229 rtcache_invariants(ro); 2231 rtcache_invariants(ro);
2230 if (ro->ro_sa != NULL) { 2232 if (ro->ro_sa != NULL) {
2231 if (ro->ro_sa->sa_family == sa->sa_family) { 2233 if (ro->ro_sa->sa_family == sa->sa_family) {
2232 ro->_ro_rt = NULL; 2234 ro->_ro_rt = NULL;
2233 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa); 2235 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
2234 rtcache_invariants(ro); 2236 rtcache_invariants(ro);
2235 return 0; 2237 return 0;
2236 } 2238 }
2237 /* free ro_sa, wrong family */ 2239 /* free ro_sa, wrong family */
2238 rtcache_free(ro); 2240 rtcache_free(ro);
2239 } 2241 }
2240 2242
2241 KASSERT(ro->_ro_rt == NULL); 2243 KASSERT(ro->_ro_rt == NULL);
2242 2244
2243 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) { 2245 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
2244 rtcache_invariants(ro); 2246 rtcache_invariants(ro);
2245 return ENOMEM; 2247 return ENOMEM;
2246 } 2248 }
2247 rtcache_invariants(ro); 2249 rtcache_invariants(ro);
2248 return 0; 2250 return 0;
2249} 2251}
2250 2252
2251static void 2253static void
2252rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused) 2254rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
2253{ 2255{
2254 struct route **rop = p; 2256 struct route **rop = p;
2255 2257
2256 /* 2258 /*
2257 * We can't keep struct route itself as percpu data because percpu 2259 * We can't keep struct route itself as percpu data because percpu
2258 * may destroy it while enlarging its backing memory, so store a pointer. 2260 * may destroy it while enlarging its backing memory, so store a pointer.
2259 */ 2261 */
2260 *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP); 2262 *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP);
2261} 2263}
2262 2264
2263percpu_t * 2265percpu_t *
2264rtcache_percpu_alloc(void) 2266rtcache_percpu_alloc(void)
2265{ 2267{
2266 2268
2267 return percpu_create(sizeof(struct route *), 2269 return percpu_create(sizeof(struct route *),
2268 rtcache_percpu_init_cpu, NULL, NULL); 2270 rtcache_percpu_init_cpu, NULL, NULL);
2269} 2271}
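
Because the percpu slot holds only a pointer, a consumer dereferences it to reach its CPU-local struct route, bracketed by percpu_getref()/percpu_putref(). A minimal sketch under that assumption (example_* names are hypothetical, and example_ro_percpu is assumed to have been set up once with rtcache_percpu_alloc()):

	#include <sys/param.h>
	#include <sys/percpu.h>
	#include <sys/socket.h>
	#include <net/route.h>

	static percpu_t *example_ro_percpu;

	static void
	example_with_percpu_rtcache(const struct sockaddr *dst)
	{
		struct route *ro;

		/* The percpu data is a struct route *, so dereference the slot. */
		ro = *(struct route **)percpu_getref(example_ro_percpu);
		/* ... e.g. rtcache_lookup2(ro, dst, 1, NULL), send, rtcache_unref ... */
		percpu_putref(example_ro_percpu);
	}
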
2270 2272
2271const struct sockaddr * 2273const struct sockaddr *
2272rt_settag(struct rtentry *rt, const struct sockaddr *tag) 2274rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2273{ 2275{
2274 if (rt->rt_tag != tag) { 2276 if (rt->rt_tag != tag) {
2275 if (rt->rt_tag != NULL) 2277 if (rt->rt_tag != NULL)
2276 sockaddr_free(rt->rt_tag); 2278 sockaddr_free(rt->rt_tag);
2277 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT); 2279 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2278 } 2280 }
2279 return rt->rt_tag; 2281 return rt->rt_tag;
2280} 2282}
2281 2283
2282struct sockaddr * 2284struct sockaddr *
2283rt_gettag(const struct rtentry *rt) 2285rt_gettag(const struct rtentry *rt)
2284{ 2286{
2285 return rt->rt_tag; 2287 return rt->rt_tag;
2286} 2288}
2287 2289
2288int 2290int
2289rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp) 2291rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2290{ 2292{
2291 2293
2292 if ((rt->rt_flags & RTF_REJECT) != 0) { 2294 if ((rt->rt_flags & RTF_REJECT) != 0) {
2293 /* Mimic looutput */ 2295 /* Mimic looutput */
2294 if (ifp->if_flags & IFF_LOOPBACK) 2296 if (ifp->if_flags & IFF_LOOPBACK)
2295 return (rt->rt_flags & RTF_HOST) ? 2297 return (rt->rt_flags & RTF_HOST) ?
2296 EHOSTUNREACH : ENETUNREACH; 2298 EHOSTUNREACH : ENETUNREACH;
2297 else if (rt->rt_rmx.rmx_expire == 0 || 2299 else if (rt->rt_rmx.rmx_expire == 0 ||
2298 time_uptime < rt->rt_rmx.rmx_expire) 2300 time_uptime < rt->rt_rmx.rmx_expire)
2299 return (rt->rt_flags & RTF_GATEWAY) ? 2301 return (rt->rt_flags & RTF_GATEWAY) ?
2300 EHOSTUNREACH : EHOSTDOWN; 2302 EHOSTUNREACH : EHOSTDOWN;
2301 } 2303 }
2302 2304
2303 return 0; 2305 return 0;
2304} 2306}
2305 2307
2306void 2308void
2307rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *), 2309rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
2308 void *v, bool notify) 2310 void *v, bool notify)
2309{ 2311{
2310 2312
2311 for (;;) { 2313 for (;;) {
2312 int s; 2314 int s;
2313 int error; 2315 int error;
2314 struct rtentry *rt, *retrt = NULL; 2316 struct rtentry *rt, *retrt = NULL;
2315 2317
2316 RT_RLOCK(); 2318 RT_RLOCK();
2317 s = splsoftnet(); 2319 s = splsoftnet();
2318 rt = rtbl_search_matched_entry(family, f, v); 2320 rt = rtbl_search_matched_entry(family, f, v);
2319 if (rt == NULL) { 2321 if (rt == NULL) {
2320 splx(s); 2322 splx(s);
2321 RT_UNLOCK(); 2323 RT_UNLOCK();
2322 return; 2324 return;
2323 } 2325 }
2324 rt_ref(rt); 2326 rt_ref(rt);
2325 RT_REFCNT_TRACE(rt); 2327 RT_REFCNT_TRACE(rt);
2326 splx(s); 2328 splx(s);
2327 RT_UNLOCK(); 2329 RT_UNLOCK();
2328 2330
2329 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, 2331 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
2330 rt_mask(rt), rt->rt_flags, &retrt); 2332 rt_mask(rt), rt->rt_flags, &retrt);
2331 if (error == 0) { 2333 if (error == 0) {
2332 KASSERT(retrt == rt); 2334 KASSERT(retrt == rt);
2333 KASSERT((retrt->rt_flags & RTF_UP) == 0); 2335 KASSERT((retrt->rt_flags & RTF_UP) == 0);
2334 if (notify) 2336 if (notify)
2335 rt_newmsg(RTM_DELETE, retrt); 2337 rt_newmsg(RTM_DELETE, retrt);
2336 retrt->rt_ifp = NULL; 2338 retrt->rt_ifp = NULL;
2337 rt_unref(rt); 2339 rt_unref(rt);
2338 RT_REFCNT_TRACE(rt); 2340 RT_REFCNT_TRACE(rt);
2339 rt_free(retrt); 2341 rt_free(retrt);
2340 } else if (error == ESRCH) { 2342 } else if (error == ESRCH) {
2341 /* Someone deleted the entry already. */ 2343 /* Someone deleted the entry already. */
2342 rt_unref(rt); 2344 rt_unref(rt);
2343 RT_REFCNT_TRACE(rt); 2345 RT_REFCNT_TRACE(rt);
2344 } else { 2346 } else {
2345 log(LOG_ERR, "%s: unable to delete rtentry @ %p, " 2347 log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
2346 "error = %d\n", rt->rt_ifp->if_xname, rt, error); 2348 "error = %d\n", rt->rt_ifp->if_xname, rt, error);
2347 /* XXX how to treat this case? */ 2349 /* XXX how to treat this case? */
2348 } 2350 }
2349 } 2351 }
2350} 2352}
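
rt_delete_matched_entries() repeatedly searches for an entry the matcher reports as matching, deletes it via rtrequest(RTM_DELETE), and optionally announces the deletion. A minimal sketch of a matcher keyed on the outgoing interface, assuming the usual convention that the callback returns non-zero for a match (example_* names are hypothetical):

	#include <sys/param.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/route.h>

	static int
	example_match_by_ifp(struct rtentry *rt, void *v)
	{
		struct ifnet *ifp = v;

		/* Non-zero means "delete this entry". */
		return rt->rt_ifp == ifp;
	}

	static void
	example_purge_ifp_routes(struct ifnet *ifp)
	{

		/* Delete matching IPv4 entries, announcing each with RTM_DELETE. */
		rt_delete_matched_entries(AF_INET, example_match_by_ifp, ifp, true);
	}
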
2351 2353
2352static int 2354static int
2353rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *), 2355rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *),
2354 void *v) 2356 void *v)
2355{ 2357{
2356 2358
2357 return rtbl_walktree(family, f, v); 2359 return rtbl_walktree(family, f, v);
2358} 2360}
2359 2361
2360void 2362void
2361rt_replace_ifa_matched_entries(sa_family_t family, 2363rt_replace_ifa_matched_entries(sa_family_t family,
2362 int (*f)(struct rtentry *, void *), void *v, struct ifaddr *ifa) 2364 int (*f)(struct rtentry *, void *), void *v, struct ifaddr *ifa)
2363{ 2365{
2364 2366
2365 for (;;) { 2367 for (;;) {
2366 int s; 2368 int s;
2367#ifdef NET_MPSAFE 2369#ifdef NET_MPSAFE
2368 int error; 2370 int error;
2369#endif 2371#endif
2370 struct rtentry *rt; 2372 struct rtentry *rt;
2371 2373
2372 RT_RLOCK(); 2374 RT_RLOCK();
2373 s = splsoftnet(); 2375 s = splsoftnet();
2374 rt = rtbl_search_matched_entry(family, f, v); 2376 rt = rtbl_search_matched_entry(family, f, v);
2375 if (rt == NULL) { 2377 if (rt == NULL) {
2376 splx(s); 2378 splx(s);
2377 RT_UNLOCK(); 2379 RT_UNLOCK();
2378 return; 2380 return;
2379 } 2381 }
2380 rt_ref(rt); 2382 rt_ref(rt);
2381 RT_REFCNT_TRACE(rt); 2383 RT_REFCNT_TRACE(rt);
2382 splx(s); 2384 splx(s);
2383 RT_UNLOCK(); 2385 RT_UNLOCK();
2384 2386
2385#ifdef NET_MPSAFE 2387#ifdef NET_MPSAFE
2386 error = rt_update_prepare(rt); 2388 error = rt_update_prepare(rt);
2387 if (error == 0) { 2389 if (error == 0) {
2388 rt_replace_ifa(rt, ifa); 2390 rt_replace_ifa(rt, ifa);
2389 rt_update_finish(rt); 2391 rt_update_finish(rt);
2390 rt_newmsg(RTM_CHANGE, rt); 2392 rt_newmsg(RTM_CHANGE, rt);
2391 } else { 2393 } else {
2392 /* 2394 /*
2393 * If error != 0, the rtentry is being 2395 * If error != 0, the rtentry is being
2394 * destroyed, so it is safe to do 2396 * destroyed, so it is safe to do
2395 * nothing here. 2397 * nothing here.
2396 */ 2398 */
2397 } 2399 }
2398#else 2400#else
2399 rt_replace_ifa(rt, ifa); 2401 rt_replace_ifa(rt, ifa);
2400 rt_newmsg(RTM_CHANGE, rt); 2402 rt_newmsg(RTM_CHANGE, rt);
2401#endif 2403#endif
2402 rt_unref(rt); 2404 rt_unref(rt);
2403 RT_REFCNT_TRACE(rt); 2405 RT_REFCNT_TRACE(rt);
2404 } 2406 }
2405} 2407}
2406 2408
2407int 2409int
2408rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) 2410rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
2409{ 2411{
2410 int error; 2412 int error;
2411 2413
2412 RT_RLOCK(); 2414 RT_RLOCK();
2413 error = rt_walktree_locked(family, f, v); 2415 error = rt_walktree_locked(family, f, v);
2414 RT_UNLOCK(); 2416 RT_UNLOCK();
2415 2417
2416 return error; 2418 return error;
2417} 2419}
2418 2420
2419#ifdef DDB 2421#ifdef DDB
2420 2422
2421#include <machine/db_machdep.h> 2423#include <machine/db_machdep.h>
2422#include <ddb/db_interface.h> 2424#include <ddb/db_interface.h>
2423#include <ddb/db_output.h> 2425#include <ddb/db_output.h>
2424 2426
2425#define rt_expire rt_rmx.rmx_expire 2427#define rt_expire rt_rmx.rmx_expire
2426 2428
2427static void 2429static void
2428db_print_sa(const struct sockaddr *sa) 2430db_print_sa(const struct sockaddr *sa)
2429{ 2431{
2430 int len; 2432 int len;
2431 const u_char *p; 2433 const u_char *p;
2432 2434
2433 if (sa == NULL) { 2435 if (sa == NULL) {
2434 db_printf("[NULL]"); 2436 db_printf("[NULL]");
2435 return; 2437 return;
2436 } 2438 }
2437 2439
2438 p = (const u_char *)sa; 2440 p = (const u_char *)sa;
2439 len = sa->sa_len; 2441 len = sa->sa_len;
2440 db_printf("["); 2442 db_printf("[");
2441 while (len > 0) { 2443 while (len > 0) {
2442 db_printf("%d", *p); 2444 db_printf("%d", *p);
2443 p++; len--; 2445 p++; len--;
2444 if (len) db_printf(","); 2446 if (len) db_printf(",");
2445 } 2447 }
2446 db_printf("]\n"); 2448 db_printf("]\n");
2447} 2449}
2448 2450
2449static void 2451static void
2450db_print_ifa(struct ifaddr *ifa) 2452db_print_ifa(struct ifaddr *ifa)
2451{ 2453{
2452 if (ifa == NULL) 2454 if (ifa == NULL)
2453 return; 2455 return;
2454 db_printf(" ifa_addr="); 2456 db_printf(" ifa_addr=");
2455 db_print_sa(ifa->ifa_addr); 2457 db_print_sa(ifa->ifa_addr);
2456 db_printf(" ifa_dsta="); 2458 db_printf(" ifa_dsta=");
2457 db_print_sa(ifa->ifa_dstaddr); 2459 db_print_sa(ifa->ifa_dstaddr);
2458 db_printf(" ifa_mask="); 2460 db_printf(" ifa_mask=");
2459 db_print_sa(ifa->ifa_netmask); 2461 db_print_sa(ifa->ifa_netmask);
2460 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n", 2462 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
2461 ifa->ifa_flags, 2463 ifa->ifa_flags,
2462 ifa->ifa_refcnt, 2464 ifa->ifa_refcnt,
2463 ifa->ifa_metric); 2465 ifa->ifa_metric);
2464} 2466}
2465 2467
2466/* 2468/*
2467 * Function to pass to rt_walktree(). 2469 * Function to pass to rt_walktree().
2468 * Return non-zero error to abort walk. 2470 * Return non-zero error to abort walk.
2469 */ 2471 */
2470static int 2472static int
2471db_show_rtentry(struct rtentry *rt, void *w) 2473db_show_rtentry(struct rtentry *rt, void *w)
2472{ 2474{
2473 db_printf("rtentry=%p", rt); 2475 db_printf("rtentry=%p", rt);
2474 2476
2475 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2477 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
2476 rt->rt_flags, rt->rt_refcnt, 2478 rt->rt_flags, rt->rt_refcnt,
2477 rt->rt_use, (uint64_t)rt->rt_expire); 2479 rt->rt_use, (uint64_t)rt->rt_expire);
2478 2480
2479 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2481 db_printf(" key="); db_print_sa(rt_getkey(rt));
2480 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2482 db_printf(" mask="); db_print_sa(rt_mask(rt));
2481 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2483 db_printf(" gw="); db_print_sa(rt->rt_gateway);
2482 2484
2483 db_printf(" ifp=%p ", rt->rt_ifp); 2485 db_printf(" ifp=%p ", rt->rt_ifp);
2484 if (rt->rt_ifp) 2486 if (rt->rt_ifp)
2485 db_printf("(%s)", rt->rt_ifp->if_xname); 2487 db_printf("(%s)", rt->rt_ifp->if_xname);
2486 else 2488 else
2487 db_printf("(NULL)"); 2489 db_printf("(NULL)");
2488 2490
2489 db_printf(" ifa=%p\n", rt->rt_ifa); 2491 db_printf(" ifa=%p\n", rt->rt_ifa);
2490 db_print_ifa(rt->rt_ifa); 2492 db_print_ifa(rt->rt_ifa);
2491 2493
2492 db_printf(" gwroute=%p llinfo=%p\n", 2494 db_printf(" gwroute=%p llinfo=%p\n",
2493 rt->rt_gwroute, rt->rt_llinfo); 2495 rt->rt_gwroute, rt->rt_llinfo);
2494 2496
2495 return 0; 2497 return 0;
2496} 2498}
2497 2499
2498/* 2500/*
2499 * Function to print all the route trees. 2501 * Function to print all the route trees.
2500 * Use this from ddb: "show routes" 2502 * Use this from ddb: "show routes"
2501 */ 2503 */
2502void 2504void
2503db_show_routes(db_expr_t addr, bool have_addr, 2505db_show_routes(db_expr_t addr, bool have_addr,
2504 db_expr_t count, const char *modif) 2506 db_expr_t count, const char *modif)
2505{ 2507{
2506 2508
2507 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */ 2509 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */
2508 rt_walktree_locked(AF_INET, db_show_rtentry, NULL); 2510 rt_walktree_locked(AF_INET, db_show_rtentry, NULL);
2509} 2511}
2510#endif 2512#endif