Thu Aug 27 18:49:36 2020 UTC ()
npf: Don't stop early after sleeping and before processing instances.

We already check winfo->exit below, after processing instances and
before sleeping again.

Candidate fix for:

panic: kernel diagnostic assertion "LIST_EMPTY(&winfo->instances)" failed: file "/home/riastradh/netbsd/current/src/sys/rump/net/lib/libnpf/../../../..//net/npf/npf_worker.c", line 300 NPF instances must be discharged before the npfk_sysfini() call


(riastradh)
diff -r1.9 -r1.10 src/sys/net/npf/npf_worker.c

cvs diff -r1.9 -r1.10 src/sys/net/npf/npf_worker.c (switch to unified diff)

--- src/sys/net/npf/npf_worker.c 2020/05/30 20:54:54 1.9
+++ src/sys/net/npf/npf_worker.c 2020/08/27 18:49:36 1.10
@@ -1,303 +1,303 @@ @@ -1,303 +1,303 @@
1/*- 1/*-
2 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc. 2 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
5 * This material is based upon work partially supported by The 5 * This material is based upon work partially supported by The
6 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 6 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions 9 * modification, are permitted provided that the following conditions
10 * are met: 10 * are met:
11 * 1. Redistributions of source code must retain the above copyright 11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer. 12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright 13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the 14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution. 15 * documentation and/or other materials provided with the distribution.
16 * 16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE. 27 * POSSIBILITY OF SUCH DAMAGE.
28 */ 28 */
29 29
30#ifdef _KERNEL 30#ifdef _KERNEL
31#include <sys/cdefs.h> 31#include <sys/cdefs.h>
32__KERNEL_RCSID(0, "$NetBSD: npf_worker.c,v 1.9 2020/05/30 20:54:54 rmind Exp $"); 32__KERNEL_RCSID(0, "$NetBSD: npf_worker.c,v 1.10 2020/08/27 18:49:36 riastradh Exp $");
33 33
34#include <sys/param.h> 34#include <sys/param.h>
35#include <sys/types.h> 35#include <sys/types.h>
36 36
37#include <sys/mutex.h> 37#include <sys/mutex.h>
38#include <sys/kmem.h> 38#include <sys/kmem.h>
39#include <sys/kernel.h> 39#include <sys/kernel.h>
40#include <sys/kthread.h> 40#include <sys/kthread.h>
41#include <sys/cprng.h> 41#include <sys/cprng.h>
42#endif 42#endif
43 43
44#include "npf_impl.h" 44#include "npf_impl.h"
45 45
46typedef struct npf_worker { 46typedef struct npf_worker {
47 kmutex_t lock; 47 kmutex_t lock;
48 kcondvar_t cv; 48 kcondvar_t cv;
49 kcondvar_t exit_cv; 49 kcondvar_t exit_cv;
50 bool exit; 50 bool exit;
51 LIST_HEAD(, npf) instances; 51 LIST_HEAD(, npf) instances;
52 unsigned worker_count; 52 unsigned worker_count;
53 lwp_t * worker[]; 53 lwp_t * worker[];
54} npf_workerinfo_t; 54} npf_workerinfo_t;
55 55
56#define NPF_GC_MINWAIT (10) // 10 ms 56#define NPF_GC_MINWAIT (10) // 10 ms
57#define NPF_GC_MAXWAIT (10 * 1000) // 10 sec 57#define NPF_GC_MAXWAIT (10 * 1000) // 10 sec
58 58
59/* 59/*
60 * Flags for the npf_t::worker_flags field. 60 * Flags for the npf_t::worker_flags field.
61 */ 61 */
62#define WFLAG_ACTIVE 0x01 // instance enqueued for workers 62#define WFLAG_ACTIVE 0x01 // instance enqueued for workers
63#define WFLAG_INITED 0x02 // worker setup the instance 63#define WFLAG_INITED 0x02 // worker setup the instance
64#define WFLAG_REMOVE 0x04 // remove the instance 64#define WFLAG_REMOVE 0x04 // remove the instance
65 65
66static void npf_worker(void *) __dead; 66static void npf_worker(void *) __dead;
67static npf_workerinfo_t * worker_info __read_mostly; 67static npf_workerinfo_t * worker_info __read_mostly;
68 68
69int 69int
70npf_worker_sysinit(unsigned nworkers) 70npf_worker_sysinit(unsigned nworkers)
71{ 71{
72 const size_t len = offsetof(npf_workerinfo_t, worker[nworkers]); 72 const size_t len = offsetof(npf_workerinfo_t, worker[nworkers]);
73 npf_workerinfo_t *winfo; 73 npf_workerinfo_t *winfo;
74 74
75 KASSERT(worker_info == NULL); 75 KASSERT(worker_info == NULL);
76 76
77 if (!nworkers) { 77 if (!nworkers) {
78 return 0; 78 return 0;
79 } 79 }
80 80
81 winfo = kmem_zalloc(len, KM_SLEEP); 81 winfo = kmem_zalloc(len, KM_SLEEP);
82 winfo->worker_count = nworkers; 82 winfo->worker_count = nworkers;
83 mutex_init(&winfo->lock, MUTEX_DEFAULT, IPL_SOFTNET); 83 mutex_init(&winfo->lock, MUTEX_DEFAULT, IPL_SOFTNET);
84 cv_init(&winfo->exit_cv, "npfgcx"); 84 cv_init(&winfo->exit_cv, "npfgcx");
85 cv_init(&winfo->cv, "npfgcw"); 85 cv_init(&winfo->cv, "npfgcw");
86 LIST_INIT(&winfo->instances); 86 LIST_INIT(&winfo->instances);
87 worker_info = winfo; 87 worker_info = winfo;
88 88
89 for (unsigned i = 0; i < nworkers; i++) { 89 for (unsigned i = 0; i < nworkers; i++) {
90 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, 90 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE | KTHREAD_MUSTJOIN,
91 NULL, npf_worker, winfo, &winfo->worker[i], "npfgc%u", i)) { 91 NULL, npf_worker, winfo, &winfo->worker[i], "npfgc%u", i)) {
92 npf_worker_sysfini(); 92 npf_worker_sysfini();
93 return ENOMEM; 93 return ENOMEM;
94 } 94 }
95 } 95 }
96 return 0; 96 return 0;
97} 97}
98 98
99void 99void
100npf_worker_sysfini(void) 100npf_worker_sysfini(void)
101{ 101{
102 npf_workerinfo_t *winfo = worker_info; 102 npf_workerinfo_t *winfo = worker_info;
103 unsigned nworkers; 103 unsigned nworkers;
104 104
105 if (!winfo) { 105 if (!winfo) {
106 return; 106 return;
107 } 107 }
108 108
109 /* Notify the workers to exit. */ 109 /* Notify the workers to exit. */
110 mutex_enter(&winfo->lock); 110 mutex_enter(&winfo->lock);
111 winfo->exit = true; 111 winfo->exit = true;
112 cv_broadcast(&winfo->cv); 112 cv_broadcast(&winfo->cv);
113 mutex_exit(&winfo->lock); 113 mutex_exit(&winfo->lock);
114 114
115 /* Wait for them to finish and then destroy. */ 115 /* Wait for them to finish and then destroy. */
116 nworkers = winfo->worker_count; 116 nworkers = winfo->worker_count;
117 for (unsigned i = 0; i < nworkers; i++) { 117 for (unsigned i = 0; i < nworkers; i++) {
118 lwp_t *worker; 118 lwp_t *worker;
119 119
120 if ((worker = winfo->worker[i]) != NULL) { 120 if ((worker = winfo->worker[i]) != NULL) {
121 kthread_join(worker); 121 kthread_join(worker);
122 } 122 }
123 } 123 }
124 cv_destroy(&winfo->cv); 124 cv_destroy(&winfo->cv);
125 cv_destroy(&winfo->exit_cv); 125 cv_destroy(&winfo->exit_cv);
126 mutex_destroy(&winfo->lock); 126 mutex_destroy(&winfo->lock);
127 kmem_free(winfo, offsetof(npf_workerinfo_t, worker[nworkers])); 127 kmem_free(winfo, offsetof(npf_workerinfo_t, worker[nworkers]));
128 worker_info = NULL; 128 worker_info = NULL;
129} 129}
130 130
131int 131int
132npf_worker_addfunc(npf_t *npf, npf_workfunc_t work) 132npf_worker_addfunc(npf_t *npf, npf_workfunc_t work)
133{ 133{
134 KASSERTMSG(npf->worker_flags == 0, 134 KASSERTMSG(npf->worker_flags == 0,
135 "the task must be added before the npf_worker_enlist() call"); 135 "the task must be added before the npf_worker_enlist() call");
136 136
137 for (unsigned i = 0; i < NPF_MAX_WORKS; i++) { 137 for (unsigned i = 0; i < NPF_MAX_WORKS; i++) {
138 if (npf->worker_funcs[i] == NULL) { 138 if (npf->worker_funcs[i] == NULL) {
139 npf->worker_funcs[i] = work; 139 npf->worker_funcs[i] = work;
140 return 0; 140 return 0;
141 } 141 }
142 } 142 }
143 return -1; 143 return -1;
144} 144}
145 145
146void 146void
147npf_worker_signal(npf_t *npf) 147npf_worker_signal(npf_t *npf)
148{ 148{
149 npf_workerinfo_t *winfo = worker_info; 149 npf_workerinfo_t *winfo = worker_info;
150 150
151 if ((npf->worker_flags & WFLAG_ACTIVE) == 0) { 151 if ((npf->worker_flags & WFLAG_ACTIVE) == 0) {
152 return; 152 return;
153 } 153 }
154 KASSERT(winfo != NULL); 154 KASSERT(winfo != NULL);
155 155
156 mutex_enter(&winfo->lock); 156 mutex_enter(&winfo->lock);
157 cv_signal(&winfo->cv); 157 cv_signal(&winfo->cv);
158 mutex_exit(&winfo->lock); 158 mutex_exit(&winfo->lock);
159} 159}
160 160
161/* 161/*
162 * npf_worker_enlist: add the NPF instance for worker(s) to process. 162 * npf_worker_enlist: add the NPF instance for worker(s) to process.
163 */ 163 */
164void 164void
165npf_worker_enlist(npf_t *npf) 165npf_worker_enlist(npf_t *npf)
166{ 166{
167 npf_workerinfo_t *winfo = worker_info; 167 npf_workerinfo_t *winfo = worker_info;
168 168
169 KASSERT(npf->worker_flags == 0); 169 KASSERT(npf->worker_flags == 0);
170 if (!winfo) { 170 if (!winfo) {
171 return; 171 return;
172 } 172 }
173 173
174 mutex_enter(&winfo->lock); 174 mutex_enter(&winfo->lock);
175 LIST_INSERT_HEAD(&winfo->instances, npf, worker_entry); 175 LIST_INSERT_HEAD(&winfo->instances, npf, worker_entry);
176 npf->worker_flags |= WFLAG_ACTIVE; 176 npf->worker_flags |= WFLAG_ACTIVE;
177 mutex_exit(&winfo->lock); 177 mutex_exit(&winfo->lock);
178} 178}
179 179
180/* 180/*
181 * npf_worker_discharge: remove the NPF instance the list for workers. 181 * npf_worker_discharge: remove the NPF instance the list for workers.
182 * 182 *
183 * => May block waiting for a worker to finish processing the instance. 183 * => May block waiting for a worker to finish processing the instance.
184 */ 184 */
185void 185void
186npf_worker_discharge(npf_t *npf) 186npf_worker_discharge(npf_t *npf)
187{ 187{
188 npf_workerinfo_t *winfo = worker_info; 188 npf_workerinfo_t *winfo = worker_info;
189 189
190 if ((npf->worker_flags & WFLAG_ACTIVE) == 0) { 190 if ((npf->worker_flags & WFLAG_ACTIVE) == 0) {
191 return; 191 return;
192 } 192 }
193 KASSERT(winfo != NULL); 193 KASSERT(winfo != NULL);
194 194
195 /* 195 /*
196 * Notify the worker(s) that we are removing this instance. 196 * Notify the worker(s) that we are removing this instance.
197 */ 197 */
198 mutex_enter(&winfo->lock); 198 mutex_enter(&winfo->lock);
199 KASSERT(npf->worker_flags & WFLAG_ACTIVE); 199 KASSERT(npf->worker_flags & WFLAG_ACTIVE);
200 npf->worker_flags |= WFLAG_REMOVE; 200 npf->worker_flags |= WFLAG_REMOVE;
201 cv_broadcast(&winfo->cv); 201 cv_broadcast(&winfo->cv);
202 202
203 /* Wait for a worker to process this request. */ 203 /* Wait for a worker to process this request. */
204 while (npf->worker_flags & WFLAG_ACTIVE) { 204 while (npf->worker_flags & WFLAG_ACTIVE) {
205 cv_wait(&winfo->exit_cv, &winfo->lock); 205 cv_wait(&winfo->exit_cv, &winfo->lock);
206 } 206 }
207 mutex_exit(&winfo->lock); 207 mutex_exit(&winfo->lock);
208 KASSERT(npf->worker_flags == 0); 208 KASSERT(npf->worker_flags == 0);
209} 209}
210 210
211static void 211static void
212remove_npf_instance(npf_workerinfo_t *winfo, npf_t *npf) 212remove_npf_instance(npf_workerinfo_t *winfo, npf_t *npf)
213{ 213{
214 KASSERT(mutex_owned(&winfo->lock)); 214 KASSERT(mutex_owned(&winfo->lock));
215 KASSERT(npf->worker_flags & WFLAG_ACTIVE); 215 KASSERT(npf->worker_flags & WFLAG_ACTIVE);
216 KASSERT(npf->worker_flags & WFLAG_REMOVE); 216 KASSERT(npf->worker_flags & WFLAG_REMOVE);
217 217
218 /* 218 /*
219 * Remove the NPF instance: 219 * Remove the NPF instance:
220 * - Release any structures owned by the worker. 220 * - Release any structures owned by the worker.
221 * - Remove the instance from the list. 221 * - Remove the instance from the list.
222 * - Notify any thread waiting for removal to complete. 222 * - Notify any thread waiting for removal to complete.
223 */ 223 */
224 if (npf->worker_flags & WFLAG_INITED) { 224 if (npf->worker_flags & WFLAG_INITED) {
225 npfk_thread_unregister(npf); 225 npfk_thread_unregister(npf);
226 } 226 }
227 LIST_REMOVE(npf, worker_entry); 227 LIST_REMOVE(npf, worker_entry);
228 npf->worker_flags = 0; 228 npf->worker_flags = 0;
229 cv_broadcast(&winfo->exit_cv); 229 cv_broadcast(&winfo->exit_cv);
230} 230}
231 231
232static unsigned 232static unsigned
233process_npf_instance(npf_workerinfo_t *winfo, npf_t *npf) 233process_npf_instance(npf_workerinfo_t *winfo, npf_t *npf)
234{ 234{
235 npf_workfunc_t work; 235 npf_workfunc_t work;
236 236
237 KASSERT(mutex_owned(&winfo->lock)); 237 KASSERT(mutex_owned(&winfo->lock));
238 238
239 if (npf->worker_flags & WFLAG_REMOVE) { 239 if (npf->worker_flags & WFLAG_REMOVE) {
240 remove_npf_instance(winfo, npf); 240 remove_npf_instance(winfo, npf);
241 return NPF_GC_MAXWAIT; 241 return NPF_GC_MAXWAIT;
242 } 242 }
243 243
244 if ((npf->worker_flags & WFLAG_INITED) == 0) { 244 if ((npf->worker_flags & WFLAG_INITED) == 0) {
245 npfk_thread_register(npf); 245 npfk_thread_register(npf);
246 npf->worker_flags |= WFLAG_INITED; 246 npf->worker_flags |= WFLAG_INITED;
247 } 247 }
248 248
249 /* Run the jobs. */ 249 /* Run the jobs. */
250 for (unsigned i = 0; i < NPF_MAX_WORKS; i++) { 250 for (unsigned i = 0; i < NPF_MAX_WORKS; i++) {
251 if ((work = npf->worker_funcs[i]) == NULL) { 251 if ((work = npf->worker_funcs[i]) == NULL) {
252 break; 252 break;
253 } 253 }
254 work(npf); 254 work(npf);
255 } 255 }
256 256
257 return MAX(MIN(npf->worker_wait_time, NPF_GC_MAXWAIT), NPF_GC_MINWAIT); 257 return MAX(MIN(npf->worker_wait_time, NPF_GC_MAXWAIT), NPF_GC_MINWAIT);
258} 258}
259 259
260/* 260/*
261 * npf_worker: the main worker loop, processing enlisted NPF instances. 261 * npf_worker: the main worker loop, processing enlisted NPF instances.
262 * 262 *
263 * XXX: Currently, npf_workerinfo_t::lock would serialize all workers, 263 * XXX: Currently, npf_workerinfo_t::lock would serialize all workers,
264 * so there is no point to have more than one worker; but there might 264 * so there is no point to have more than one worker; but there might
265 * not be much point anyway. 265 * not be much point anyway.
266 */ 266 */
267static void 267static void
268npf_worker(void *arg) 268npf_worker(void *arg)
269{ 269{
270 npf_workerinfo_t *winfo = arg; 270 npf_workerinfo_t *winfo = arg;
271 npf_t *npf; 271 npf_t *npf;
272 272
273 mutex_enter(&winfo->lock); 273 mutex_enter(&winfo->lock);
274 while (!winfo->exit) { 274 for (;;) {
275 unsigned wait_time = NPF_GC_MAXWAIT; 275 unsigned wait_time = NPF_GC_MAXWAIT;
276 276
277 /* 277 /*
278 * Iterate all instances. We do not use LIST_FOREACH here, 278 * Iterate all instances. We do not use LIST_FOREACH here,
279 * since the instance can be removed. 279 * since the instance can be removed.
280 */ 280 */
281 npf = LIST_FIRST(&winfo->instances); 281 npf = LIST_FIRST(&winfo->instances);
282 while (npf) { 282 while (npf) {
283 npf_t *next = LIST_NEXT(npf, worker_entry); 283 npf_t *next = LIST_NEXT(npf, worker_entry);
284 unsigned i_wait_time = process_npf_instance(winfo, npf); 284 unsigned i_wait_time = process_npf_instance(winfo, npf);
285 wait_time = MIN(wait_time, i_wait_time); 285 wait_time = MIN(wait_time, i_wait_time);
286 npf = next; 286 npf = next;
287 } 287 }
288 288
289 /* 289 /*
290 * Sleep and periodically wake up, unless we get notified. 290 * Sleep and periodically wake up, unless we get notified.
291 */ 291 */
292 if (winfo->exit) { 292 if (winfo->exit) {
293 break; 293 break;
294 } 294 }
295 cv_timedwait(&winfo->cv, &winfo->lock, mstohz(wait_time)); 295 cv_timedwait(&winfo->cv, &winfo->lock, mstohz(wait_time));
296 } 296 }
297 mutex_exit(&winfo->lock); 297 mutex_exit(&winfo->lock);
298 298
299 KASSERTMSG(LIST_EMPTY(&winfo->instances), 299 KASSERTMSG(LIST_EMPTY(&winfo->instances),
300 "NPF instances must be discharged before the npfk_sysfini() call"); 300 "NPF instances must be discharged before the npfk_sysfini() call");
301 301
302 kthread_exit(0); 302 kthread_exit(0);
303} 303}