Mon Aug 31 19:51:30 2020 UTC
PR/55629: Andreas Gustafsson: Don't crash when an emulation does not provide
e_dtrace_syscall (like compat_netbsd32)


(christos)
diff -r1.20 -r1.21 src/sys/kern/kern_syscall.c

cvs diff -r1.20 -r1.21 src/sys/kern/kern_syscall.c

--- src/sys/kern/kern_syscall.c 2020/05/23 23:42:43 1.20
+++ src/sys/kern/kern_syscall.c 2020/08/31 19:51:30 1.21
@@ -1,301 +1,308 @@ @@ -1,301 +1,308 @@
1/* $NetBSD: kern_syscall.c,v 1.20 2020/05/23 23:42:43 ad Exp $ */ 1/* $NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software developed for The NetBSD Foundation 7 * This code is derived from software developed for The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32#include <sys/cdefs.h> 32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.20 2020/05/23 23:42:43 ad Exp $"); 33__KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $");
34 34
35#ifdef _KERNEL_OPT 35#ifdef _KERNEL_OPT
36#include "opt_modular.h" 36#include "opt_modular.h"
37#include "opt_syscall_debug.h" 37#include "opt_syscall_debug.h"
38#include "opt_ktrace.h" 38#include "opt_ktrace.h"
39#include "opt_ptrace.h" 39#include "opt_ptrace.h"
40#include "opt_dtrace.h" 40#include "opt_dtrace.h"
41#endif 41#endif
42 42
43/* XXX To get syscall prototypes. */ 43/* XXX To get syscall prototypes. */
44#define SYSVSHM 44#define SYSVSHM
45#define SYSVSEM 45#define SYSVSEM
46#define SYSVMSG 46#define SYSVMSG
47 47
48#include <sys/param.h> 48#include <sys/param.h>
49#include <sys/module.h> 49#include <sys/module.h>
50#include <sys/sched.h> 50#include <sys/sched.h>
51#include <sys/syscall.h> 51#include <sys/syscall.h>
52#include <sys/syscallargs.h> 52#include <sys/syscallargs.h>
53#include <sys/syscallvar.h> 53#include <sys/syscallvar.h>
54#include <sys/systm.h> 54#include <sys/systm.h>
55#include <sys/xcall.h> 55#include <sys/xcall.h>
56#include <sys/ktrace.h> 56#include <sys/ktrace.h>
57#include <sys/ptrace.h> 57#include <sys/ptrace.h>
58 58
59int 59int
60sys_nomodule(struct lwp *l, const void *v, register_t *retval) 60sys_nomodule(struct lwp *l, const void *v, register_t *retval)
61{ 61{
62#ifdef MODULAR 62#ifdef MODULAR
63 63
64 const struct sysent *sy; 64 const struct sysent *sy;
65 const struct emul *em; 65 const struct emul *em;
66 const struct sc_autoload *auto_list; 66 const struct sc_autoload *auto_list;
67 u_int code; 67 u_int code;
68 68
69 /* 69 /*
70 * Restart the syscall if we interrupted a module unload that 70 * Restart the syscall if we interrupted a module unload that
71 * failed. Acquiring kernconfig_lock delays us until any unload 71 * failed. Acquiring kernconfig_lock delays us until any unload
72 * has been completed or rolled back. 72 * has been completed or rolled back.
73 */ 73 */
74 kernconfig_lock(); 74 kernconfig_lock();
75 sy = l->l_sysent; 75 sy = l->l_sysent;
76 if (sy->sy_call != sys_nomodule) { 76 if (sy->sy_call != sys_nomodule) {
77 kernconfig_unlock(); 77 kernconfig_unlock();
78 return ERESTART; 78 return ERESTART;
79 } 79 }
80 /* 80 /*
81 * Try to autoload a module to satisfy the request. If it  81 * Try to autoload a module to satisfy the request. If it
82 * works, retry the request. 82 * works, retry the request.
83 */ 83 */
84 em = l->l_proc->p_emul; 84 em = l->l_proc->p_emul;
85 code = sy - em->e_sysent; 85 code = sy - em->e_sysent;
86 86
87 if ((auto_list = em->e_sc_autoload) != NULL) 87 if ((auto_list = em->e_sc_autoload) != NULL)
88 for (; auto_list->al_code > 0; auto_list++) { 88 for (; auto_list->al_code > 0; auto_list++) {
89 if (auto_list->al_code != code) { 89 if (auto_list->al_code != code) {
90 continue; 90 continue;
91 } 91 }
92 if (module_autoload(auto_list->al_module, 92 if (module_autoload(auto_list->al_module,
93 MODULE_CLASS_ANY) != 0 || 93 MODULE_CLASS_ANY) != 0 ||
94 sy->sy_call == sys_nomodule) { 94 sy->sy_call == sys_nomodule) {
95 break; 95 break;
96 } 96 }
97 kernconfig_unlock(); 97 kernconfig_unlock();
98 return ERESTART; 98 return ERESTART;
99 } 99 }
100 kernconfig_unlock(); 100 kernconfig_unlock();
101#endif /* MODULAR */ 101#endif /* MODULAR */
102 102
103 return sys_nosys(l, v, retval); 103 return sys_nosys(l, v, retval);
104} 104}
105 105
106int 106int
107syscall_establish(const struct emul *em, const struct syscall_package *sp) 107syscall_establish(const struct emul *em, const struct syscall_package *sp)
108{ 108{
109 struct sysent *sy; 109 struct sysent *sy;
110 int i; 110 int i;
111 111
112 KASSERT(kernconfig_is_held()); 112 KASSERT(kernconfig_is_held());
113 113
114 if (em == NULL) { 114 if (em == NULL) {
115 em = &emul_netbsd; 115 em = &emul_netbsd;
116 } 116 }
117 sy = em->e_sysent; 117 sy = em->e_sysent;
118 118
119 /* 119 /*
120 * Ensure that all preconditions are valid, since this is 120 * Ensure that all preconditions are valid, since this is
121 * an all or nothing deal. Once a system call is entered, 121 * an all or nothing deal. Once a system call is entered,
122 * it can become busy and we could be unable to remove it 122 * it can become busy and we could be unable to remove it
123 * on error. 123 * on error.
124 */ 124 */
125 for (i = 0; sp[i].sp_call != NULL; i++) { 125 for (i = 0; sp[i].sp_call != NULL; i++) {
126 if (sp[i].sp_code >= SYS_NSYSENT) 126 if (sp[i].sp_code >= SYS_NSYSENT)
127 return EINVAL; 127 return EINVAL;
128 if (sy[sp[i].sp_code].sy_call != sys_nomodule && 128 if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
129 sy[sp[i].sp_code].sy_call != sys_nosys) { 129 sy[sp[i].sp_code].sy_call != sys_nosys) {
130#ifdef DIAGNOSTIC 130#ifdef DIAGNOSTIC
131 printf("syscall %d is busy\n", sp[i].sp_code); 131 printf("syscall %d is busy\n", sp[i].sp_code);
132#endif 132#endif
133 return EBUSY; 133 return EBUSY;
134 } 134 }
135 } 135 }
136 /* Everything looks good, patch them in. */ 136 /* Everything looks good, patch them in. */
137 for (i = 0; sp[i].sp_call != NULL; i++) { 137 for (i = 0; sp[i].sp_call != NULL; i++) {
138 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 138 sy[sp[i].sp_code].sy_call = sp[i].sp_call;
139 } 139 }
140 140
141 return 0; 141 return 0;
142} 142}
143 143
144int 144int
145syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 145syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
146{ 146{
147 struct sysent *sy; 147 struct sysent *sy;
148 const uint32_t *sb; 148 const uint32_t *sb;
149 lwp_t *l; 149 lwp_t *l;
150 int i; 150 int i;
151 151
152 KASSERT(kernconfig_is_held()); 152 KASSERT(kernconfig_is_held());
153 153
154 if (em == NULL) { 154 if (em == NULL) {
155 em = &emul_netbsd; 155 em = &emul_netbsd;
156 } 156 }
157 sy = em->e_sysent; 157 sy = em->e_sysent;
158 sb = em->e_nomodbits; 158 sb = em->e_nomodbits;
159 159
160 /* 160 /*
161 * First, patch the system calls to sys_nomodule or sys_nosys 161 * First, patch the system calls to sys_nomodule or sys_nosys
162 * to gate further activity. 162 * to gate further activity.
163 */ 163 */
164 for (i = 0; sp[i].sp_call != NULL; i++) { 164 for (i = 0; sp[i].sp_call != NULL; i++) {
165 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 165 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
166 sy[sp[i].sp_code].sy_call = 166 sy[sp[i].sp_code].sy_call =
167 sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ? 167 sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
168 sys_nomodule : sys_nosys; 168 sys_nomodule : sys_nosys;
169 } 169 }
170 170
171 /* 171 /*
172 * Run a cross call to cycle through all CPUs. This does two 172 * Run a cross call to cycle through all CPUs. This does two
173 * things: lock activity provides a barrier and makes our update 173 * things: lock activity provides a barrier and makes our update
174 * of sy_call visible to all CPUs, and upon return we can be sure 174 * of sy_call visible to all CPUs, and upon return we can be sure
175 * that we see pertinent values of l_sysent posted by remote CPUs. 175 * that we see pertinent values of l_sysent posted by remote CPUs.
176 */ 176 */
177 xc_barrier(0); 177 xc_barrier(0);
178 178
179 /* 179 /*
180 * Now it's safe to check l_sysent. Run through all LWPs and see 180 * Now it's safe to check l_sysent. Run through all LWPs and see
181 * if anyone is still using the system call. 181 * if anyone is still using the system call.
182 */ 182 */
183 for (i = 0; sp[i].sp_call != NULL; i++) { 183 for (i = 0; sp[i].sp_call != NULL; i++) {
184 mutex_enter(&proc_lock); 184 mutex_enter(&proc_lock);
185 LIST_FOREACH(l, &alllwp, l_list) { 185 LIST_FOREACH(l, &alllwp, l_list) {
186 if (l->l_sysent == &sy[sp[i].sp_code]) { 186 if (l->l_sysent == &sy[sp[i].sp_code]) {
187 break; 187 break;
188 } 188 }
189 } 189 }
190 mutex_exit(&proc_lock); 190 mutex_exit(&proc_lock);
191 if (l == NULL) { 191 if (l == NULL) {
192 continue; 192 continue;
193 } 193 }
194 /* 194 /*
195 * We lose: one or more calls are still in use. Put back 195 * We lose: one or more calls are still in use. Put back
196 * the old entrypoints and act like nothing happened. 196 * the old entrypoints and act like nothing happened.
197 * When we drop kernconfig_lock, any system calls held in 197 * When we drop kernconfig_lock, any system calls held in
198 * sys_nomodule() will be restarted. 198 * sys_nomodule() will be restarted.
199 */ 199 */
200 for (i = 0; sp[i].sp_call != NULL; i++) { 200 for (i = 0; sp[i].sp_call != NULL; i++) {
201 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 201 sy[sp[i].sp_code].sy_call = sp[i].sp_call;
202 } 202 }
203 return EBUSY; 203 return EBUSY;
204 } 204 }
205 205
206 return 0; 206 return 0;
207} 207}
208 208
209/* 209/*
210 * Return true if system call tracing is enabled for the specified process. 210 * Return true if system call tracing is enabled for the specified process.
211 */ 211 */
212bool 212bool
213trace_is_enabled(struct proc *p) 213trace_is_enabled(struct proc *p)
214{ 214{
215#ifdef SYSCALL_DEBUG 215#ifdef SYSCALL_DEBUG
216 return (true); 216 return (true);
217#endif 217#endif
218#ifdef KTRACE 218#ifdef KTRACE
219 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 219 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
220 return (true); 220 return (true);
221#endif 221#endif
222#ifdef PTRACE 222#ifdef PTRACE
223 if (ISSET(p->p_slflag, PSL_SYSCALL)) 223 if (ISSET(p->p_slflag, PSL_SYSCALL))
224 return (true); 224 return (true);
225#endif 225#endif
226 226
227 return (false); 227 return (false);
228} 228}
229 229
230/* 230/*
231 * Start trace of particular system call. If process is being traced, 231 * Start trace of particular system call. If process is being traced,
232 * this routine is called by MD syscall dispatch code just before 232 * this routine is called by MD syscall dispatch code just before
233 * a system call is actually executed. 233 * a system call is actually executed.
234 */ 234 */
235int 235int
236trace_enter(register_t code, const struct sysent *sy, const void *args) 236trace_enter(register_t code, const struct sysent *sy, const void *args)
237{ 237{
238 int error = 0; 238 int error = 0;
 239#if defined(PTRACE) || defined(KDTRACE_HOOKS)
 240 struct proc *p = curlwp->l_proc;
 241#endif
239 242
240#ifdef KDTRACE_HOOKS 243#ifdef KDTRACE_HOOKS
241 if (sy->sy_entry) { 244 if (sy->sy_entry) {
242 struct emul *e = curlwp->l_proc->p_emul; 245 struct emul *e = p->p_emul;
243 (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0); 246 if (e->e_dtrace_syscall)
 247 (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args,
 248 NULL, 0);
244 } 249 }
245#endif 250#endif
246 251
247#ifdef SYSCALL_DEBUG 252#ifdef SYSCALL_DEBUG
248 scdebug_call(code, args); 253 scdebug_call(code, args);
249#endif /* SYSCALL_DEBUG */ 254#endif /* SYSCALL_DEBUG */
250 255
251 ktrsyscall(code, args, sy->sy_narg); 256 ktrsyscall(code, args, sy->sy_narg);
252 257
253#ifdef PTRACE 258#ifdef PTRACE
254 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 259 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
255 (PSL_SYSCALL|PSL_TRACED)) { 260 (PSL_SYSCALL|PSL_TRACED)) {
256 proc_stoptrace(TRAP_SCE, code, args, NULL, 0); 261 proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
257 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { 262 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
258 /* tracer will emulate syscall for us */ 263 /* tracer will emulate syscall for us */
259 error = EJUSTRETURN; 264 error = EJUSTRETURN;
260 } 265 }
261 } 266 }
262#endif 267#endif
263 return error; 268 return error;
264} 269}
265 270
266/* 271/*
267 * End trace of particular system call. If process is being traced, 272 * End trace of particular system call. If process is being traced,
268 * this routine is called by MD syscall dispatch code just after 273 * this routine is called by MD syscall dispatch code just after
269 * a system call finishes. 274 * a system call finishes.
270 * MD caller guarantees the passed 'code' is within the supported 275 * MD caller guarantees the passed 'code' is within the supported
271 * system call number range for emulation the process runs under. 276 * system call number range for emulation the process runs under.
272 */ 277 */
273void 278void
274trace_exit(register_t code, const struct sysent *sy, const void *args, 279trace_exit(register_t code, const struct sysent *sy, const void *args,
275 register_t rval[], int error) 280 register_t rval[], int error)
276{ 281{
277#if defined(PTRACE) || defined(KDTRACE_HOOKS) 282#if defined(PTRACE) || defined(KDTRACE_HOOKS)
278 struct proc *p = curlwp->l_proc; 283 struct proc *p = curlwp->l_proc;
279#endif 284#endif
280 285
281#ifdef KDTRACE_HOOKS 286#ifdef KDTRACE_HOOKS
282 if (sy->sy_return) { 287 if (sy->sy_return) {
283 (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args, 288 struct emul *e = p->p_emul;
284 rval, error); 289 if (e->e_dtrace_syscall)
 290 (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy,
 291 args, rval, error);
285 } 292 }
286#endif 293#endif
287 294
288#ifdef SYSCALL_DEBUG 295#ifdef SYSCALL_DEBUG
289 scdebug_ret(code, error, rval); 296 scdebug_ret(code, error, rval);
290#endif /* SYSCALL_DEBUG */ 297#endif /* SYSCALL_DEBUG */
291 298
292 ktrsysret(code, error, rval); 299 ktrsysret(code, error, rval);
293  300
294#ifdef PTRACE 301#ifdef PTRACE
295 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == 302 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
296 (PSL_SYSCALL|PSL_TRACED)) { 303 (PSL_SYSCALL|PSL_TRACED)) {
297 proc_stoptrace(TRAP_SCX, code, args, rval, error); 304 proc_stoptrace(TRAP_SCX, code, args, rval, error);
298 } 305 }
299 CLR(p->p_slflag, PSL_SYSCALLEMU); 306 CLR(p->p_slflag, PSL_SYSCALLEMU);
300#endif 307#endif
301} 308}