Tue Oct 13 00:24:35 2015 UTC
In execve_runproc(), update the p_waited entry for the process being
moved to SSTOP state, not for its parent.  (It is correct to update
the parent's p_nstopchild count.)  If the stopped process's p_waited
value is not already zero, its parent could be prevented from waiting
for the process.
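
For illustration, a minimal sketch (in C, not the committed diff) of the
bookkeeping this path should end up performing when it puts the process
into SSTOP, assuming the locks protecting these fields are already held:

	p->p_waited = 0;		/* the stopping child has not been waited for */
	p->p_pptr->p_nstopchild++;	/* its parent gains one stopped child */
	p->p_stat = SSTOP;		/* the exec'd process is now stopped */

With the child's own flag cleared, a subsequent wait(2) from the parent
can find the stopped process.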

Fixes PR kern/50298

Pullups will be requested for:

       NetBSD-7, -6, -6-0, -6-1, -5, -5-0, -5-1, and -5-2


(pgoyette)
diff -r1.418 -r1.419 src/sys/kern/kern_exec.c

--- src/sys/kern/kern_exec.c 2015/10/02 16:54:15 1.418
+++ src/sys/kern/kern_exec.c 2015/10/13 00:24:35 1.419
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exec.c,v 1.418 2015/10/02 16:54:15 christos Exp $	*/
+/*	$NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.418 2015/10/02 16:54:15 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $");
 
 #include "opt_exec.h"
 #include "opt_execfmt.h"
1018 mutex_exit(p->p_lock); 1018 mutex_exit(p->p_lock);
1019 kauth_cred_free(ocred); 1019 kauth_cred_free(ocred);
1020 } 1020 }
1021 1021
1022 return 0; 1022 return 0;
1023} 1023}
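As an aside, the net effect of the saved-id handling above can be stated as a post-condition: once credexec() succeeds, the saved ids mirror whatever effective ids the new image ended up with. A minimal sketch of that invariant, using only the kauth accessors already visible in credexec() (illustrative, not part of the diffed file):

	/* Illustrative post-condition after a successful credexec(). */
	KASSERT(kauth_cred_getsvuid(l->l_cred) == kauth_cred_geteuid(l->l_cred));
	KASSERT(kauth_cred_getsvgid(l->l_cred) == kauth_cred_getegid(l->l_cred));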
1024 1024
1025static void 1025static void
1026emulexec(struct lwp *l, struct exec_package *epp) 1026emulexec(struct lwp *l, struct exec_package *epp)
1027{ 1027{
1028 struct proc *p = l->l_proc; 1028 struct proc *p = l->l_proc;
1029 1029
1030 /* The emulation root will usually have been found when we looked 1030 /* The emulation root will usually have been found when we looked
1031 * for the elf interpreter (or similar), if not look now. */ 1031 * for the elf interpreter (or similar), if not look now. */
1032 if (epp->ep_esch->es_emul->e_path != NULL && 1032 if (epp->ep_esch->es_emul->e_path != NULL &&
1033 epp->ep_emul_root == NULL) 1033 epp->ep_emul_root == NULL)
1034 emul_find_root(l, epp); 1034 emul_find_root(l, epp);
1035 1035
1036 /* Any old emulation root got removed by fdcloseexec */ 1036 /* Any old emulation root got removed by fdcloseexec */
1037 rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER); 1037 rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1038 p->p_cwdi->cwdi_edir = epp->ep_emul_root; 1038 p->p_cwdi->cwdi_edir = epp->ep_emul_root;
1039 rw_exit(&p->p_cwdi->cwdi_lock); 1039 rw_exit(&p->p_cwdi->cwdi_lock);
1040 epp->ep_emul_root = NULL; 1040 epp->ep_emul_root = NULL;
1041 if (epp->ep_interp != NULL) 1041 if (epp->ep_interp != NULL)
1042 vrele(epp->ep_interp); 1042 vrele(epp->ep_interp);
1043 1043
1044 /* 1044 /*
1045 * Call emulation specific exec hook. This can setup per-process 1045 * Call emulation specific exec hook. This can setup per-process
1046 * p->p_emuldata or do any other per-process stuff an emulation needs. 1046 * p->p_emuldata or do any other per-process stuff an emulation needs.
1047 * 1047 *
1048 * If we are executing a process of a different emulation than the 1048 * If we are executing a process of a different emulation than the
1049 * original forked process, call e_proc_exit() of the old emulation 1049 * original forked process, call e_proc_exit() of the old emulation
1050 * first, then e_proc_exec() of new emulation. If the emulation is 1050 * first, then e_proc_exec() of new emulation. If the emulation is
1051 * the same, the exec hook code should deallocate any old emulation 1051 * the same, the exec hook code should deallocate any old emulation
1052 * resources held previously by this process. 1052 * resources held previously by this process.
1053 */ 1053 */
1054 if (p->p_emul && p->p_emul->e_proc_exit 1054 if (p->p_emul && p->p_emul->e_proc_exit
1055 && p->p_emul != epp->ep_esch->es_emul) 1055 && p->p_emul != epp->ep_esch->es_emul)
1056 (*p->p_emul->e_proc_exit)(p); 1056 (*p->p_emul->e_proc_exit)(p);
1057 1057
1058 /* 1058 /*
1059 * This is now LWP 1. 1059 * This is now LWP 1.
1060 */ 1060 */
1061 /* XXX elsewhere */ 1061 /* XXX elsewhere */
1062 mutex_enter(p->p_lock); 1062 mutex_enter(p->p_lock);
1063 p->p_nlwpid = 1; 1063 p->p_nlwpid = 1;
1064 l->l_lid = 1; 1064 l->l_lid = 1;
1065 mutex_exit(p->p_lock); 1065 mutex_exit(p->p_lock);
1066 1066
1067 /* 1067 /*
1068 * Call exec hook. Emulation code may NOT store reference to anything 1068 * Call exec hook. Emulation code may NOT store reference to anything
1069 * from &pack. 1069 * from &pack.
1070 */ 1070 */
1071 if (epp->ep_esch->es_emul->e_proc_exec) 1071 if (epp->ep_esch->es_emul->e_proc_exec)
1072 (*epp->ep_esch->es_emul->e_proc_exec)(p, epp); 1072 (*epp->ep_esch->es_emul->e_proc_exec)(p, epp);
1073 1073
1074 /* update p_emul, the old value is no longer needed */ 1074 /* update p_emul, the old value is no longer needed */
1075 p->p_emul = epp->ep_esch->es_emul; 1075 p->p_emul = epp->ep_esch->es_emul;
1076 1076
1077 /* ...and the same for p_execsw */ 1077 /* ...and the same for p_execsw */
1078 p->p_execsw = epp->ep_esch; 1078 p->p_execsw = epp->ep_esch;
1079 1079
1080#ifdef __HAVE_SYSCALL_INTERN 1080#ifdef __HAVE_SYSCALL_INTERN
1081 (*p->p_emul->e_syscall_intern)(p); 1081 (*p->p_emul->e_syscall_intern)(p);
1082#endif 1082#endif
1083 ktremul(); 1083 ktremul();
1084} 1084}
1085 1085
1086static int 1086static int
1087execve_runproc(struct lwp *l, struct execve_data * restrict data, 1087execve_runproc(struct lwp *l, struct execve_data * restrict data,
1088 bool no_local_exec_lock, bool is_spawn) 1088 bool no_local_exec_lock, bool is_spawn)
1089{ 1089{
1090 struct exec_package * const epp = &data->ed_pack; 1090 struct exec_package * const epp = &data->ed_pack;
1091 int error = 0; 1091 int error = 0;
1092 struct proc *p; 1092 struct proc *p;
1093 1093
1094 /* 1094 /*
1095 * In case of a posix_spawn operation, the child doing the exec 1095 * In case of a posix_spawn operation, the child doing the exec
1096 * might not hold the reader lock on exec_lock, but the parent 1096 * might not hold the reader lock on exec_lock, but the parent
1097 * will do this instead. 1097 * will do this instead.
1098 */ 1098 */
1099 KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock)); 1099 KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock));
1100 KASSERT(!no_local_exec_lock || is_spawn); 1100 KASSERT(!no_local_exec_lock || is_spawn);
1101 KASSERT(data != NULL); 1101 KASSERT(data != NULL);
1102 1102
1103 p = l->l_proc; 1103 p = l->l_proc;
1104 1104
1105 /* Get rid of other LWPs. */ 1105 /* Get rid of other LWPs. */
1106 if (p->p_nlwps > 1) { 1106 if (p->p_nlwps > 1) {
1107 mutex_enter(p->p_lock); 1107 mutex_enter(p->p_lock);
1108 exit_lwps(l); 1108 exit_lwps(l);
1109 mutex_exit(p->p_lock); 1109 mutex_exit(p->p_lock);
1110 } 1110 }
1111 KDASSERT(p->p_nlwps == 1); 1111 KDASSERT(p->p_nlwps == 1);
1112 1112
1113 /* Destroy any lwpctl info. */ 1113 /* Destroy any lwpctl info. */
1114 if (p->p_lwpctl != NULL) 1114 if (p->p_lwpctl != NULL)
1115 lwp_ctl_exit(); 1115 lwp_ctl_exit();
1116 1116
1117 /* Remove POSIX timers */ 1117 /* Remove POSIX timers */
1118 timers_free(p, TIMERS_POSIX); 1118 timers_free(p, TIMERS_POSIX);
1119 1119
1120 /* Set the PaX flags. */ 1120 /* Set the PaX flags. */
1121 p->p_pax = epp->ep_pax_flags; 1121 p->p_pax = epp->ep_pax_flags;
1122 1122
1123 /* 1123 /*
1124 * Do whatever is necessary to prepare the address space 1124 * Do whatever is necessary to prepare the address space
1125 * for remapping. Note that this might replace the current 1125 * for remapping. Note that this might replace the current
1126 * vmspace with another! 1126 * vmspace with another!
1127 */ 1127 */
1128 if (is_spawn) 1128 if (is_spawn)
1129 uvmspace_spawn(l, epp->ep_vm_minaddr, 1129 uvmspace_spawn(l, epp->ep_vm_minaddr,
1130 epp->ep_vm_maxaddr, 1130 epp->ep_vm_maxaddr,
1131 epp->ep_flags & EXEC_TOPDOWN_VM); 1131 epp->ep_flags & EXEC_TOPDOWN_VM);
1132 else 1132 else
1133 uvmspace_exec(l, epp->ep_vm_minaddr, 1133 uvmspace_exec(l, epp->ep_vm_minaddr,
1134 epp->ep_vm_maxaddr, 1134 epp->ep_vm_maxaddr,
1135 epp->ep_flags & EXEC_TOPDOWN_VM); 1135 epp->ep_flags & EXEC_TOPDOWN_VM);
1136 1136
1137 struct vmspace *vm; 1137 struct vmspace *vm;
1138 vm = p->p_vmspace; 1138 vm = p->p_vmspace;
1139 vm->vm_taddr = (void *)epp->ep_taddr; 1139 vm->vm_taddr = (void *)epp->ep_taddr;
1140 vm->vm_tsize = btoc(epp->ep_tsize); 1140 vm->vm_tsize = btoc(epp->ep_tsize);
1141 vm->vm_daddr = (void*)epp->ep_daddr; 1141 vm->vm_daddr = (void*)epp->ep_daddr;
1142 vm->vm_dsize = btoc(epp->ep_dsize); 1142 vm->vm_dsize = btoc(epp->ep_dsize);
1143 vm->vm_ssize = btoc(epp->ep_ssize); 1143 vm->vm_ssize = btoc(epp->ep_ssize);
1144 vm->vm_issize = 0; 1144 vm->vm_issize = 0;
1145 vm->vm_maxsaddr = (void *)epp->ep_maxsaddr; 1145 vm->vm_maxsaddr = (void *)epp->ep_maxsaddr;
1146 vm->vm_minsaddr = (void *)epp->ep_minsaddr; 1146 vm->vm_minsaddr = (void *)epp->ep_minsaddr;
1147 1147
1148#ifdef PAX_ASLR 1148#ifdef PAX_ASLR
1149 pax_aslr_init_vm(l, vm); 1149 pax_aslr_init_vm(l, vm);
1150#endif /* PAX_ASLR */ 1150#endif /* PAX_ASLR */
1151 1151
1152 /* Now map address space. */ 1152 /* Now map address space. */
1153 error = execve_dovmcmds(l, data); 1153 error = execve_dovmcmds(l, data);
1154 if (error != 0) 1154 if (error != 0)
1155 goto exec_abort; 1155 goto exec_abort;
1156 1156
1157 pathexec(epp, p, data->ed_pathstring); 1157 pathexec(epp, p, data->ed_pathstring);
1158 1158
1159 char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize); 1159 char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize);
1160 1160
1161 error = copyoutargs(data, l, newstack); 1161 error = copyoutargs(data, l, newstack);
1162 if (error != 0) 1162 if (error != 0)
1163 goto exec_abort; 1163 goto exec_abort;
1164 1164
1165 cwdexec(p); 1165 cwdexec(p);
1166 fd_closeexec(); /* handle close on exec */ 1166 fd_closeexec(); /* handle close on exec */
1167 1167
1168 if (__predict_false(ktrace_on)) 1168 if (__predict_false(ktrace_on))
1169 fd_ktrexecfd(); 1169 fd_ktrexecfd();
1170 1170
1171 execsigs(p); /* reset caught signals */ 1171 execsigs(p); /* reset caught signals */
1172 1172
1173 mutex_enter(p->p_lock); 1173 mutex_enter(p->p_lock);
1174 l->l_ctxlink = NULL; /* reset ucontext link */ 1174 l->l_ctxlink = NULL; /* reset ucontext link */
1175 p->p_acflag &= ~AFORK; 1175 p->p_acflag &= ~AFORK;
1176 p->p_flag |= PK_EXEC; 1176 p->p_flag |= PK_EXEC;
1177 mutex_exit(p->p_lock); 1177 mutex_exit(p->p_lock);
1178 1178
1179 /* 1179 /*
1180 * Stop profiling. 1180 * Stop profiling.
1181 */ 1181 */
1182 if ((p->p_stflag & PST_PROFIL) != 0) { 1182 if ((p->p_stflag & PST_PROFIL) != 0) {
1183 mutex_spin_enter(&p->p_stmutex); 1183 mutex_spin_enter(&p->p_stmutex);
1184 stopprofclock(p); 1184 stopprofclock(p);
1185 mutex_spin_exit(&p->p_stmutex); 1185 mutex_spin_exit(&p->p_stmutex);
1186 } 1186 }
1187 1187
1188 /* 1188 /*
1189 * It's OK to test PL_PPWAIT unlocked here, as other LWPs have 1189 * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1190 * exited and exec()/exit() are the only places it will be cleared. 1190 * exited and exec()/exit() are the only places it will be cleared.
1191 */ 1191 */
1192 if ((p->p_lflag & PL_PPWAIT) != 0) { 1192 if ((p->p_lflag & PL_PPWAIT) != 0) {
1193#if 0 1193#if 0
1194 lwp_t *lp; 1194 lwp_t *lp;
1195 1195
1196 mutex_enter(proc_lock); 1196 mutex_enter(proc_lock);
1197 lp = p->p_vforklwp; 1197 lp = p->p_vforklwp;
1198 p->p_vforklwp = NULL; 1198 p->p_vforklwp = NULL;
1199 1199
1200 l->l_lwpctl = NULL; /* was on loan from blocked parent */ 1200 l->l_lwpctl = NULL; /* was on loan from blocked parent */
1201 p->p_lflag &= ~PL_PPWAIT; 1201 p->p_lflag &= ~PL_PPWAIT;
1202 1202
1203 lp->l_pflag &= ~LP_VFORKWAIT; /* XXX */ 1203 lp->l_pflag &= ~LP_VFORKWAIT; /* XXX */
1204 cv_broadcast(&lp->l_waitcv); 1204 cv_broadcast(&lp->l_waitcv);
1205 mutex_exit(proc_lock); 1205 mutex_exit(proc_lock);
1206#else 1206#else
1207 mutex_enter(proc_lock); 1207 mutex_enter(proc_lock);
1208 l->l_lwpctl = NULL; /* was on loan from blocked parent */ 1208 l->l_lwpctl = NULL; /* was on loan from blocked parent */
1209 p->p_lflag &= ~PL_PPWAIT; 1209 p->p_lflag &= ~PL_PPWAIT;
1210 cv_broadcast(&p->p_pptr->p_waitcv); 1210 cv_broadcast(&p->p_pptr->p_waitcv);
1211 mutex_exit(proc_lock); 1211 mutex_exit(proc_lock);
1212#endif 1212#endif
1213 } 1213 }
1214 1214
1215 error = credexec(l, &data->ed_attr); 1215 error = credexec(l, &data->ed_attr);
1216 if (error) 1216 if (error)
1217 goto exec_abort; 1217 goto exec_abort;
1218 1218
1219#if defined(__HAVE_RAS) 1219#if defined(__HAVE_RAS)
1220 /* 1220 /*
1221 * Remove all RASs from the address space. 1221 * Remove all RASs from the address space.
1222 */ 1222 */
1223 ras_purgeall(); 1223 ras_purgeall();
1224#endif 1224#endif
1225 1225
1226 doexechooks(p); 1226 doexechooks(p);
1227 1227
1228 /* 1228 /*
1229 * Set initial SP at the top of the stack. 1229 * Set initial SP at the top of the stack.
1230 * 1230 *
1231 * Note that on machines where stack grows up (e.g. hppa), SP points to 1231 * Note that on machines where stack grows up (e.g. hppa), SP points to
1232 * the end of arg/env strings. Userland guesses the address of argc 1232 * the end of arg/env strings. Userland guesses the address of argc
1233 * via ps_strings::ps_argvstr. 1233 * via ps_strings::ps_argvstr.
1234 */ 1234 */
1235 1235
1236 /* Setup new registers and do misc. setup. */ 1236 /* Setup new registers and do misc. setup. */
1237 (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack); 1237 (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack);
1238 if (epp->ep_esch->es_setregs) 1238 if (epp->ep_esch->es_setregs)
1239 (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack); 1239 (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack);
1240 1240
1241 /* Provide a consistent LWP private setting */ 1241 /* Provide a consistent LWP private setting */
1242 (void)lwp_setprivate(l, NULL); 1242 (void)lwp_setprivate(l, NULL);
1243 1243
1244 /* Discard all PCU state; need to start fresh */ 1244 /* Discard all PCU state; need to start fresh */
1245 pcu_discard_all(l); 1245 pcu_discard_all(l);
1246 1246
1247 /* map the process's signal trampoline code */ 1247 /* map the process's signal trampoline code */
1248 if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) { 1248 if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) {
1249 DPRINTF(("%s: map sigcode failed %d\n", __func__, error)); 1249 DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1250 goto exec_abort; 1250 goto exec_abort;
1251 } 1251 }
1252 1252
1253 pool_put(&exec_pool, data->ed_argp); 1253 pool_put(&exec_pool, data->ed_argp);
1254 1254
1255 /* notify others that we exec'd */ 1255 /* notify others that we exec'd */
1256 KNOTE(&p->p_klist, NOTE_EXEC); 1256 KNOTE(&p->p_klist, NOTE_EXEC);
1257 1257
1258 kmem_free(epp->ep_hdr, epp->ep_hdrlen); 1258 kmem_free(epp->ep_hdr, epp->ep_hdrlen);
1259 1259
1260 SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0); 1260 SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0);
1261 1261
1262 emulexec(l, epp); 1262 emulexec(l, epp);
1263 1263
1264 /* Allow new references from the debugger/procfs. */ 1264 /* Allow new references from the debugger/procfs. */
1265 rw_exit(&p->p_reflock); 1265 rw_exit(&p->p_reflock);
1266 if (!no_local_exec_lock) 1266 if (!no_local_exec_lock)
1267 rw_exit(&exec_lock); 1267 rw_exit(&exec_lock);
1268 1268
1269 mutex_enter(proc_lock); 1269 mutex_enter(proc_lock);
1270 1270
1271 if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) { 1271 if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1272 ksiginfo_t ksi; 1272 ksiginfo_t ksi;
1273 1273
1274 KSI_INIT_EMPTY(&ksi); 1274 KSI_INIT_EMPTY(&ksi);
1275 ksi.ksi_signo = SIGTRAP; 1275 ksi.ksi_signo = SIGTRAP;
1276 ksi.ksi_lid = l->l_lid; 1276 ksi.ksi_lid = l->l_lid;
1277 kpsignal(p, &ksi, NULL); 1277 kpsignal(p, &ksi, NULL);
1278 } 1278 }
1279 1279
1280 if (p->p_sflag & PS_STOPEXEC) { 1280 if (p->p_sflag & PS_STOPEXEC) {
1281 ksiginfoq_t kq; 1281 ksiginfoq_t kq;
1282 1282
1283 KERNEL_UNLOCK_ALL(l, &l->l_biglocks); 1283 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1284 p->p_pptr->p_nstopchild++; 1284 p->p_pptr->p_nstopchild++;
1285 p->p_pptr->p_waited = 0; 1285 p->p_waited = 0;
1286 mutex_enter(p->p_lock); 1286 mutex_enter(p->p_lock);
1287 ksiginfo_queue_init(&kq); 1287 ksiginfo_queue_init(&kq);
1288 sigclearall(p, &contsigmask, &kq); 1288 sigclearall(p, &contsigmask, &kq);
1289 lwp_lock(l); 1289 lwp_lock(l);
1290 l->l_stat = LSSTOP; 1290 l->l_stat = LSSTOP;
1291 p->p_stat = SSTOP; 1291 p->p_stat = SSTOP;
1292 p->p_nrlwps--; 1292 p->p_nrlwps--;
1293 lwp_unlock(l); 1293 lwp_unlock(l);
1294 mutex_exit(p->p_lock); 1294 mutex_exit(p->p_lock);
1295 mutex_exit(proc_lock); 1295 mutex_exit(proc_lock);
1296 lwp_lock(l); 1296 lwp_lock(l);
1297 mi_switch(l); 1297 mi_switch(l);
1298 ksiginfo_queue_drain(&kq); 1298 ksiginfo_queue_drain(&kq);
1299 KERNEL_LOCK(l->l_biglocks, l); 1299 KERNEL_LOCK(l->l_biglocks, l);
1300 } else { 1300 } else {
1301 mutex_exit(proc_lock); 1301 mutex_exit(proc_lock);
1302 } 1302 }
1303 1303
1304 pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring); 1304 pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1305 pathbuf_destroy(data->ed_pathbuf); 1305 pathbuf_destroy(data->ed_pathbuf);
1306 PNBUF_PUT(data->ed_resolvedpathbuf); 1306 PNBUF_PUT(data->ed_resolvedpathbuf);
1307 DPRINTF(("%s finished\n", __func__)); 1307 DPRINTF(("%s finished\n", __func__));
1308 return EJUSTRETURN; 1308 return EJUSTRETURN;
1309 1309
1310 exec_abort: 1310 exec_abort:
1311 SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); 1311 SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
1312 rw_exit(&p->p_reflock); 1312 rw_exit(&p->p_reflock);
1313 if (!no_local_exec_lock) 1313 if (!no_local_exec_lock)
1314 rw_exit(&exec_lock); 1314 rw_exit(&exec_lock);
1315 1315
1316 pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring); 1316 pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1317 pathbuf_destroy(data->ed_pathbuf); 1317 pathbuf_destroy(data->ed_pathbuf);
1318 PNBUF_PUT(data->ed_resolvedpathbuf); 1318 PNBUF_PUT(data->ed_resolvedpathbuf);
1319 1319
1320 /* 1320 /*
1321 * the old process doesn't exist anymore. exit gracefully. 1321 * the old process doesn't exist anymore. exit gracefully.
1322 * get rid of the (new) address space we have created, if any, get rid 1322 * get rid of the (new) address space we have created, if any, get rid
1323 * of our namei data and vnode, and exit noting failure 1323 * of our namei data and vnode, and exit noting failure
1324 */ 1324 */
1325 uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS, 1325 uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1326 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS); 1326 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1327 1327
1328 exec_free_emul_arg(epp); 1328 exec_free_emul_arg(epp);
1329 pool_put(&exec_pool, data->ed_argp); 1329 pool_put(&exec_pool, data->ed_argp);
1330 kmem_free(epp->ep_hdr, epp->ep_hdrlen); 1330 kmem_free(epp->ep_hdr, epp->ep_hdrlen);
1331 if (epp->ep_emul_root != NULL) 1331 if (epp->ep_emul_root != NULL)
1332 vrele(epp->ep_emul_root); 1332 vrele(epp->ep_emul_root);
1333 if (epp->ep_interp != NULL) 1333 if (epp->ep_interp != NULL)
1334 vrele(epp->ep_interp); 1334 vrele(epp->ep_interp);
1335 1335
1336 /* Acquire the sched-state mutex (exit1() will release it). */ 1336 /* Acquire the sched-state mutex (exit1() will release it). */
1337 if (!is_spawn) { 1337 if (!is_spawn) {
1338 mutex_enter(p->p_lock); 1338 mutex_enter(p->p_lock);
1339 exit1(l, W_EXITCODE(error, SIGABRT)); 1339 exit1(l, W_EXITCODE(error, SIGABRT));
1340 } 1340 }
1341 1341
1342 return error; 1342 return error;
1343} 1343}
1344 1344
1345int 1345int
1346execve1(struct lwp *l, const char *path, char * const *args, 1346execve1(struct lwp *l, const char *path, char * const *args,
1347 char * const *envs, execve_fetch_element_t fetch_element) 1347 char * const *envs, execve_fetch_element_t fetch_element)
1348{ 1348{
1349 struct execve_data data; 1349 struct execve_data data;
1350 int error; 1350 int error;
1351 1351
1352 error = execve_loadvm(l, path, args, envs, fetch_element, &data); 1352 error = execve_loadvm(l, path, args, envs, fetch_element, &data);
1353 if (error) 1353 if (error)
1354 return error; 1354 return error;
1355 error = execve_runproc(l, &data, false, false); 1355 error = execve_runproc(l, &data, false, false);
1356 return error; 1356 return error;
1357} 1357}
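Note that execve1() (and execve_runproc() above) report success as EJUSTRETURN rather than 0; the machine-dependent syscall return path is expected to treat EJUSTRETURN as "leave the just-initialized register state alone". A hedged sketch of the kind of syscall stub that funnels into execve1() follows; the name my_sys_execve is hypothetical and the exact argument-structure layout is an assumption:

	static int
	my_sys_execve(struct lwp *l, const struct sys_execve_args *uap,
	    register_t *retval)
	{
		/* Hand the user pointers to execve1(); EJUSTRETURN (or a
		 * real error) propagates back to the syscall dispatcher. */
		return execve1(l, SCARG(uap, path), SCARG(uap, argp),
		    SCARG(uap, envp), execve_fetch_element);
	}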
1358 1358
1359static size_t 1359static size_t
1360fromptrsz(const struct exec_package *epp) 1360fromptrsz(const struct exec_package *epp)
1361{ 1361{
1362 return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *); 1362 return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *);
1363} 1363}
1364 1364
1365static size_t 1365static size_t
1366ptrsz(const struct exec_package *epp) 1366ptrsz(const struct exec_package *epp)
1367{ 1367{
1368 return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *); 1368 return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *);
1369} 1369}
1370 1370
1371static size_t 1371static size_t
1372calcargs(struct execve_data * restrict data, const size_t argenvstrlen) 1372calcargs(struct execve_data * restrict data, const size_t argenvstrlen)
1373{ 1373{
1374 struct exec_package * const epp = &data->ed_pack; 1374 struct exec_package * const epp = &data->ed_pack;
1375 1375
1376 const size_t nargenvptrs = 1376 const size_t nargenvptrs =
1377 1 + /* long argc */ 1377 1 + /* long argc */
1378 data->ed_argc + /* char *argv[] */ 1378 data->ed_argc + /* char *argv[] */
1379 1 + /* \0 */ 1379 1 + /* \0 */
1380 data->ed_envc + /* char *env[] */ 1380 data->ed_envc + /* char *env[] */
1381 1 + /* \0 */ 1381 1 + /* \0 */
1382 epp->ep_esch->es_arglen; /* auxinfo */ 1382 epp->ep_esch->es_arglen; /* auxinfo */
1383 1383
1384 return (nargenvptrs * ptrsz(epp)) + argenvstrlen; 1384 return (nargenvptrs * ptrsz(epp)) + argenvstrlen;
1385} 1385}
1386 1386
1387static size_t 1387static size_t
1388calcstack(struct execve_data * restrict data, const size_t gaplen) 1388calcstack(struct execve_data * restrict data, const size_t gaplen)
1389{ 1389{
1390 struct exec_package * const epp = &data->ed_pack; 1390 struct exec_package * const epp = &data->ed_pack;
1391 1391
1392 data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode - 1392 data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode -
1393 epp->ep_esch->es_emul->e_sigcode; 1393 epp->ep_esch->es_emul->e_sigcode;
1394 1394
1395 data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ? 1395 data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ?
1396 sizeof(struct ps_strings32) : sizeof(struct ps_strings); 1396 sizeof(struct ps_strings32) : sizeof(struct ps_strings);
1397 1397
1398 const size_t sigcode_psstr_sz = 1398 const size_t sigcode_psstr_sz =
1399 data->ed_szsigcode + /* sigcode */ 1399 data->ed_szsigcode + /* sigcode */
1400 data->ed_ps_strings_sz + /* ps_strings */ 1400 data->ed_ps_strings_sz + /* ps_strings */
1401 STACK_PTHREADSPACE; /* pthread space */ 1401 STACK_PTHREADSPACE; /* pthread space */
1402 1402
1403 const size_t stacklen = 1403 const size_t stacklen =
1404 data->ed_argslen + 1404 data->ed_argslen +
1405 gaplen + 1405 gaplen +
1406 sigcode_psstr_sz; 1406 sigcode_psstr_sz;
1407 1407
1408 /* make the stack "safely" aligned */ 1408 /* make the stack "safely" aligned */
1409 return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES); 1409 return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES);
1410} 1410}
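A worked example may make the two sizing helpers easier to check. All figures below are hypothetical and assume an LP64 kernel with neither EXEC_32 nor EXEC_FROM32 set, so ptrsz() returns 8:

	/*
	 * Hypothetical sizing pass:
	 *   argc = 3, envc = 20, es_arglen = 18, copied strings = 300 bytes
	 *   nargenvptrs = 1 + 3 + 1 + 20 + 1 + 18 = 44
	 *   calcargs()  = 44 * 8 + 300 = 652 bytes
	 *   calcstack() = 652 + gaplen + szsigcode + sizeof(struct ps_strings)
	 *                 + STACK_PTHREADSPACE, rounded up for stack alignment
	 *                 by STACK_LEN_ALIGN().
	 */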
1411 1411
1412static int 1412static int
1413copyoutargs(struct execve_data * restrict data, struct lwp *l, 1413copyoutargs(struct execve_data * restrict data, struct lwp *l,
1414 char * const newstack) 1414 char * const newstack)
1415{ 1415{
1416 struct exec_package * const epp = &data->ed_pack; 1416 struct exec_package * const epp = &data->ed_pack;
1417 struct proc *p = l->l_proc; 1417 struct proc *p = l->l_proc;
1418 int error; 1418 int error;
1419 1419
1420 /* remember information about the process */ 1420 /* remember information about the process */
1421 data->ed_arginfo.ps_nargvstr = data->ed_argc; 1421 data->ed_arginfo.ps_nargvstr = data->ed_argc;
1422 data->ed_arginfo.ps_nenvstr = data->ed_envc; 1422 data->ed_arginfo.ps_nenvstr = data->ed_envc;
1423 1423
1424 /* 1424 /*
1425 * Allocate the stack address passed to the newly execve()'ed process. 1425 * Allocate the stack address passed to the newly execve()'ed process.
1426 * 1426 *
1427 * The new stack address will be set to the SP (stack pointer) register 1427 * The new stack address will be set to the SP (stack pointer) register
1428 * in setregs(). 1428 * in setregs().
1429 */ 1429 */
1430 1430
1431 char *newargs = STACK_ALLOC( 1431 char *newargs = STACK_ALLOC(
1432 STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen); 1432 STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen);
1433 1433
1434 error = (*epp->ep_esch->es_copyargs)(l, epp, 1434 error = (*epp->ep_esch->es_copyargs)(l, epp,
1435 &data->ed_arginfo, &newargs, data->ed_argp); 1435 &data->ed_arginfo, &newargs, data->ed_argp);
1436 1436
1437 if (epp->ep_path) { 1437 if (epp->ep_path) {
1438 PNBUF_PUT(epp->ep_path); 1438 PNBUF_PUT(epp->ep_path);
1439 epp->ep_path = NULL; 1439 epp->ep_path = NULL;
1440 } 1440 }
1441 if (error) { 1441 if (error) {
1442 DPRINTF(("%s: copyargs failed %d\n", __func__, error)); 1442 DPRINTF(("%s: copyargs failed %d\n", __func__, error));
1443 return error; 1443 return error;
1444 } 1444 }
1445 1445
1446 error = copyoutpsstrs(data, p); 1446 error = copyoutpsstrs(data, p);
1447 if (error != 0) 1447 if (error != 0)
1448 return error; 1448 return error;
1449 1449
1450 return 0; 1450 return 0;
1451} 1451}
1452 1452
1453static int 1453static int
1454copyoutpsstrs(struct execve_data * restrict data, struct proc *p) 1454copyoutpsstrs(struct execve_data * restrict data, struct proc *p)
1455{ 1455{
1456 struct exec_package * const epp = &data->ed_pack; 1456 struct exec_package * const epp = &data->ed_pack;
1457 struct ps_strings32 arginfo32; 1457 struct ps_strings32 arginfo32;
1458 void *aip; 1458 void *aip;
1459 int error; 1459 int error;
1460 1460
1461 /* fill process ps_strings info */ 1461 /* fill process ps_strings info */
1462 p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr, 1462 p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
1463 STACK_PTHREADSPACE), data->ed_ps_strings_sz); 1463 STACK_PTHREADSPACE), data->ed_ps_strings_sz);
1464 1464
1465 if (epp->ep_flags & EXEC_32) { 1465 if (epp->ep_flags & EXEC_32) {
1466 aip = &arginfo32; 1466 aip = &arginfo32;
1467 arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr; 1467 arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
1468 arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr; 1468 arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
1469 arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr; 1469 arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
1470 arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr; 1470 arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
1471 } else 1471 } else
1472 aip = &data->ed_arginfo; 1472 aip = &data->ed_arginfo;
1473 1473
1474 /* copy out the process's ps_strings structure */ 1474 /* copy out the process's ps_strings structure */
1475 if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz)) 1475 if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
1476 != 0) { 1476 != 0) {
1477 DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n", 1477 DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
1478 __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz)); 1478 __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
1479 return error; 1479 return error;
1480 } 1480 }
1481 1481
1482 return 0; 1482 return 0;
1483} 1483}
1484 1484
1485static int 1485static int
1486copyinargs(struct execve_data * restrict data, char * const *args, 1486copyinargs(struct execve_data * restrict data, char * const *args,
1487 char * const *envs, execve_fetch_element_t fetch_element, char **dpp) 1487 char * const *envs, execve_fetch_element_t fetch_element, char **dpp)
1488{ 1488{
1489 struct exec_package * const epp = &data->ed_pack; 1489 struct exec_package * const epp = &data->ed_pack;
1490 char *dp; 1490 char *dp;
1491 size_t i; 1491 size_t i;
1492 int error; 1492 int error;
1493 1493
1494 dp = *dpp; 1494 dp = *dpp;
1495 1495
1496 data->ed_argc = 0; 1496 data->ed_argc = 0;
1497 1497
1498 /* copy the fake args list, if there's one, freeing it as we go */ 1498 /* copy the fake args list, if there's one, freeing it as we go */
1499 if (epp->ep_flags & EXEC_HASARGL) { 1499 if (epp->ep_flags & EXEC_HASARGL) {
1500 struct exec_fakearg *fa = epp->ep_fa; 1500 struct exec_fakearg *fa = epp->ep_fa;
1501 1501
1502 while (fa->fa_arg != NULL) { 1502 while (fa->fa_arg != NULL) {
1503 const size_t maxlen = ARG_MAX - (dp - data->ed_argp); 1503 const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
1504 size_t len; 1504 size_t len;
1505 1505
1506 len = strlcpy(dp, fa->fa_arg, maxlen); 1506 len = strlcpy(dp, fa->fa_arg, maxlen);
1507 /* Count NUL into len. */ 1507 /* Count NUL into len. */
1508 if (len < maxlen) 1508 if (len < maxlen)
1509 len++; 1509 len++;
1510 else { 1510 else {
1511 while (fa->fa_arg != NULL) { 1511 while (fa->fa_arg != NULL) {
1512 kmem_free(fa->fa_arg, fa->fa_len); 1512 kmem_free(fa->fa_arg, fa->fa_len);
1513 fa++; 1513 fa++;
1514 } 1514 }
1515 kmem_free(epp->ep_fa, epp->ep_fa_len); 1515 kmem_free(epp->ep_fa, epp->ep_fa_len);
1516 epp->ep_flags &= ~EXEC_HASARGL; 1516 epp->ep_flags &= ~EXEC_HASARGL;
1517 return E2BIG; 1517 return E2BIG;
1518 } 1518 }
1519 ktrexecarg(fa->fa_arg, len - 1); 1519 ktrexecarg(fa->fa_arg, len - 1);
1520 dp += len; 1520 dp += len;
1521 1521
1522 kmem_free(fa->fa_arg, fa->fa_len); 1522 kmem_free(fa->fa_arg, fa->fa_len);
1523 fa++; 1523 fa++;
1524 data->ed_argc++; 1524 data->ed_argc++;
1525 } 1525 }
1526 kmem_free(epp->ep_fa, epp->ep_fa_len); 1526 kmem_free(epp->ep_fa, epp->ep_fa_len);
1527 epp->ep_flags &= ~EXEC_HASARGL; 1527 epp->ep_flags &= ~EXEC_HASARGL;
1528 } 1528 }
1529 1529
1530 /* 1530 /*
1531 * Read and count argument strings from user. 1531 * Read and count argument strings from user.
1532 */ 1532 */
1533 1533
1534 if (args == NULL) { 1534 if (args == NULL) {
1535 DPRINTF(("%s: null args\n", __func__)); 1535 DPRINTF(("%s: null args\n", __func__));
1536 return EINVAL; 1536 return EINVAL;
1537 } 1537 }
1538 if (epp->ep_flags & EXEC_SKIPARG) 1538 if (epp->ep_flags & EXEC_SKIPARG)
1539 args = (const void *)((const char *)args + fromptrsz(epp)); 1539 args = (const void *)((const char *)args + fromptrsz(epp));
1540 i = 0; 1540 i = 0;
1541 error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg); 1541 error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg);
1542 if (error != 0) { 1542 if (error != 0) {
1543 DPRINTF(("%s: copyin arg %d\n", __func__, error)); 1543 DPRINTF(("%s: copyin arg %d\n", __func__, error));
1544 return error; 1544 return error;
1545 } 1545 }
1546 data->ed_argc += i; 1546 data->ed_argc += i;
1547 1547
1548 /* 1548 /*
1549 * Read and count environment strings from user. 1549 * Read and count environment strings from user.
1550 */ 1550 */
1551 1551
1552 data->ed_envc = 0; 1552 data->ed_envc = 0;
1553 /* environment need not be there */ 1553 /* environment need not be there */
1554 if (envs == NULL) 1554 if (envs == NULL)
1555 goto done; 1555 goto done;
1556 i = 0; 1556 i = 0;
1557 error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv); 1557 error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv);
1558 if (error != 0) { 1558 if (error != 0) {
1559 DPRINTF(("%s: copyin env %d\n", __func__, error)); 1559 DPRINTF(("%s: copyin env %d\n", __func__, error));
1560 return error; 1560 return error;
1561 } 1561 }
1562 data->ed_envc += i; 1562 data->ed_envc += i;
1563 1563
1564done: 1564done:
1565 *dpp = dp; 1565 *dpp = dp;
1566 1566
1567 return 0; 1567 return 0;
1568} 1568}
1569 1569
1570static int 1570static int
1571copyinargstrs(struct execve_data * restrict data, char * const *strs, 1571copyinargstrs(struct execve_data * restrict data, char * const *strs,
1572 execve_fetch_element_t fetch_element, char **dpp, size_t *ip, 1572 execve_fetch_element_t fetch_element, char **dpp, size_t *ip,
1573 void (*ktr)(const void *, size_t)) 1573 void (*ktr)(const void *, size_t))
1574{ 1574{
1575 char *dp, *sp; 1575 char *dp, *sp;
1576 size_t i; 1576 size_t i;
1577 int error; 1577 int error;
1578 1578
1579 dp = *dpp; 1579 dp = *dpp;
1580 1580
1581 i = 0; 1581 i = 0;
1582 while (1) { 1582 while (1) {
1583 const size_t maxlen = ARG_MAX - (dp - data->ed_argp); 1583 const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
1584 size_t len; 1584 size_t len;
1585 1585
1586 if ((error = (*fetch_element)(strs, i, &sp)) != 0) { 1586 if ((error = (*fetch_element)(strs, i, &sp)) != 0) {
1587 return error; 1587 return error;
1588 } 1588 }
1589 if (!sp) 1589 if (!sp)
1590 break; 1590 break;
1591 if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) { 1591 if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) {
1592 if (error == ENAMETOOLONG) 1592 if (error == ENAMETOOLONG)
1593 error = E2BIG; 1593 error = E2BIG;
1594 return error; 1594 return error;
1595 } 1595 }
1596 if (__predict_false(ktrace_on)) 1596 if (__predict_false(ktrace_on))
1597 (*ktr)(dp, len - 1); 1597 (*ktr)(dp, len - 1);
1598 dp += len; 1598 dp += len;
1599 i++; 1599 i++;
1600 } 1600 }
1601 1601
1602 *dpp = dp; 1602 *dpp = dp;
1603 *ip = i; 1603 *ip = i;
1604 1604
1605 return 0; 1605 return 0;
1606} 1606}
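Both the fake-arg loop and copyinargstrs() above turn an overflow of the ARG_MAX-sized kernel buffer into E2BIG (copyinstr's ENAMETOOLONG is remapped). A small userland illustration of the visible effect; this is a hypothetical test program, assuming /bin/echo exists:

	#include <errno.h>
	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		/* A single argument already consumes the whole ARG_MAX budget. */
		char *big = malloc(ARG_MAX);
		char *argv[] = { "/bin/echo", big, NULL };
		char *envp[] = { NULL };

		if (big == NULL)
			return 1;
		memset(big, 'x', ARG_MAX - 1);
		big[ARG_MAX - 1] = '\0';

		if (execve(argv[0], argv, envp) == -1 && errno == E2BIG)
			printf("argument list too long (E2BIG), as expected\n");
		return 0;
	}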
1607 1607
1608/* 1608/*
1609 * Copy argv and env strings from kernel buffer (argp) to the new stack. 1609 * Copy argv and env strings from kernel buffer (argp) to the new stack.
1610 * Those strings are located just after auxinfo. 1610 * Those strings are located just after auxinfo.
1611 */ 1611 */
1612int 1612int
1613copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo, 1613copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1614 char **stackp, void *argp) 1614 char **stackp, void *argp)
1615{ 1615{
1616 char **cpp, *dp, *sp; 1616 char **cpp, *dp, *sp;
1617 size_t len; 1617 size_t len;
1618 void *nullp; 1618 void *nullp;
1619 long argc, envc; 1619 long argc, envc;
1620 int error; 1620 int error;
1621 1621
1622 cpp = (char **)*stackp; 1622 cpp = (char **)*stackp;
1623 nullp = NULL; 1623 nullp = NULL;
1624 argc = arginfo->ps_nargvstr; 1624 argc = arginfo->ps_nargvstr;
1625 envc = arginfo->ps_nenvstr; 1625 envc = arginfo->ps_nenvstr;
1626 1626
1627 /* argc on stack is long */ 1627 /* argc on stack is long */
1628 CTASSERT(sizeof(*cpp) == sizeof(argc)); 1628 CTASSERT(sizeof(*cpp) == sizeof(argc));
1629 1629
1630 dp = (char *)(cpp + 1630 dp = (char *)(cpp +
1631 1 + /* long argc */ 1631 1 + /* long argc */
1632 argc + /* char *argv[] */ 1632 argc + /* char *argv[] */
1633 1 + /* \0 */ 1633 1 + /* \0 */
1634 envc + /* char *env[] */ 1634 envc + /* char *env[] */
1635 1 + /* \0 */ 1635 1 + /* \0 */
1636 /* XXX auxinfo multiplied by ptr size? */ 1636 /* XXX auxinfo multiplied by ptr size? */
1637 pack->ep_esch->es_arglen); /* auxinfo */ 1637 pack->ep_esch->es_arglen); /* auxinfo */
1638 sp = argp; 1638 sp = argp;
1639 1639
1640 if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) { 1640 if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1641 COPYPRINTF("", cpp - 1, sizeof(argc)); 1641 COPYPRINTF("", cpp - 1, sizeof(argc));
1642 return error; 1642 return error;
1643 } 1643 }
1644 1644
1645 /* XXX don't copy them out, remap them! */ 1645 /* XXX don't copy them out, remap them! */
1646 arginfo->ps_argvstr = cpp; /* remember location of argv for later */ 1646 arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1647 1647
1648 for (; --argc >= 0; sp += len, dp += len) { 1648 for (; --argc >= 0; sp += len, dp += len) {
1649 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) { 1649 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1650 COPYPRINTF("", cpp - 1, sizeof(dp)); 1650 COPYPRINTF("", cpp - 1, sizeof(dp));
1651 return error; 1651 return error;
1652 } 1652 }
1653 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) { 1653 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1654 COPYPRINTF("str", dp, (size_t)ARG_MAX); 1654 COPYPRINTF("str", dp, (size_t)ARG_MAX);
1655 return error; 1655 return error;
1656 } 1656 }
1657 } 1657 }
1658 1658
1659 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) { 1659 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1660 COPYPRINTF("", cpp - 1, sizeof(nullp)); 1660 COPYPRINTF("", cpp - 1, sizeof(nullp));
1661 return error; 1661 return error;
1662 } 1662 }
1663 1663
1664 arginfo->ps_envstr = cpp; /* remember location of envp for later */ 1664 arginfo->ps_envstr = cpp; /* remember location of envp for later */
1665 1665
1666 for (; --envc >= 0; sp += len, dp += len) { 1666 for (; --envc >= 0; sp += len, dp += len) {
1667 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) { 1667 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1668 COPYPRINTF("", cpp - 1, sizeof(dp)); 1668 COPYPRINTF("", cpp - 1, sizeof(dp));
1669 return error; 1669 return error;
1670 } 1670 }
1671 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) { 1671 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1672 COPYPRINTF("str", dp, (size_t)ARG_MAX); 1672 COPYPRINTF("str", dp, (size_t)ARG_MAX);
1673 return error; 1673 return error;
1674 } 1674 }
1675 1675
1676 } 1676 }
1677 1677
1678 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) { 1678 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1679 COPYPRINTF("", cpp - 1, sizeof(nullp)); 1679 COPYPRINTF("", cpp - 1, sizeof(nullp));
1680 return error; 1680 return error;
1681 } 1681 }
1682 1682
1683 *stackp = (char *)cpp; 1683 *stackp = (char *)cpp;
1684 return 0; 1684 return 0;
1685} 1685}
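For reference, the block copyargs() builds at *stackp can be summarized as follows (ascending addresses; this only restates the pointer arithmetic above, with the auxinfo slots left for a format-specific wrapper such as the ELF copyargs routine to fill in):

	/*
	 *   long   argc
	 *   char  *argv[argc]           each points into the string area below
	 *   NULL                        argv terminator
	 *   char  *envp[envc]           each points into the string area below
	 *   NULL                        envp terminator
	 *   es_arglen slots             reserved for auxinfo (see the XXX above)
	 *   argv/env strings            copied out from the kernel buffer argp
	 */

On return, *stackp points just past the envp terminator, i.e. at the reserved auxinfo area.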
1686 1686
1687 1687
1688/* 1688/*
1689 * Add execsw[] entries. 1689 * Add execsw[] entries.
1690 */ 1690 */
1691int 1691int
1692exec_add(struct execsw *esp, int count) 1692exec_add(struct execsw *esp, int count)
1693{ 1693{
1694 struct exec_entry *it; 1694 struct exec_entry *it;
1695 int i; 1695 int i;
1696 1696
1697 if (count == 0) { 1697 if (count == 0) {
1698 return 0; 1698 return 0;
1699 } 1699 }
1700 1700
1701 /* Check for duplicates. */ 1701 /* Check for duplicates. */
1702 rw_enter(&exec_lock, RW_WRITER); 1702 rw_enter(&exec_lock, RW_WRITER);
1703 for (i = 0; i < count; i++) { 1703 for (i = 0; i < count; i++) {
1704 LIST_FOREACH(it, &ex_head, ex_list) { 1704 LIST_FOREACH(it, &ex_head, ex_list) {
1705 /* assume unique (makecmds, probe_func, emulation) */ 1705 /* assume unique (makecmds, probe_func, emulation) */
1706 if (it->ex_sw->es_makecmds == esp[i].es_makecmds && 1706 if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1707 it->ex_sw->u.elf_probe_func == 1707 it->ex_sw->u.elf_probe_func ==
1708 esp[i].u.elf_probe_func && 1708 esp[i].u.elf_probe_func &&
1709 it->ex_sw->es_emul == esp[i].es_emul) { 1709 it->ex_sw->es_emul == esp[i].es_emul) {
1710 rw_exit(&exec_lock); 1710 rw_exit(&exec_lock);
1711 return EEXIST; 1711 return EEXIST;
1712 } 1712 }
1713 } 1713 }
1714 } 1714 }
1715 1715
1716 /* Allocate new entries. */ 1716 /* Allocate new entries. */
1717 for (i = 0; i < count; i++) { 1717 for (i = 0; i < count; i++) {
1718 it = kmem_alloc(sizeof(*it), KM_SLEEP); 1718 it = kmem_alloc(sizeof(*it), KM_SLEEP);
1719 it->ex_sw = &esp[i]; 1719 it->ex_sw = &esp[i];
1720 LIST_INSERT_HEAD(&ex_head, it, ex_list); 1720 LIST_INSERT_HEAD(&ex_head, it, ex_list);
1721 } 1721 }
1722 1722
1723 /* update execsw[] */ 1723 /* update execsw[] */
1724 exec_init(0); 1724 exec_init(0);
1725 rw_exit(&exec_lock); 1725 rw_exit(&exec_lock);
1726 return 0; 1726 return 0;
1727} 1727}
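exec_add() and exec_remove() are the registration hooks a loadable executable-format or emulation module would call. A minimal sketch of that shape; my_execsw, my_execsw_count and my_compat_modcmd are hypothetical names, and the module scaffolding is assumed rather than taken from this file:

	extern struct execsw my_execsw[];	/* format descriptors, defined elsewhere */
	extern int my_execsw_count;

	static int
	my_compat_modcmd(modcmd_t cmd, void *arg)
	{
		switch (cmd) {
		case MODULE_CMD_INIT:
			/* Registers the entries; exec_init(0) rebuilds execsw[]. */
			return exec_add(my_execsw, my_execsw_count);
		case MODULE_CMD_FINI:
			/* Fails with EBUSY while a process still uses the format. */
			return exec_remove(my_execsw, my_execsw_count);
		default:
			return ENOTTY;
		}
	}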
1728 1728
1729/* 1729/*
1730 * Remove execsw[] entry. 1730 * Remove execsw[] entry.
1731 */ 1731 */
1732int 1732int
1733exec_remove(struct execsw *esp, int count) 1733exec_remove(struct execsw *esp, int count)
1734{ 1734{
1735 struct exec_entry *it, *next; 1735 struct exec_entry *it, *next;
1736 int i; 1736 int i;
1737 const struct proclist_desc *pd; 1737 const struct proclist_desc *pd;
1738 proc_t *p; 1738 proc_t *p;
1739 1739
1740 if (count == 0) { 1740 if (count == 0) {
1741 return 0; 1741 return 0;
1742 } 1742 }
1743 1743
1744 /* Abort if any are busy. */ 1744 /* Abort if any are busy. */
1745 rw_enter(&exec_lock, RW_WRITER); 1745 rw_enter(&exec_lock, RW_WRITER);
1746 for (i = 0; i < count; i++) { 1746 for (i = 0; i < count; i++) {
1747 mutex_enter(proc_lock); 1747 mutex_enter(proc_lock);
1748 for (pd = proclists; pd->pd_list != NULL; pd++) { 1748 for (pd = proclists; pd->pd_list != NULL; pd++) {
1749 PROCLIST_FOREACH(p, pd->pd_list) { 1749 PROCLIST_FOREACH(p, pd->pd_list) {
1750 if (p->p_execsw == &esp[i]) { 1750 if (p->p_execsw == &esp[i]) {
1751 mutex_exit(proc_lock); 1751 mutex_exit(proc_lock);
1752 rw_exit(&exec_lock); 1752 rw_exit(&exec_lock);
1753 return EBUSY; 1753 return EBUSY;
1754 } 1754 }
1755 } 1755 }
1756 } 1756 }
1757 mutex_exit(proc_lock); 1757 mutex_exit(proc_lock);
1758 } 1758 }
1759 1759
1760 /* None are busy, so remove them all. */ 1760 /* None are busy, so remove them all. */
1761 for (i = 0; i < count; i++) { 1761 for (i = 0; i < count; i++) {
1762 for (it = LIST_FIRST(&ex_head); it != NULL; it = next) { 1762 for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1763 next = LIST_NEXT(it, ex_list); 1763 next = LIST_NEXT(it, ex_list);
1764 if (it->ex_sw == &esp[i]) { 1764 if (it->ex_sw == &esp[i]) {
1765 LIST_REMOVE(it, ex_list); 1765 LIST_REMOVE(it, ex_list);
1766 kmem_free(it, sizeof(*it)); 1766 kmem_free(it, sizeof(*it));
1767 break; 1767 break;
1768 } 1768 }
1769 } 1769 }
1770 } 1770 }
1771 1771
1772 /* update execsw[] */ 1772 /* update execsw[] */
1773 exec_init(0); 1773 exec_init(0);
1774 rw_exit(&exec_lock); 1774 rw_exit(&exec_lock);
1775 return 0; 1775 return 0;
1776} 1776}
1777 1777
1778/* 1778/*
1779 * Initialize exec structures. If init_boot is true, also does necessary 1779 * Initialize exec structures. If init_boot is true, also does necessary
1780 * one-time initialization (it's called from main() that way). 1780 * one-time initialization (it's called from main() that way).
1781 * Once system is multiuser, this should be called with exec_lock held, 1781 * Once system is multiuser, this should be called with exec_lock held,
1782 * i.e. via exec_{add|remove}(). 1782 * i.e. via exec_{add|remove}().
1783 */ 1783 */
1784int 1784int
1785exec_init(int init_boot) 1785exec_init(int init_boot)
1786{ 1786{
1787 const struct execsw **sw; 1787 const struct execsw **sw;
1788 struct exec_entry *ex; 1788 struct exec_entry *ex;
1789 SLIST_HEAD(,exec_entry) first; 1789 SLIST_HEAD(,exec_entry) first;
1790 SLIST_HEAD(,exec_entry) any; 1790 SLIST_HEAD(,exec_entry) any;
1791 SLIST_HEAD(,exec_entry) last; 1791 SLIST_HEAD(,exec_entry) last;
1792 int i, sz; 1792 int i, sz;
1793 1793
1794 if (init_boot) { 1794 if (init_boot) {
1795 /* do one-time initializations */ 1795 /* do one-time initializations */
1796 rw_init(&exec_lock); 1796 rw_init(&exec_lock);
1797 mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE); 1797 mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1798 pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH, 1798 pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1799 "execargs", &exec_palloc, IPL_NONE); 1799 "execargs", &exec_palloc, IPL_NONE);
1800 pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0); 1800 pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1801 } else { 1801 } else {
1802 KASSERT(rw_write_held(&exec_lock)); 1802 KASSERT(rw_write_held(&exec_lock));
1803 } 1803 }
1804 1804
1805 /* Sort each entry onto the appropriate queue. */ 1805 /* Sort each entry onto the appropriate queue. */
1806 SLIST_INIT(&first); 1806 SLIST_INIT(&first);
1807 SLIST_INIT(&any); 1807 SLIST_INIT(&any);
1808 SLIST_INIT(&last); 1808 SLIST_INIT(&last);
1809 sz = 0; 1809 sz = 0;
1810 LIST_FOREACH(ex, &ex_head, ex_list) { 1810 LIST_FOREACH(ex, &ex_head, ex_list) {
1811 switch(ex->ex_sw->es_prio) { 1811 switch(ex->ex_sw->es_prio) {
1812 case EXECSW_PRIO_FIRST: 1812 case EXECSW_PRIO_FIRST:
1813 SLIST_INSERT_HEAD(&first, ex, ex_slist); 1813 SLIST_INSERT_HEAD(&first, ex, ex_slist);
1814 break; 1814 break;
1815 case EXECSW_PRIO_ANY: 1815 case EXECSW_PRIO_ANY:
1816 SLIST_INSERT_HEAD(&any, ex, ex_slist); 1816 SLIST_INSERT_HEAD(&any, ex, ex_slist);
1817 break; 1817 break;
1818 case EXECSW_PRIO_LAST: 1818 case EXECSW_PRIO_LAST:
1819 SLIST_INSERT_HEAD(&last, ex, ex_slist); 1819 SLIST_INSERT_HEAD(&last, ex, ex_slist);
1820 break; 1820 break;
1821 default: 1821 default:
1822 panic("%s", __func__); 1822 panic("%s", __func__);
1823 break; 1823 break;
1824 } 1824 }
1825 sz++; 1825 sz++;
1826 } 1826 }
1827 1827
1828 /* 1828 /*
1829 * Create new execsw[]. Ensure we do not try a zero-sized 1829 * Create new execsw[]. Ensure we do not try a zero-sized
1830 * allocation. 1830 * allocation.
1831 */ 1831 */
1832 sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP); 1832 sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1833 i = 0; 1833 i = 0;
1834 SLIST_FOREACH(ex, &first, ex_slist) { 1834 SLIST_FOREACH(ex, &first, ex_slist) {
1835 sw[i++] = ex->ex_sw; 1835 sw[i++] = ex->ex_sw;
1836 } 1836 }
1837 SLIST_FOREACH(ex, &any, ex_slist) { 1837 SLIST_FOREACH(ex, &any, ex_slist) {
1838 sw[i++] = ex->ex_sw; 1838 sw[i++] = ex->ex_sw;
1839 } 1839 }
1840 SLIST_FOREACH(ex, &last, ex_slist) { 1840 SLIST_FOREACH(ex, &last, ex_slist) {
1841 sw[i++] = ex->ex_sw; 1841 sw[i++] = ex->ex_sw;
1842 } 1842 }
1843 1843
1844 /* Replace old execsw[] and free used memory. */ 1844 /* Replace old execsw[] and free used memory. */
1845 if (execsw != NULL) { 1845 if (execsw != NULL) {
1846 kmem_free(__UNCONST(execsw), 1846 kmem_free(__UNCONST(execsw),
1847 nexecs * sizeof(struct execsw *) + 1); 1847 nexecs * sizeof(struct execsw *) + 1);
1848 } 1848 }
1849 execsw = sw; 1849 execsw = sw;
1850 nexecs = sz; 1850 nexecs = sz;
1851 1851
1852 /* Figure out the maximum size of an exec header. */ 1852 /* Figure out the maximum size of an exec header. */
1853 exec_maxhdrsz = sizeof(int); 1853 exec_maxhdrsz = sizeof(int);
1854 for (i = 0; i < nexecs; i++) { 1854 for (i = 0; i < nexecs; i++) {
1855 if (execsw[i]->es_hdrsz > exec_maxhdrsz) 1855 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1856 exec_maxhdrsz = execsw[i]->es_hdrsz; 1856 exec_maxhdrsz = execsw[i]->es_hdrsz;
1857 } 1857 }
1858 1858
1859 return 0; 1859 return 0;
1860} 1860}
1861 1861
1862static int 1862static int
1863exec_sigcode_map(struct proc *p, const struct emul *e) 1863exec_sigcode_map(struct proc *p, const struct emul *e)
1864{ 1864{
1865 vaddr_t va; 1865 vaddr_t va;
1866 vsize_t sz; 1866 vsize_t sz;
1867 int error; 1867 int error;
1868 struct uvm_object *uobj; 1868 struct uvm_object *uobj;
1869 1869
1870 sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode; 1870 sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1871 1871
1872 if (e->e_sigobject == NULL || sz == 0) { 1872 if (e->e_sigobject == NULL || sz == 0) {
1873 return 0; 1873 return 0;
1874 } 1874 }
1875 1875
1876 /* 1876 /*
1877 * If we don't have a sigobject for this emulation, create one. 1877 * If we don't have a sigobject for this emulation, create one.
1878 * 1878 *
1879 * sigobject is an anonymous memory object (just like SYSV shared 1879 * sigobject is an anonymous memory object (just like SYSV shared
1880 * memory) that we keep a permanent reference to and that we map 1880 * memory) that we keep a permanent reference to and that we map
1881 * in all processes that need this sigcode. The creation is simple, 1881 * in all processes that need this sigcode. The creation is simple,
1882 * we create an object, add a permanent reference to it, map it in 1882 * we create an object, add a permanent reference to it, map it in
1883 * kernel space, copy out the sigcode to it and unmap it. 1883 * kernel space, copy out the sigcode to it and unmap it.
1884 * We map it with PROT_READ|PROT_EXEC into the process just 1884 * We map it with PROT_READ|PROT_EXEC into the process just
1885 * the way sys_mmap() would map it. 1885 * the way sys_mmap() would map it.
1886 */ 1886 */
1887 1887
1888 uobj = *e->e_sigobject; 1888 uobj = *e->e_sigobject;
1889 if (uobj == NULL) { 1889 if (uobj == NULL) {
1890 mutex_enter(&sigobject_lock); 1890 mutex_enter(&sigobject_lock);
1891 if ((uobj = *e->e_sigobject) == NULL) { 1891 if ((uobj = *e->e_sigobject) == NULL) {
1892 uobj = uao_create(sz, 0); 1892 uobj = uao_create(sz, 0);
1893 (*uobj->pgops->pgo_reference)(uobj); 1893 (*uobj->pgops->pgo_reference)(uobj);
1894 va = vm_map_min(kernel_map); 1894 va = vm_map_min(kernel_map);
1895 if ((error = uvm_map(kernel_map, &va, round_page(sz), 1895 if ((error = uvm_map(kernel_map, &va, round_page(sz),
1896 uobj, 0, 0, 1896 uobj, 0, 0,
1897 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 1897 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1898 UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) { 1898 UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1899 printf("kernel mapping failed %d\n", error); 1899 printf("kernel mapping failed %d\n", error);
1900 (*uobj->pgops->pgo_detach)(uobj); 1900 (*uobj->pgops->pgo_detach)(uobj);
1901 mutex_exit(&sigobject_lock); 1901 mutex_exit(&sigobject_lock);
1902 return error; 1902 return error;
1903 } 1903 }
1904 memcpy((void *)va, e->e_sigcode, sz); 1904 memcpy((void *)va, e->e_sigcode, sz);
1905#ifdef PMAP_NEED_PROCWR 1905#ifdef PMAP_NEED_PROCWR
1906 pmap_procwr(&proc0, va, sz); 1906 pmap_procwr(&proc0, va, sz);
1907#endif 1907#endif
1908 uvm_unmap(kernel_map, va, va + round_page(sz)); 1908 uvm_unmap(kernel_map, va, va + round_page(sz));
1909 *e->e_sigobject = uobj; 1909 *e->e_sigobject = uobj;
1910 } 1910 }
1911 mutex_exit(&sigobject_lock); 1911 mutex_exit(&sigobject_lock);
1912 } 1912 }
1913 1913
1914 /* Just a hint to uvm_map where to put it. */ 1914 /* Just a hint to uvm_map where to put it. */
1915 va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr, 1915 va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1916 round_page(sz)); 1916 round_page(sz));
1917 1917
1918#ifdef __alpha__ 1918#ifdef __alpha__
1919 /* 1919 /*
1920 * Tru64 puts /sbin/loader at the end of user virtual memory, 1920 * Tru64 puts /sbin/loader at the end of user virtual memory,
1921 * which causes the above calculation to put the sigcode at 1921 * which causes the above calculation to put the sigcode at
1922 * an invalid address. Put it just below the text instead. 1922 * an invalid address. Put it just below the text instead.
1923 */ 1923 */
1924 if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) { 1924 if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1925 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz); 1925 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1926 } 1926 }
1927#endif 1927#endif
1928 1928
1929 (*uobj->pgops->pgo_reference)(uobj); 1929 (*uobj->pgops->pgo_reference)(uobj);
1930 error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz), 1930 error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1931 uobj, 0, 0, 1931 uobj, 0, 0,
1932 UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE, 1932 UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1933 UVM_ADV_RANDOM, 0)); 1933 UVM_ADV_RANDOM, 0));
1934 if (error) { 1934 if (error) {
1935 DPRINTF(("%s, %d: map %p " 1935 DPRINTF(("%s, %d: map %p "
1936 "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n", 1936 "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1937 __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz), 1937 __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
1938 va, error)); 1938 va, error));
1939 (*uobj->pgops->pgo_detach)(uobj); 1939 (*uobj->pgops->pgo_detach)(uobj);
1940 return error; 1940 return error;
1941 } 1941 }
1942 p->p_sigctx.ps_sigcode = (void *)va; 1942 p->p_sigctx.ps_sigcode = (void *)va;
1943 return 0; 1943 return 0;
1944} 1944}
1945 1945
1946/* 1946/*
1947 * Release a refcount on spawn_exec_data and destroy memory, if this 1947 * Release a refcount on spawn_exec_data and destroy memory, if this
1948 * was the last one. 1948 * was the last one.
1949 */ 1949 */
1950static void 1950static void
1951spawn_exec_data_release(struct spawn_exec_data *data) 1951spawn_exec_data_release(struct spawn_exec_data *data)
1952{ 1952{
1953 if (atomic_dec_32_nv(&data->sed_refcnt) != 0) 1953 if (atomic_dec_32_nv(&data->sed_refcnt) != 0)
1954 return; 1954 return;
1955 1955
1956 cv_destroy(&data->sed_cv_child_ready); 1956 cv_destroy(&data->sed_cv_child_ready);
1957 mutex_destroy(&data->sed_mtx_child); 1957 mutex_destroy(&data->sed_mtx_child);
1958 1958
1959 if (data->sed_actions) 1959 if (data->sed_actions)
1960 posix_spawn_fa_free(data->sed_actions, 1960 posix_spawn_fa_free(data->sed_actions,
1961 data->sed_actions->len); 1961 data->sed_actions->len);
1962 if (data->sed_attrs) 1962 if (data->sed_attrs)
1963 kmem_free(data->sed_attrs, 1963 kmem_free(data->sed_attrs,
1964 sizeof(*data->sed_attrs)); 1964 sizeof(*data->sed_attrs));
1965 kmem_free(data, sizeof(*data)); 1965 kmem_free(data, sizeof(*data));
1966} 1966}
1967 1967
1968/* 1968/*
1969 * A child lwp of a posix_spawn operation starts here and ends up in 1969 * A child lwp of a posix_spawn operation starts here and ends up in
1970 * cpu_spawn_return, dealing with all filedescriptor and scheduler 1970 * cpu_spawn_return, dealing with all filedescriptor and scheduler
1971 * manipulations in between. 1971 * manipulations in between.
1972 * The parent waits for the child, as it is not clear whether the child 1972 * The parent waits for the child, as it is not clear whether the child
1973 * will be able to acquire its own exec_lock. If it can, the parent can 1973 * will be able to acquire its own exec_lock. If it can, the parent can
1974 * be released early and continue running in parallel. If not (or if the 1974 * be released early and continue running in parallel. If not (or if the
1975 * magic debug flag is passed in the scheduler attribute struct), the 1975 * magic debug flag is passed in the scheduler attribute struct), the
1976 * child rides on the parent's exec lock until it is ready to return to 1976 * child rides on the parent's exec lock until it is ready to return to
1977 * userland - and only then releases the parent. This method loses 1977 * userland - and only then releases the parent. This method loses
1978 * concurrency, but improves error reporting. 1978 * concurrency, but improves error reporting.
1979 */ 1979 */
1980static void 1980static void
1981spawn_return(void *arg) 1981spawn_return(void *arg)
1982{ 1982{
1983 struct spawn_exec_data *spawn_data = arg; 1983 struct spawn_exec_data *spawn_data = arg;
1984 struct lwp *l = curlwp; 1984 struct lwp *l = curlwp;
1985 int error, newfd; 1985 int error, newfd;
1986 size_t i; 1986 size_t i;
1987 const struct posix_spawn_file_actions_entry *fae; 1987 const struct posix_spawn_file_actions_entry *fae;
1988 pid_t ppid; 1988 pid_t ppid;
1989 register_t retval; 1989 register_t retval;
1990 bool have_reflock; 1990 bool have_reflock;
1991 bool parent_is_waiting = true; 1991 bool parent_is_waiting = true;
1992 1992
1993 /* 1993 /*
1994 * Check if we can release parent early. 1994 * Check if we can release parent early.
1995 * We either need to have no sed_attrs, or sed_attrs must not include 1995 * We either need to have no sed_attrs, or sed_attrs must not include
1996 * POSIX_SPAWN_RETURNERROR or any of the flags that require safe access 1996 * POSIX_SPAWN_RETURNERROR or any of the flags that require safe access
1997 * to the parent proc (passed in sed_parent). 1997 * to the parent proc (passed in sed_parent).
1998 * We then try to get the exec_lock, and only if that succeeds can the 1998 * We then try to get the exec_lock, and only if that succeeds can the
1999 * parent be released here already. 1999 * parent be released here already.
2000 */ 2000 */
2001 ppid = spawn_data->sed_parent->p_pid; 2001 ppid = spawn_data->sed_parent->p_pid;
2002 if ((!spawn_data->sed_attrs 2002 if ((!spawn_data->sed_attrs
2003 || (spawn_data->sed_attrs->sa_flags 2003 || (spawn_data->sed_attrs->sa_flags
2004 & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0) 2004 & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0)
2005 && rw_tryenter(&exec_lock, RW_READER)) { 2005 && rw_tryenter(&exec_lock, RW_READER)) {
2006 parent_is_waiting = false; 2006 parent_is_waiting = false;
2007 mutex_enter(&spawn_data->sed_mtx_child); 2007 mutex_enter(&spawn_data->sed_mtx_child);
2008 cv_signal(&spawn_data->sed_cv_child_ready); 2008 cv_signal(&spawn_data->sed_cv_child_ready);
2009 mutex_exit(&spawn_data->sed_mtx_child); 2009 mutex_exit(&spawn_data->sed_mtx_child);
2010 } 2010 }
2011 2011
2012 /* don't allow debugger access yet */ 2012 /* don't allow debugger access yet */
2013 rw_enter(&l->l_proc->p_reflock, RW_WRITER); 2013 rw_enter(&l->l_proc->p_reflock, RW_WRITER);
2014 have_reflock = true; 2014 have_reflock = true;
2015 2015
2016 error = 0; 2016 error = 0;
2017 /* handle posix_spawn_file_actions */ 2017 /* handle posix_spawn_file_actions */
2018 if (spawn_data->sed_actions != NULL) { 2018 if (spawn_data->sed_actions != NULL) {
2019 for (i = 0; i < spawn_data->sed_actions->len; i++) { 2019 for (i = 0; i < spawn_data->sed_actions->len; i++) {
2020 fae = &spawn_data->sed_actions->fae[i]; 2020 fae = &spawn_data->sed_actions->fae[i];
2021 switch (fae->fae_action) { 2021 switch (fae->fae_action) {
2022 case FAE_OPEN: 2022 case FAE_OPEN:
2023 if (fd_getfile(fae->fae_fildes) != NULL) { 2023 if (fd_getfile(fae->fae_fildes) != NULL) {
2024 error = fd_close(fae->fae_fildes); 2024 error = fd_close(fae->fae_fildes);
2025 if (error) 2025 if (error)
2026 break; 2026 break;
2027 } 2027 }
2028 error = fd_open(fae->fae_path, fae->fae_oflag, 2028 error = fd_open(fae->fae_path, fae->fae_oflag,
2029 fae->fae_mode, &newfd); 2029 fae->fae_mode, &newfd);
2030 if (error) 2030 if (error)
2031 break; 2031 break;
2032 if (newfd != fae->fae_fildes) { 2032 if (newfd != fae->fae_fildes) {
2033 error = dodup(l, newfd, 2033 error = dodup(l, newfd,
2034 fae->fae_fildes, 0, &retval); 2034 fae->fae_fildes, 0, &retval);
2035 if (fd_getfile(newfd) != NULL) 2035 if (fd_getfile(newfd) != NULL)
2036 fd_close(newfd); 2036 fd_close(newfd);
2037 } 2037 }
2038 break; 2038 break;
2039 case FAE_DUP2: 2039 case FAE_DUP2:
2040 error = dodup(l, fae->fae_fildes, 2040 error = dodup(l, fae->fae_fildes,
2041 fae->fae_newfildes, 0, &retval); 2041 fae->fae_newfildes, 0, &retval);
2042 break; 2042 break;
2043 case FAE_CLOSE: 2043 case FAE_CLOSE:
2044 if (fd_getfile(fae->fae_fildes) == NULL) { 2044 if (fd_getfile(fae->fae_fildes) == NULL) {
2045 error = EBADF; 2045 error = EBADF;
2046 break; 2046 break;
2047 } 2047 }
2048 error = fd_close(fae->fae_fildes); 2048 error = fd_close(fae->fae_fildes);
2049 break; 2049 break;
2050 } 2050 }
2051 if (error) 2051 if (error)
2052 goto report_error; 2052 goto report_error;
2053 } 2053 }
2054 } 2054 }
2055 2055
2056 /* handle posix_spawnattr */ 2056 /* handle posix_spawnattr */
2057 if (spawn_data->sed_attrs != NULL) { 2057 if (spawn_data->sed_attrs != NULL) {
2058 int ostat; 2058 int ostat;
2059 struct sigaction sigact; 2059 struct sigaction sigact;
2060 sigact._sa_u._sa_handler = SIG_DFL; 2060 sigact._sa_u._sa_handler = SIG_DFL;
2061 sigact.sa_flags = 0; 2061 sigact.sa_flags = 0;
2062 2062
2063 /* 2063 /*
2064 * set state to SSTOP so that this proc can be found by pid. 2064 * set state to SSTOP so that this proc can be found by pid.
2065 * see proc_enterpgrp, do_sched_setparam below 2065 * see proc_enterpgrp, do_sched_setparam below
2066 */ 2066 */
2067 ostat = l->l_proc->p_stat; 2067 ostat = l->l_proc->p_stat;
2068 l->l_proc->p_stat = SSTOP; 2068 l->l_proc->p_stat = SSTOP;
2069 2069
2070 /* Set process group */ 2070 /* Set process group */
2071 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) { 2071 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
2072 pid_t mypid = l->l_proc->p_pid, 2072 pid_t mypid = l->l_proc->p_pid,
2073 pgrp = spawn_data->sed_attrs->sa_pgroup; 2073 pgrp = spawn_data->sed_attrs->sa_pgroup;
2074 2074
2075 if (pgrp == 0) 2075 if (pgrp == 0)
2076 pgrp = mypid; 2076 pgrp = mypid;
2077 2077
2078 error = proc_enterpgrp(spawn_data->sed_parent, 2078 error = proc_enterpgrp(spawn_data->sed_parent,
2079 mypid, pgrp, false); 2079 mypid, pgrp, false);
2080 if (error) 2080 if (error)
2081 goto report_error; 2081 goto report_error;
2082 } 2082 }
2083 2083
2084 /* Set scheduler policy */ 2084 /* Set scheduler policy */
2085 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER) 2085 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
2086 error = do_sched_setparam(l->l_proc->p_pid, 0, 2086 error = do_sched_setparam(l->l_proc->p_pid, 0,
2087 spawn_data->sed_attrs->sa_schedpolicy, 2087 spawn_data->sed_attrs->sa_schedpolicy,
2088 &spawn_data->sed_attrs->sa_schedparam); 2088 &spawn_data->sed_attrs->sa_schedparam);
2089 else if (spawn_data->sed_attrs->sa_flags 2089 else if (spawn_data->sed_attrs->sa_flags
2090 & POSIX_SPAWN_SETSCHEDPARAM) { 2090 & POSIX_SPAWN_SETSCHEDPARAM) {
2091 error = do_sched_setparam(ppid, 0, 2091 error = do_sched_setparam(ppid, 0,
2092 SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam); 2092 SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
2093 } 2093 }
2094 if (error) 2094 if (error)
2095 goto report_error; 2095 goto report_error;
2096 2096
2097 /* Reset user IDs */ 2097 /* Reset user IDs */
2098 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) { 2098 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
2099 error = do_setresuid(l, -1, 2099 error = do_setresuid(l, -1,
2100 kauth_cred_getgid(l->l_cred), -1, 2100 kauth_cred_getgid(l->l_cred), -1,
2101 ID_E_EQ_R | ID_E_EQ_S); 2101 ID_E_EQ_R | ID_E_EQ_S);
2102 if (error) 2102 if (error)
2103 goto report_error; 2103 goto report_error;
2104 error = do_setresuid(l, -1, 2104 error = do_setresuid(l, -1,
2105 kauth_cred_getuid(l->l_cred), -1, 2105 kauth_cred_getuid(l->l_cred), -1,
2106 ID_E_EQ_R | ID_E_EQ_S); 2106 ID_E_EQ_R | ID_E_EQ_S);
2107 if (error) 2107 if (error)
2108 goto report_error; 2108 goto report_error;
2109 } 2109 }
2110 2110
2111 /* Set signal masks/defaults */ 2111 /* Set signal masks/defaults */
2112 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) { 2112 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
2113 mutex_enter(l->l_proc->p_lock); 2113 mutex_enter(l->l_proc->p_lock);
2114 error = sigprocmask1(l, SIG_SETMASK, 2114 error = sigprocmask1(l, SIG_SETMASK,
2115 &spawn_data->sed_attrs->sa_sigmask, NULL); 2115 &spawn_data->sed_attrs->sa_sigmask, NULL);
2116 mutex_exit(l->l_proc->p_lock); 2116 mutex_exit(l->l_proc->p_lock);
2117 if (error) 2117 if (error)
2118 goto report_error; 2118 goto report_error;
2119 } 2119 }
2120 2120
2121 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) { 2121 if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
2122 /* 2122 /*
2123 * The following sigaction call is using a sigaction 2123 * The following sigaction call is using a sigaction
2124 * version 0 trampoline which is in the compatibility 2124 * version 0 trampoline which is in the compatibility
2125 * code only. This is not a problem because for SIG_DFL 2125 * code only. This is not a problem because for SIG_DFL
2126 * and SIG_IGN, the trampolines are now ignored. If they 2126 * and SIG_IGN, the trampolines are now ignored. If they
2127 * were not, this would be a problem because we are 2127 * were not, this would be a problem because we are
2128 * holding the exec_lock, and the compat code needs 2128 * holding the exec_lock, and the compat code needs
2129 * to do the same in order to replace the trampoline 2129 * to do the same in order to replace the trampoline
2130 * code of the process. 2130 * code of the process.
2131 */ 2131 */
2132 for (i = 1; i <= NSIG; i++) { 2132 for (i = 1; i <= NSIG; i++) {
2133 if (sigismember( 2133 if (sigismember(
2134 &spawn_data->sed_attrs->sa_sigdefault, i)) 2134 &spawn_data->sed_attrs->sa_sigdefault, i))
2135 sigaction1(l, i, &sigact, NULL, NULL, 2135 sigaction1(l, i, &sigact, NULL, NULL,
2136 0); 2136 0);
2137 } 2137 }
2138 } 2138 }
2139 l->l_proc->p_stat = ostat; 2139 l->l_proc->p_stat = ostat;
2140 } 2140 }
2141 2141
2142 /* now do the real exec */ 2142 /* now do the real exec */
2143 error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting, 2143 error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting,
2144 true); 2144 true);
2145 have_reflock = false; 2145 have_reflock = false;
2146 if (error == EJUSTRETURN) 2146 if (error == EJUSTRETURN)
2147 error = 0; 2147 error = 0;
2148 else if (error) 2148 else if (error)
2149 goto report_error; 2149 goto report_error;
2150 2150
2151 if (parent_is_waiting) { 2151 if (parent_is_waiting) {
2152 mutex_enter(&spawn_data->sed_mtx_child); 2152 mutex_enter(&spawn_data->sed_mtx_child);
2153 cv_signal(&spawn_data->sed_cv_child_ready); 2153 cv_signal(&spawn_data->sed_cv_child_ready);
2154 mutex_exit(&spawn_data->sed_mtx_child); 2154 mutex_exit(&spawn_data->sed_mtx_child);
2155 } 2155 }
2156 2156
2157 /* release our refcount on the data */ 2157 /* release our refcount on the data */
2158 spawn_exec_data_release(spawn_data); 2158 spawn_exec_data_release(spawn_data);
2159 2159
2160 /* and finally: leave to userland for the first time */ 2160 /* and finally: leave to userland for the first time */
2161 cpu_spawn_return(l); 2161 cpu_spawn_return(l);
2162 2162
2163 /* NOTREACHED */ 2163 /* NOTREACHED */
2164 return; 2164 return;
2165 2165
2166 report_error: 2166 report_error:
2167 if (have_reflock) { 2167 if (have_reflock) {
2168 /* 2168 /*
2169 * We have not passed through execve_runproc(), 2169 * We have not passed through execve_runproc(),
2170 * which would have released the p_reflock and also 2170 * which would have released the p_reflock and also
2171 * taken ownership of the sed_exec part of spawn_data, 2171 * taken ownership of the sed_exec part of spawn_data,
2172 * so release/free both here. 2172 * so release/free both here.
2173 */ 2173 */
2174 rw_exit(&l->l_proc->p_reflock); 2174 rw_exit(&l->l_proc->p_reflock);
2175 execve_free_data(&spawn_data->sed_exec); 2175 execve_free_data(&spawn_data->sed_exec);
2176 } 2176 }
2177 2177
2178 if (parent_is_waiting) { 2178 if (parent_is_waiting) {
2179 /* pass error to parent */ 2179 /* pass error to parent */
2180 mutex_enter(&spawn_data->sed_mtx_child); 2180 mutex_enter(&spawn_data->sed_mtx_child);
2181 spawn_data->sed_error = error; 2181 spawn_data->sed_error = error;
2182 cv_signal(&spawn_data->sed_cv_child_ready); 2182 cv_signal(&spawn_data->sed_cv_child_ready);
2183 mutex_exit(&spawn_data->sed_mtx_child); 2183 mutex_exit(&spawn_data->sed_mtx_child);
2184 } else { 2184 } else {
2185 rw_exit(&exec_lock); 2185 rw_exit(&exec_lock);
2186 } 2186 }
2187 2187
2188 /* release our refcount on the data */ 2188 /* release our refcount on the data */
2189 spawn_exec_data_release(spawn_data); 2189 spawn_exec_data_release(spawn_data);
2190 2190
2191 /* done, exit */ 2191 /* done, exit */
2192 mutex_enter(l->l_proc->p_lock); 2192 mutex_enter(l->l_proc->p_lock);
2193 /* 2193 /*
2194 * POSIX explicitly asks for an exit code of 127 if we report 2194 * POSIX explicitly asks for an exit code of 127 if we report
2195 * errors from the child process - so, unfortunately, there 2195 * errors from the child process - so, unfortunately, there
2196 * is no way to report a more exact error code. 2196 * is no way to report a more exact error code.
2197 * A NetBSD-specific workaround is POSIX_SPAWN_RETURNERROR as a 2197 * A NetBSD-specific workaround is POSIX_SPAWN_RETURNERROR as a
2198 * flag bit in the attrp argument to posix_spawn(2), see above. 2198 * flag bit in the attrp argument to posix_spawn(2), see above.
2199 */ 2199 */
2200 exit1(l, W_EXITCODE(127, 0)); 2200 exit1(l, W_EXITCODE(127, 0));
2201} 2201}
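
For illustration, a minimal userland sketch of the two error-reporting modes described in the comments above: by default a failed exec in the spawned child surfaces as exit status 127, while the NetBSD-specific POSIX_SPAWN_RETURNERROR attribute flag makes the error come back from posix_spawn(3) itself. This is a sketch only; the program path is a placeholder.

#include <sys/types.h>
#include <sys/wait.h>
#include <spawn.h>
#include <stdio.h>

extern char **environ;

int
main(void)
{
	posix_spawnattr_t attr;
	pid_t pid;
	int error, status;
	char *argv[] = { "/nonexistent/prog", NULL };	/* placeholder path */

	/* Default POSIX semantics: the spawn may look successful, with the
	 * exec failure reported as exit status 127 in the child. */
	error = posix_spawn(&pid, argv[0], NULL, NULL, argv, environ);
	if (error == 0) {
		(void)waitpid(pid, &status, 0);
		printf("exit status %d\n", WEXITSTATUS(status)); /* 127 if the exec failed */
	} else
		printf("posix_spawn: error %d\n", error);

	/* NetBSD-specific: with POSIX_SPAWN_RETURNERROR the exec error is
	 * returned by posix_spawn() itself. */
	posix_spawnattr_init(&attr);
	posix_spawnattr_setflags(&attr, POSIX_SPAWN_RETURNERROR);
	error = posix_spawn(&pid, argv[0], NULL, &attr, argv, environ);
	printf("with POSIX_SPAWN_RETURNERROR: error %d\n", error);
	posix_spawnattr_destroy(&attr);
	return 0;
}

Whether the first call reports the failure synchronously or only via the 127 exit status depends on whether the child could take the exec_lock and release the parent early, as the comments before spawn_return() describe.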
2202 2202
2203void 2203void
2204posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len) 2204posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
2205{ 2205{
2206 2206
2207 for (size_t i = 0; i < len; i++) { 2207 for (size_t i = 0; i < len; i++) {
2208 struct posix_spawn_file_actions_entry *fae = &fa->fae[i]; 2208 struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
2209 if (fae->fae_action != FAE_OPEN) 2209 if (fae->fae_action != FAE_OPEN)
2210 continue; 2210 continue;
2211 kmem_free(fae->fae_path, strlen(fae->fae_path) + 1); 2211 kmem_free(fae->fae_path, strlen(fae->fae_path) + 1);
2212 } 2212 }
2213 if (fa->len > 0) 2213 if (fa->len > 0)
2214 kmem_free(fa->fae, sizeof(*fa->fae) * fa->len); 2214 kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
2215 kmem_free(fa, sizeof(*fa)); 2215 kmem_free(fa, sizeof(*fa));
2216} 2216}
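
The explicit len argument (rather than always using fa->len) allows freeing a table whose FAE_OPEN path strings were only partially copied in, which is how the error path of posix_spawn_fa_alloc() below uses it. A minimal sketch of that calling pattern follows; copy_fae_path() is a hypothetical stand-in for the copyinstr()-based loop.

static int
copy_fae_paths(struct posix_spawn_file_actions *fa)
{
	size_t i;
	int error;

	for (i = 0; i < fa->len; i++) {
		if (fa->fae[i].fae_action != FAE_OPEN)
			continue;
		/* copy_fae_path() is hypothetical; on failure, entries
		 * [0, i) own kernel copies of their path strings, entry i
		 * does not, so only the first i entries may be freed. */
		if ((error = copy_fae_path(&fa->fae[i])) != 0) {
			posix_spawn_fa_free(fa, i);
			return error;
		}
	}
	return 0;
}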
2217 2217
2218static int 2218static int
2219posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap, 2219posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
2220 const struct posix_spawn_file_actions *ufa, rlim_t lim) 2220 const struct posix_spawn_file_actions *ufa, rlim_t lim)
2221{ 2221{
2222 struct posix_spawn_file_actions *fa; 2222 struct posix_spawn_file_actions *fa;
2223 struct posix_spawn_file_actions_entry *fae; 2223 struct posix_spawn_file_actions_entry *fae;
2224 char *pbuf = NULL; 2224 char *pbuf = NULL;
2225 int error; 2225 int error;
2226 size_t i = 0; 2226 size_t i = 0;
2227 2227
2228 fa = kmem_alloc(sizeof(*fa), KM_SLEEP); 2228 fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
2229 error = copyin(ufa, fa, sizeof(*fa)); 2229 error = copyin(ufa, fa, sizeof(*fa));
2230 if (error || fa->len == 0) { 2230 if (error || fa->len == 0) {
2231 kmem_free(fa, sizeof(*fa)); 2231 kmem_free(fa, sizeof(*fa));
2232 return error; /* 0 if not an error, and len == 0 */ 2232 return error; /* 0 if not an error, and len == 0 */
2233 } 2233 }
2234 2234
2235 if (fa->len > lim) { 2235 if (fa->len > lim) {
2236 kmem_free(fa, sizeof(*fa)); 2236 kmem_free(fa, sizeof(*fa));
2237 return EINVAL; 2237 return EINVAL;
2238 } 2238 }
2239 2239
2240 fa->size = fa->len; 2240 fa->size = fa->len;
2241 size_t fal = fa->len * sizeof(*fae); 2241 size_t fal = fa->len * sizeof(*fae);
2242 fae = fa->fae; 2242 fae = fa->fae;
2243 fa->fae = kmem_alloc(fal, KM_SLEEP); 2243 fa->fae = kmem_alloc(fal, KM_SLEEP);
2244 error = copyin(fae, fa->fae, fal); 2244 error = copyin(fae, fa->fae, fal);
2245 if (error) 2245 if (error)
2246 goto out; 2246 goto out;
2247 2247
2248 pbuf = PNBUF_GET(); 2248 pbuf = PNBUF_GET();
2249 for (; i < fa->len; i++) { 2249 for (; i < fa->len; i++) {
2250 fae = &fa->fae[i]; 2250 fae = &fa->fae[i];
2251 if (fae->fae_action != FAE_OPEN) 2251 if (fae->fae_action != FAE_OPEN)
2252 continue; 2252 continue;
2253 error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal); 2253 error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
2254 if (error) 2254 if (error)
2255 goto out; 2255 goto out;
2256 fae->fae_path = kmem_alloc(fal, KM_SLEEP); 2256 fae->fae_path = kmem_alloc(fal, KM_SLEEP);
2257 memcpy(fae->fae_path, pbuf, fal); 2257 memcpy(fae->fae_path, pbuf, fal);
2258 } 2258 }
2259 PNBUF_PUT(pbuf); 2259 PNBUF_PUT(pbuf);
2260 2260
2261 *fap = fa; 2261 *fap = fa;
2262 return 0; 2262 return 0;
2263out: 2263out:
2264 if (pbuf) 2264 if (pbuf)
2265 PNBUF_PUT(pbuf); 2265 PNBUF_PUT(pbuf);
2266 posix_spawn_fa_free(fa, i); 2266 posix_spawn_fa_free(fa, i);
2267 return error; 2267 return error;
2268} 2268}
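
From userland, the table copied in above is built with the standard posix_spawn_file_actions_add*() interfaces; each call maps onto one of the FAE_OPEN, FAE_DUP2 and FAE_CLOSE cases handled in spawn_return(). An illustrative sketch; the log-file path is a placeholder.

#include <sys/types.h>
#include <fcntl.h>
#include <spawn.h>
#include <unistd.h>

extern char **environ;

/* Spawn a child with stdout redirected to a log file, stderr duplicated
 * onto stdout, and stdin closed. */
static int
spawn_logged(pid_t *pidp, char *const argv[])
{
	posix_spawn_file_actions_t fa;
	int error;

	posix_spawn_file_actions_init(&fa);
	/* FAE_OPEN: close fd 1 if it is open, then open the path on fd 1 */
	posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO,
	    "/tmp/out.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	/* FAE_DUP2: dup fd 1 onto fd 2 */
	posix_spawn_file_actions_adddup2(&fa, STDOUT_FILENO, STDERR_FILENO);
	/* FAE_CLOSE: close fd 0 (the kernel returns EBADF if it is not open) */
	posix_spawn_file_actions_addclose(&fa, STDIN_FILENO);

	error = posix_spawn(pidp, argv[0], &fa, NULL, argv, environ);
	posix_spawn_file_actions_destroy(&fa);
	return error;
}

On the kernel side, fa->len is bounded by the lim argument of posix_spawn_fa_alloc() before any entries are copied in.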
2269 2269
2270int 2270int
2271check_posix_spawn(struct lwp *l1) 2271check_posix_spawn(struct lwp *l1)
2272{ 2272{
2273 int error, tnprocs, count; 2273 int error, tnprocs, count;
2274 uid_t uid; 2274 uid_t uid;
2275 struct proc *p1; 2275 struct proc *p1;
2276 2276
2277 p1 = l1->l_proc; 2277 p1 = l1->l_proc;
2278 uid = kauth_cred_getuid(l1->l_cred); 2278 uid = kauth_cred_getuid(l1->l_cred);
2279 tnprocs = atomic_inc_uint_nv(&nprocs); 2279 tnprocs = atomic_inc_uint_nv(&nprocs);
2280 2280
2281 /* 2281 /*
2282 * Although process entries are dynamically created, we still keep 2282 * Although process entries are dynamically created, we still keep
2283 * a global limit on the maximum number we will create. 2283 * a global limit on the maximum number we will create.
2284 */ 2284 */