Thu Nov 30 14:03:41 2017 UTC
Pull up following revision(s) (requested by maxv in ticket #401):
	sys/arch/amd64/amd64/machdep.c: revision 1.267
Don't forget to clear the TLS GDT slots on Xen. Otherwise, when doing
a lwp32->lwp64 context switch, the new lwp can use the stale slots to
reconstruct the address of the previous lwp's TLS space (and defeat ASLR?).


(martin)
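In essence, the change adds two update_descriptor() calls on the 64-bit
branch of x86_64_tls_switch(): the per-CPU GDT slots that the compat32
path loads with a 32-bit lwp's FS/GS descriptors (GUFS_SEL/GUGS_SEL) are
now overwritten with a zero descriptor whenever a 64-bit lwp is switched
in. A condensed sketch of the patched branch, excerpted from the diff
below (context trimmed; the comment is added here for exposition):

	uint64_t zero = 0;
	...
	} else {
		/*
		 * 64-bit lwp: scrub the compat32 TLS slots in the
		 * per-CPU GDT so the stale descriptor base of a
		 * previous 32-bit lwp cannot be recovered, then load
		 * the 64-bit FS/GS bases as before.
		 */
		update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero);
		update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero);
		setfs(0);
		HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
		HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs);
		HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs);
	}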
cvs diff -r1.255.6.1 -r1.255.6.2 src/sys/arch/amd64/amd64/machdep.c

--- src/sys/arch/amd64/amd64/machdep.c 2017/09/04 20:41:28 1.255.6.1
+++ src/sys/arch/amd64/amd64/machdep.c 2017/11/30 14:03:41 1.255.6.2
@@ -1,1450 +1,1453 @@ @@ -1,1450 +1,1453 @@
1/* $NetBSD: machdep.c,v 1.255.6.1 2017/09/04 20:41:28 snj Exp $ */ 1/* $NetBSD: machdep.c,v 1.255.6.2 2017/11/30 14:03:41 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 4 * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
5 * The NetBSD Foundation, Inc. 5 * The NetBSD Foundation, Inc.
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This code is derived from software contributed to The NetBSD Foundation 8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace 9 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
10 * Simulation Facility, NASA Ames Research Center. 10 * Simulation Facility, NASA Ames Research Center.
11 * 11 *
12 * This code is derived from software contributed to The NetBSD Foundation 12 * This code is derived from software contributed to The NetBSD Foundation
13 * by Coyote Point Systems, Inc. which was written under contract to Coyote 13 * by Coyote Point Systems, Inc. which was written under contract to Coyote
14 * Point by Jed Davis and Devon O'Dell. 14 * Point by Jed Davis and Devon O'Dell.
15 * 15 *
16 * Redistribution and use in source and binary forms, with or without 16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions 17 * modification, are permitted provided that the following conditions
18 * are met: 18 * are met:
19 * 1. Redistributions of source code must retain the above copyright 19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer. 20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright 21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the 22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution. 23 * documentation and/or other materials provided with the distribution.
24 * 24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE. 35 * POSSIBILITY OF SUCH DAMAGE.
36 */ 36 */
37 37
38/* 38/*
39 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 39 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
40 * 40 *
41 * Permission to use, copy, modify, and distribute this software for any 41 * Permission to use, copy, modify, and distribute this software for any
42 * purpose with or without fee is hereby granted, provided that the above 42 * purpose with or without fee is hereby granted, provided that the above
43 * copyright notice and this permission notice appear in all copies. 43 * copyright notice and this permission notice appear in all copies.
44 * 44 *
45 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 45 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
46 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 46 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
47 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 47 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
48 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 48 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
49 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 49 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
50 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 50 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
51 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 51 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
52 */ 52 */
53 53
54/* 54/*
55 * Copyright (c) 2007 Manuel Bouyer. 55 * Copyright (c) 2007 Manuel Bouyer.
56 * 56 *
57 * Redistribution and use in source and binary forms, with or without 57 * Redistribution and use in source and binary forms, with or without
58 * modification, are permitted provided that the following conditions 58 * modification, are permitted provided that the following conditions
59 * are met: 59 * are met:
60 * 1. Redistributions of source code must retain the above copyright 60 * 1. Redistributions of source code must retain the above copyright
61 * notice, this list of conditions and the following disclaimer. 61 * notice, this list of conditions and the following disclaimer.
62 * 2. Redistributions in binary form must reproduce the above copyright 62 * 2. Redistributions in binary form must reproduce the above copyright
63 * notice, this list of conditions and the following disclaimer in the 63 * notice, this list of conditions and the following disclaimer in the
64 * documentation and/or other materials provided with the distribution. 64 * documentation and/or other materials provided with the distribution.
65 * 65 *
66 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 66 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
67 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 67 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
68 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 68 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
69 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 69 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
70 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 70 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
71 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 71 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
72 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 72 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
73 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 73 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
74 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 74 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
75 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 75 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
76 * 76 *
77 */ 77 */
78 78
79/*- 79/*-
80 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 80 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
81 * All rights reserved. 81 * All rights reserved.
82 * 82 *
83 * This code is derived from software contributed to Berkeley by 83 * This code is derived from software contributed to Berkeley by
84 * William Jolitz. 84 * William Jolitz.
85 * 85 *
86 * Redistribution and use in source and binary forms, with or without 86 * Redistribution and use in source and binary forms, with or without
87 * modification, are permitted provided that the following conditions 87 * modification, are permitted provided that the following conditions
88 * are met: 88 * are met:
89 * 1. Redistributions of source code must retain the above copyright 89 * 1. Redistributions of source code must retain the above copyright
90 * notice, this list of conditions and the following disclaimer. 90 * notice, this list of conditions and the following disclaimer.
91 * 2. Redistributions in binary form must reproduce the above copyright 91 * 2. Redistributions in binary form must reproduce the above copyright
92 * notice, this list of conditions and the following disclaimer in the 92 * notice, this list of conditions and the following disclaimer in the
93 * documentation and/or other materials provided with the distribution. 93 * documentation and/or other materials provided with the distribution.
94 * 3. Neither the name of the University nor the names of its contributors 94 * 3. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software 95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission. 96 * without specific prior written permission.
97 * 97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE. 108 * SUCH DAMAGE.
109 * 109 *
110 * @(#)machdep.c 7.4 (Berkeley) 6/3/91 110 * @(#)machdep.c 7.4 (Berkeley) 6/3/91
111 */ 111 */
112 112
113#include <sys/cdefs.h> 113#include <sys/cdefs.h>
114__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.1 2017/09/04 20:41:28 snj Exp $"); 114__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.2 2017/11/30 14:03:41 martin Exp $");
115 115
116/* #define XENDEBUG_LOW */ 116/* #define XENDEBUG_LOW */
117 117
118#include "opt_modular.h" 118#include "opt_modular.h"
119#include "opt_user_ldt.h" 119#include "opt_user_ldt.h"
120#include "opt_ddb.h" 120#include "opt_ddb.h"
121#include "opt_kgdb.h" 121#include "opt_kgdb.h"
122#include "opt_cpureset_delay.h" 122#include "opt_cpureset_delay.h"
123#include "opt_mtrr.h" 123#include "opt_mtrr.h"
124#include "opt_realmem.h" 124#include "opt_realmem.h"
125#include "opt_xen.h" 125#include "opt_xen.h"
126#ifndef XEN 126#ifndef XEN
127#include "opt_physmem.h" 127#include "opt_physmem.h"
128#endif 128#endif
129#include "isa.h" 129#include "isa.h"
130#include "pci.h" 130#include "pci.h"
131 131
132#include <sys/param.h> 132#include <sys/param.h>
133#include <sys/systm.h> 133#include <sys/systm.h>
134#include <sys/signal.h> 134#include <sys/signal.h>
135#include <sys/signalvar.h> 135#include <sys/signalvar.h>
136#include <sys/kernel.h> 136#include <sys/kernel.h>
137#include <sys/cpu.h> 137#include <sys/cpu.h>
138#include <sys/exec.h> 138#include <sys/exec.h>
139#include <sys/exec_aout.h> /* for MID_* */ 139#include <sys/exec_aout.h> /* for MID_* */
140#include <sys/reboot.h> 140#include <sys/reboot.h>
141#include <sys/conf.h> 141#include <sys/conf.h>
142#include <sys/mbuf.h> 142#include <sys/mbuf.h>
143#include <sys/msgbuf.h> 143#include <sys/msgbuf.h>
144#include <sys/mount.h> 144#include <sys/mount.h>
145#include <sys/core.h> 145#include <sys/core.h>
146#include <sys/kcore.h> 146#include <sys/kcore.h>
147#include <sys/ucontext.h> 147#include <sys/ucontext.h>
148#include <machine/kcore.h> 148#include <machine/kcore.h>
149#include <sys/ras.h> 149#include <sys/ras.h>
150#include <sys/syscallargs.h> 150#include <sys/syscallargs.h>
151#include <sys/ksyms.h> 151#include <sys/ksyms.h>
152#include <sys/device.h> 152#include <sys/device.h>
153#include <sys/lwp.h> 153#include <sys/lwp.h>
154#include <sys/proc.h> 154#include <sys/proc.h>
155 155
156#ifdef KGDB 156#ifdef KGDB
157#include <sys/kgdb.h> 157#include <sys/kgdb.h>
158#endif 158#endif
159 159
160#include <dev/cons.h> 160#include <dev/cons.h>
161#include <dev/mm.h> 161#include <dev/mm.h>
162 162
163#include <uvm/uvm.h> 163#include <uvm/uvm.h>
164#include <uvm/uvm_page.h> 164#include <uvm/uvm_page.h>
165 165
166#include <sys/sysctl.h> 166#include <sys/sysctl.h>
167 167
168#include <machine/cpu.h> 168#include <machine/cpu.h>
169#include <machine/cpufunc.h> 169#include <machine/cpufunc.h>
170#include <machine/gdt.h> 170#include <machine/gdt.h>
171#include <machine/intr.h> 171#include <machine/intr.h>
172#include <machine/pio.h> 172#include <machine/pio.h>
173#include <machine/psl.h> 173#include <machine/psl.h>
174#include <machine/reg.h> 174#include <machine/reg.h>
175#include <machine/specialreg.h> 175#include <machine/specialreg.h>
176#include <machine/bootinfo.h> 176#include <machine/bootinfo.h>
177#include <x86/fpu.h> 177#include <x86/fpu.h>
178#include <x86/dbregs.h> 178#include <x86/dbregs.h>
179#include <machine/mtrr.h> 179#include <machine/mtrr.h>
180#include <machine/mpbiosvar.h> 180#include <machine/mpbiosvar.h>
181 181
182#include <x86/cputypes.h> 182#include <x86/cputypes.h>
183#include <x86/cpuvar.h> 183#include <x86/cpuvar.h>
184#include <x86/machdep.h> 184#include <x86/machdep.h>
185 185
186#include <x86/x86/tsc.h> 186#include <x86/x86/tsc.h>
187 187
188#include <dev/isa/isareg.h> 188#include <dev/isa/isareg.h>
189#include <machine/isa_machdep.h> 189#include <machine/isa_machdep.h>
190#include <dev/ic/i8042reg.h> 190#include <dev/ic/i8042reg.h>
191 191
192#ifdef XEN 192#ifdef XEN
193#include <xen/xen.h> 193#include <xen/xen.h>
194#include <xen/hypervisor.h> 194#include <xen/hypervisor.h>
195#include <xen/evtchn.h> 195#include <xen/evtchn.h>
196#endif 196#endif
197 197
198#ifdef DDB 198#ifdef DDB
199#include <machine/db_machdep.h> 199#include <machine/db_machdep.h>
200#include <ddb/db_extern.h> 200#include <ddb/db_extern.h>
201#include <ddb/db_output.h> 201#include <ddb/db_output.h>
202#include <ddb/db_interface.h> 202#include <ddb/db_interface.h>
203#endif 203#endif
204 204
205#include "acpica.h" 205#include "acpica.h"
206 206
207#if NACPICA > 0 207#if NACPICA > 0
208#include <dev/acpi/acpivar.h> 208#include <dev/acpi/acpivar.h>
209#define ACPI_MACHDEP_PRIVATE 209#define ACPI_MACHDEP_PRIVATE
210#include <machine/acpi_machdep.h> 210#include <machine/acpi_machdep.h>
211#else 211#else
212#include <machine/i82489var.h> 212#include <machine/i82489var.h>
213#endif 213#endif
214 214
215#include "isa.h" 215#include "isa.h"
216#include "isadma.h" 216#include "isadma.h"
217#include "ksyms.h" 217#include "ksyms.h"
218 218
219/* the following is used externally (sysctl_hw) */ 219/* the following is used externally (sysctl_hw) */
220char machine[] = "amd64"; /* CPU "architecture" */ 220char machine[] = "amd64"; /* CPU "architecture" */
221char machine_arch[] = "x86_64"; /* machine == machine_arch */ 221char machine_arch[] = "x86_64"; /* machine == machine_arch */
222 222
223#ifdef CPURESET_DELAY 223#ifdef CPURESET_DELAY
224int cpureset_delay = CPURESET_DELAY; 224int cpureset_delay = CPURESET_DELAY;
225#else 225#else
226int cpureset_delay = 2000; /* default to 2s */ 226int cpureset_delay = 2000; /* default to 2s */
227#endif 227#endif
228 228
229int cpu_class = CPUCLASS_686; 229int cpu_class = CPUCLASS_686;
230 230
231#ifdef MTRR 231#ifdef MTRR
232struct mtrr_funcs *mtrr_funcs; 232struct mtrr_funcs *mtrr_funcs;
233#endif 233#endif
234 234
235uint64_t dumpmem_low; 235uint64_t dumpmem_low;
236uint64_t dumpmem_high; 236uint64_t dumpmem_high;
237int cpu_class; 237int cpu_class;
238int use_pae; 238int use_pae;
239 239
240#ifndef NO_SPARSE_DUMP 240#ifndef NO_SPARSE_DUMP
241int sparse_dump = 1; 241int sparse_dump = 1;
242 242
243paddr_t max_paddr = 0; 243paddr_t max_paddr = 0;
244unsigned char *sparse_dump_physmap; 244unsigned char *sparse_dump_physmap;
245#endif 245#endif
246 246
247char *dump_headerbuf, *dump_headerbuf_ptr; 247char *dump_headerbuf, *dump_headerbuf_ptr;
248#define dump_headerbuf_size PAGE_SIZE 248#define dump_headerbuf_size PAGE_SIZE
249#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size) 249#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size)
250#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr) 250#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr)
251daddr_t dump_header_blkno; 251daddr_t dump_header_blkno;
252 252
253size_t dump_nmemsegs; 253size_t dump_nmemsegs;
254size_t dump_npages; 254size_t dump_npages;
255size_t dump_header_size; 255size_t dump_header_size;
256size_t dump_totalbytesleft; 256size_t dump_totalbytesleft;
257 257
258vaddr_t idt_vaddr; 258vaddr_t idt_vaddr;
259paddr_t idt_paddr; 259paddr_t idt_paddr;
260vaddr_t gdt_vaddr; 260vaddr_t gdt_vaddr;
261paddr_t gdt_paddr; 261paddr_t gdt_paddr;
262vaddr_t ldt_vaddr; 262vaddr_t ldt_vaddr;
263paddr_t ldt_paddr; 263paddr_t ldt_paddr;
264 264
265vaddr_t module_start, module_end; 265vaddr_t module_start, module_end;
266static struct vm_map module_map_store; 266static struct vm_map module_map_store;
267extern struct vm_map *module_map; 267extern struct vm_map *module_map;
268vaddr_t kern_end; 268vaddr_t kern_end;
269 269
270struct vm_map *phys_map = NULL; 270struct vm_map *phys_map = NULL;
271 271
272extern paddr_t lowmem_rsvd; 272extern paddr_t lowmem_rsvd;
273extern paddr_t avail_start, avail_end; 273extern paddr_t avail_start, avail_end;
274#ifdef XEN 274#ifdef XEN
275extern paddr_t pmap_pa_start, pmap_pa_end; 275extern paddr_t pmap_pa_start, pmap_pa_end;
276#endif 276#endif
277 277
278#ifndef XEN 278#ifndef XEN
279void (*delay_func)(unsigned int) = i8254_delay; 279void (*delay_func)(unsigned int) = i8254_delay;
280void (*initclock_func)(void) = i8254_initclocks; 280void (*initclock_func)(void) = i8254_initclocks;
281#else /* XEN */ 281#else /* XEN */
282void (*delay_func)(unsigned int) = xen_delay; 282void (*delay_func)(unsigned int) = xen_delay;
283void (*initclock_func)(void) = xen_initclocks; 283void (*initclock_func)(void) = xen_initclocks;
284#endif 284#endif
285 285
286struct pool x86_dbregspl; 286struct pool x86_dbregspl;
287 287
288/* 288/*
289 * Size of memory segments, before any memory is stolen. 289 * Size of memory segments, before any memory is stolen.
290 */ 290 */
291phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; 291phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
292int mem_cluster_cnt; 292int mem_cluster_cnt;
293 293
294char x86_64_doubleflt_stack[4096]; 294char x86_64_doubleflt_stack[4096];
295 295
296int cpu_dump(void); 296int cpu_dump(void);
297int cpu_dumpsize(void); 297int cpu_dumpsize(void);
298u_long cpu_dump_mempagecnt(void); 298u_long cpu_dump_mempagecnt(void);
299void dodumpsys(void); 299void dodumpsys(void);
300void dumpsys(void); 300void dumpsys(void);
301 301
302extern int time_adjusted; /* XXX no common header */ 302extern int time_adjusted; /* XXX no common header */
303 303
304void dump_misc_init(void); 304void dump_misc_init(void);
305void dump_seg_prep(void); 305void dump_seg_prep(void);
306int dump_seg_iter(int (*)(paddr_t, paddr_t)); 306int dump_seg_iter(int (*)(paddr_t, paddr_t));
307 307
308#ifndef NO_SPARSE_DUMP 308#ifndef NO_SPARSE_DUMP
309void sparse_dump_reset(void); 309void sparse_dump_reset(void);
310void sparse_dump_mark(void); 310void sparse_dump_mark(void);
311void cpu_dump_prep_sparse(void); 311void cpu_dump_prep_sparse(void);
312#endif 312#endif
313 313
314void dump_header_start(void); 314void dump_header_start(void);
315int dump_header_flush(void); 315int dump_header_flush(void);
316int dump_header_addbytes(const void*, size_t); 316int dump_header_addbytes(const void*, size_t);
317int dump_header_addseg(paddr_t, paddr_t); 317int dump_header_addseg(paddr_t, paddr_t);
318int dump_header_finish(void); 318int dump_header_finish(void);
319 319
320int dump_seg_count_range(paddr_t, paddr_t); 320int dump_seg_count_range(paddr_t, paddr_t);
321int dumpsys_seg(paddr_t, paddr_t); 321int dumpsys_seg(paddr_t, paddr_t);
322 322
323void init_x86_64(paddr_t); 323void init_x86_64(paddr_t);
324 324
325/* 325/*
326 * Machine-dependent startup code 326 * Machine-dependent startup code
327 */ 327 */
328void 328void
329cpu_startup(void) 329cpu_startup(void)
330{ 330{
331 int x, y; 331 int x, y;
332 vaddr_t minaddr, maxaddr; 332 vaddr_t minaddr, maxaddr;
333 psize_t sz; 333 psize_t sz;
334 334
335 /* 335 /*
336 * For console drivers that require uvm and pmap to be initialized, 336 * For console drivers that require uvm and pmap to be initialized,
337 * we'll give them one more chance here... 337 * we'll give them one more chance here...
338 */ 338 */
339 consinit(); 339 consinit();
340 340
341 /* 341 /*
342 * Initialize error message buffer (et end of core). 342 * Initialize error message buffer (et end of core).
343 */ 343 */
344 if (msgbuf_p_cnt == 0) 344 if (msgbuf_p_cnt == 0)
345 panic("msgbuf paddr map has not been set up"); 345 panic("msgbuf paddr map has not been set up");
346 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) 346 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
347 continue; 347 continue;
348 348
349 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY); 349 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
350 if (msgbuf_vaddr == 0) 350 if (msgbuf_vaddr == 0)
351 panic("failed to valloc msgbuf_vaddr"); 351 panic("failed to valloc msgbuf_vaddr");
352 352
353 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { 353 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
354 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) 354 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
355 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, 355 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
356 msgbuf_p_seg[y].paddr + x * PAGE_SIZE, 356 msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
357 VM_PROT_READ|VM_PROT_WRITE, 0); 357 VM_PROT_READ|VM_PROT_WRITE, 0);
358 } 358 }
359 359
360 pmap_update(pmap_kernel()); 360 pmap_update(pmap_kernel());
361 361
362 initmsgbuf((void *)msgbuf_vaddr, round_page(sz)); 362 initmsgbuf((void *)msgbuf_vaddr, round_page(sz));
363 363
364 minaddr = 0; 364 minaddr = 0;
365 365
366 /* 366 /*
367 * Allocate a submap for physio. 367 * Allocate a submap for physio.
368 */ 368 */
369 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, 369 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
370 VM_PHYS_SIZE, 0, false, NULL); 370 VM_PHYS_SIZE, 0, false, NULL);
371 371
372 /* 372 /*
373 * Create the module map. 373 * Create the module map.
374 * 374 *
375 * The kernel uses RIP-relative addressing with a maximum offset of 375 * The kernel uses RIP-relative addressing with a maximum offset of
376 * 2GB. The problem is, kernel_map is too far away in memory from 376 * 2GB. The problem is, kernel_map is too far away in memory from
377 * the kernel .text. So we cannot use it, and have to create a 377 * the kernel .text. So we cannot use it, and have to create a
378 * special module_map. 378 * special module_map.
379 * 379 *
380 * The module map is taken as what is left of the bootstrap memory 380 * The module map is taken as what is left of the bootstrap memory
381 * created in locore.S. This memory is right above the kernel 381 * created in locore.S. This memory is right above the kernel
382 * image, so this is the best place to put our modules. 382 * image, so this is the best place to put our modules.
383 */ 383 */
384 uvm_map_setup(&module_map_store, module_start, module_end, 0); 384 uvm_map_setup(&module_map_store, module_start, module_end, 0);
385 module_map_store.pmap = pmap_kernel(); 385 module_map_store.pmap = pmap_kernel();
386 module_map = &module_map_store; 386 module_map = &module_map_store;
387 387
388 /* Say hello. */ 388 /* Say hello. */
389 banner(); 389 banner();
390 390
391#if NISA > 0 || NPCI > 0 391#if NISA > 0 || NPCI > 0
392 /* Safe for i/o port / memory space allocation to use malloc now. */ 392 /* Safe for i/o port / memory space allocation to use malloc now. */
393 x86_bus_space_mallocok(); 393 x86_bus_space_mallocok();
394#endif 394#endif
395 395
396 gdt_init(); 396 gdt_init();
397 x86_64_proc0_tss_ldt_init(); 397 x86_64_proc0_tss_ldt_init();
398 398
399 cpu_init_tss(&cpu_info_primary); 399 cpu_init_tss(&cpu_info_primary);
400#if !defined(XEN) 400#if !defined(XEN)
401 ltr(cpu_info_primary.ci_tss_sel); 401 ltr(cpu_info_primary.ci_tss_sel);
402#endif /* !defined(XEN) */ 402#endif /* !defined(XEN) */
403 403
404 x86_startup(); 404 x86_startup();
405} 405}
406 406
407#ifdef XEN 407#ifdef XEN
408/* used in assembly */ 408/* used in assembly */
409void hypervisor_callback(void); 409void hypervisor_callback(void);
410void failsafe_callback(void); 410void failsafe_callback(void);
411void x86_64_switch_context(struct pcb *); 411void x86_64_switch_context(struct pcb *);
412void x86_64_tls_switch(struct lwp *); 412void x86_64_tls_switch(struct lwp *);
413 413
414void 414void
415x86_64_switch_context(struct pcb *new) 415x86_64_switch_context(struct pcb *new)
416{ 416{
417 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0); 417 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0);
418 struct physdev_op physop; 418 struct physdev_op physop;
419 physop.cmd = PHYSDEVOP_SET_IOPL; 419 physop.cmd = PHYSDEVOP_SET_IOPL;
420 physop.u.set_iopl.iopl = new->pcb_iopl; 420 physop.u.set_iopl.iopl = new->pcb_iopl;
421 HYPERVISOR_physdev_op(&physop); 421 HYPERVISOR_physdev_op(&physop);
422} 422}
423 423
424void 424void
425x86_64_tls_switch(struct lwp *l) 425x86_64_tls_switch(struct lwp *l)
426{ 426{
427 struct cpu_info *ci = curcpu(); 427 struct cpu_info *ci = curcpu();
428 struct pcb *pcb = lwp_getpcb(l); 428 struct pcb *pcb = lwp_getpcb(l);
429 struct trapframe *tf = l->l_md.md_regs; 429 struct trapframe *tf = l->l_md.md_regs;
 430 uint64_t zero = 0;
430 431
431 /* 432 /*
432 * Raise the IPL to IPL_HIGH. 433 * Raise the IPL to IPL_HIGH.
433 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority 434 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority
434 * is deferred until mi_switch(), when cpu_switchto() returns. 435 * is deferred until mi_switch(), when cpu_switchto() returns.
435 */ 436 */
436 (void)splhigh(); 437 (void)splhigh();
437 /* 438 /*
438 * If our floating point registers are on a different CPU, 439 * If our floating point registers are on a different CPU,
439 * set CR0_TS so we'll trap rather than reuse bogus state. 440 * set CR0_TS so we'll trap rather than reuse bogus state.
440 */ 441 */
441 if (l != ci->ci_fpcurlwp) { 442 if (l != ci->ci_fpcurlwp) {
442 HYPERVISOR_fpu_taskswitch(1); 443 HYPERVISOR_fpu_taskswitch(1);
443 } 444 }
444 445
445 /* Update TLS segment pointers */ 446 /* Update TLS segment pointers */
446 if (pcb->pcb_flags & PCB_COMPAT32) { 447 if (pcb->pcb_flags & PCB_COMPAT32) {
447 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); 448 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs);
448 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); 449 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs);
449 setfs(tf->tf_fs); 450 setfs(tf->tf_fs);
450 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); 451 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs);
451 } else { 452 } else {
 453 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero);
 454 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero);
452 setfs(0); 455 setfs(0);
453 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); 456 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
454 HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); 457 HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs);
455 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs); 458 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs);
456 } 459 }
457} 460}
458#endif /* XEN */ 461#endif /* XEN */
459 462
460/* 463/*
461 * Set up proc0's TSS and LDT. 464 * Set up proc0's TSS and LDT.
462 */ 465 */
463void 466void
464x86_64_proc0_tss_ldt_init(void) 467x86_64_proc0_tss_ldt_init(void)
465{ 468{
466 struct lwp *l = &lwp0; 469 struct lwp *l = &lwp0;
467 struct pcb *pcb = lwp_getpcb(l); 470 struct pcb *pcb = lwp_getpcb(l);
468 471
469 pcb->pcb_flags = 0; 472 pcb->pcb_flags = 0;
470 pcb->pcb_fs = 0; 473 pcb->pcb_fs = 0;
471 pcb->pcb_gs = 0; 474 pcb->pcb_gs = 0;
472 pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf; 475 pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf;
473 pcb->pcb_iopl = SEL_KPL; 476 pcb->pcb_iopl = SEL_KPL;
474 pcb->pcb_dbregs = NULL; 477 pcb->pcb_dbregs = NULL;
475 478
476 pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 479 pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
477 pcb->pcb_cr0 = rcr0() & ~CR0_TS; 480 pcb->pcb_cr0 = rcr0() & ~CR0_TS;
478 l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1; 481 l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1;
479 482
480#if !defined(XEN) 483#if !defined(XEN)
481 lldt(pmap_kernel()->pm_ldt_sel); 484 lldt(pmap_kernel()->pm_ldt_sel);
482#else 485#else
483 { 486 {
484 struct physdev_op physop; 487 struct physdev_op physop;
485 xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3); 488 xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3);
486 /* Reset TS bit and set kernel stack for interrupt handlers */ 489 /* Reset TS bit and set kernel stack for interrupt handlers */
487 HYPERVISOR_fpu_taskswitch(1); 490 HYPERVISOR_fpu_taskswitch(1);
488 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0); 491 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0);
489 physop.cmd = PHYSDEVOP_SET_IOPL; 492 physop.cmd = PHYSDEVOP_SET_IOPL;
490 physop.u.set_iopl.iopl = pcb->pcb_iopl; 493 physop.u.set_iopl.iopl = pcb->pcb_iopl;
491 HYPERVISOR_physdev_op(&physop); 494 HYPERVISOR_physdev_op(&physop);
492 } 495 }
493#endif /* XEN */ 496#endif /* XEN */
494} 497}
495 498
496/* 499/*
497 * Set up TSS and I/O bitmap. 500 * Set up TSS and I/O bitmap.
498 */ 501 */
499void 502void
500cpu_init_tss(struct cpu_info *ci) 503cpu_init_tss(struct cpu_info *ci)
501{ 504{
502 struct x86_64_tss *tss = &ci->ci_tss; 505 struct x86_64_tss *tss = &ci->ci_tss;
503 uintptr_t p; 506 uintptr_t p;
504 507
505 tss->tss_iobase = IOMAP_INVALOFF << 16; 508 tss->tss_iobase = IOMAP_INVALOFF << 16;
506 /* tss->tss_ist[0] is filled by cpu_intr_init */ 509 /* tss->tss_ist[0] is filled by cpu_intr_init */
507 510
508 /* double fault */ 511 /* double fault */
509 tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16; 512 tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16;
510 513
511 /* NMI */ 514 /* NMI */
512 p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); 515 p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
513 tss->tss_ist[2] = p + PAGE_SIZE - 16; 516 tss->tss_ist[2] = p + PAGE_SIZE - 16;
514 ci->ci_tss_sel = tss_alloc(tss); 517 ci->ci_tss_sel = tss_alloc(tss);
515} 518}
516 519
517void 520void
518buildcontext(struct lwp *l, void *catcher, void *f) 521buildcontext(struct lwp *l, void *catcher, void *f)
519{ 522{
520 struct trapframe *tf = l->l_md.md_regs; 523 struct trapframe *tf = l->l_md.md_regs;
521 524
522 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 525 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
523 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 526 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
524 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 527 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
525 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); 528 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
526 529
527 tf->tf_rip = (uint64_t)catcher; 530 tf->tf_rip = (uint64_t)catcher;
528 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 531 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
529 tf->tf_rflags &= ~PSL_CLEARSIG; 532 tf->tf_rflags &= ~PSL_CLEARSIG;
530 tf->tf_rsp = (uint64_t)f; 533 tf->tf_rsp = (uint64_t)f;
531 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 534 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
532 535
533 /* Ensure FP state is sane */ 536 /* Ensure FP state is sane */
534 fpu_save_area_reset(l); 537 fpu_save_area_reset(l);
535} 538}
536 539
537void 540void
538sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask) 541sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask)
539{ 542{
540 543
541 printf("sendsig_sigcontext: illegal\n"); 544 printf("sendsig_sigcontext: illegal\n");
542 sigexit(curlwp, SIGILL); 545 sigexit(curlwp, SIGILL);
543} 546}
544 547
545void 548void
546sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) 549sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
547{ 550{
548 struct lwp *l = curlwp; 551 struct lwp *l = curlwp;
549 struct proc *p = l->l_proc; 552 struct proc *p = l->l_proc;
550 struct sigacts *ps = p->p_sigacts; 553 struct sigacts *ps = p->p_sigacts;
551 int onstack, error; 554 int onstack, error;
552 int sig = ksi->ksi_signo; 555 int sig = ksi->ksi_signo;
553 struct sigframe_siginfo *fp, frame; 556 struct sigframe_siginfo *fp, frame;
554 sig_t catcher = SIGACTION(p, sig).sa_handler; 557 sig_t catcher = SIGACTION(p, sig).sa_handler;
555 struct trapframe *tf = l->l_md.md_regs; 558 struct trapframe *tf = l->l_md.md_regs;
556 char *sp; 559 char *sp;
557 560
558 KASSERT(mutex_owned(p->p_lock)); 561 KASSERT(mutex_owned(p->p_lock));
559 562
560 /* Do we need to jump onto the signal stack? */ 563 /* Do we need to jump onto the signal stack? */
561 onstack = 564 onstack =
562 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 565 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
563 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 566 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
564 567
565 /* Allocate space for the signal handler context. */ 568 /* Allocate space for the signal handler context. */
566 if (onstack) 569 if (onstack)
567 sp = ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size); 570 sp = ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size);
568 else 571 else
569 /* AMD64 ABI 128-bytes "red zone". */ 572 /* AMD64 ABI 128-bytes "red zone". */
570 sp = (char *)tf->tf_rsp - 128; 573 sp = (char *)tf->tf_rsp - 128;
571 574
572 sp -= sizeof(struct sigframe_siginfo); 575 sp -= sizeof(struct sigframe_siginfo);
573 /* Round down the stackpointer to a multiple of 16 for the ABI. */ 576 /* Round down the stackpointer to a multiple of 16 for the ABI. */
574 fp = (struct sigframe_siginfo *)(((unsigned long)sp & ~15) - 8); 577 fp = (struct sigframe_siginfo *)(((unsigned long)sp & ~15) - 8);
575 578
576 frame.sf_ra = (uint64_t)ps->sa_sigdesc[sig].sd_tramp; 579 frame.sf_ra = (uint64_t)ps->sa_sigdesc[sig].sd_tramp;
577 frame.sf_si._info = ksi->ksi_info; 580 frame.sf_si._info = ksi->ksi_info;
578 frame.sf_uc.uc_flags = _UC_SIGMASK; 581 frame.sf_uc.uc_flags = _UC_SIGMASK;
579 frame.sf_uc.uc_sigmask = *mask; 582 frame.sf_uc.uc_sigmask = *mask;
580 frame.sf_uc.uc_link = l->l_ctxlink; 583 frame.sf_uc.uc_link = l->l_ctxlink;
581 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) 584 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
582 ? _UC_SETSTACK : _UC_CLRSTACK; 585 ? _UC_SETSTACK : _UC_CLRSTACK;
583 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); 586 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
584 sendsig_reset(l, sig); 587 sendsig_reset(l, sig);
585 588
586 mutex_exit(p->p_lock); 589 mutex_exit(p->p_lock);
587 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); 590 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
588 /* Copyout all the fp regs, the signal handler might expect them. */ 591 /* Copyout all the fp regs, the signal handler might expect them. */
589 error = copyout(&frame, fp, sizeof frame); 592 error = copyout(&frame, fp, sizeof frame);
590 mutex_enter(p->p_lock); 593 mutex_enter(p->p_lock);
591 594
592 if (error != 0) { 595 if (error != 0) {
593 /* 596 /*
594 * Process has trashed its stack; give it an illegal 597 * Process has trashed its stack; give it an illegal
595 * instruction to halt it in its tracks. 598 * instruction to halt it in its tracks.
596 */ 599 */
597 sigexit(l, SIGILL); 600 sigexit(l, SIGILL);
598 /* NOTREACHED */ 601 /* NOTREACHED */
599 } 602 }
600 603
601 buildcontext(l, catcher, fp); 604 buildcontext(l, catcher, fp);
602 605
603 tf->tf_rdi = sig; 606 tf->tf_rdi = sig;
604 tf->tf_rsi = (uint64_t)&fp->sf_si; 607 tf->tf_rsi = (uint64_t)&fp->sf_si;
605 tf->tf_rdx = tf->tf_r15 = (uint64_t)&fp->sf_uc; 608 tf->tf_rdx = tf->tf_r15 = (uint64_t)&fp->sf_uc;
606 609
607 /* Remember that we're now on the signal stack. */ 610 /* Remember that we're now on the signal stack. */
608 if (onstack) 611 if (onstack)
609 l->l_sigstk.ss_flags |= SS_ONSTACK; 612 l->l_sigstk.ss_flags |= SS_ONSTACK;
610 613
611 if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS) { 614 if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS) {
612 /* 615 /*
613 * process has given an invalid address for the 616 * process has given an invalid address for the
614 * handler. Stop it, but do not do it before so 617 * handler. Stop it, but do not do it before so
615 * we can return the right info to userland (or in core dump) 618 * we can return the right info to userland (or in core dump)
616 */ 619 */
617 sigexit(l, SIGILL); 620 sigexit(l, SIGILL);
618 /* NOTREACHED */ 621 /* NOTREACHED */
619 } 622 }
620} 623}
621 624
622struct pcb dumppcb; 625struct pcb dumppcb;
623 626
624void 627void
625cpu_reboot(int howto, char *bootstr) 628cpu_reboot(int howto, char *bootstr)
626{ 629{
627 static bool syncdone = false; 630 static bool syncdone = false;
628 int s = IPL_NONE; 631 int s = IPL_NONE;
629 __USE(s); /* ugly otherwise */ 632 __USE(s); /* ugly otherwise */
630 633
631 if (cold) { 634 if (cold) {
632 howto |= RB_HALT; 635 howto |= RB_HALT;
633 goto haltsys; 636 goto haltsys;
634 } 637 }
635 638
636 boothowto = howto; 639 boothowto = howto;
637 640
638 /* i386 maybe_dump() */ 641 /* i386 maybe_dump() */
639 642
640 /* 643 /*
641 * If we've panic'd, don't make the situation potentially 644 * If we've panic'd, don't make the situation potentially
642 * worse by syncing or unmounting the file systems. 645 * worse by syncing or unmounting the file systems.
643 */ 646 */
644 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) { 647 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
645 if (!syncdone) { 648 if (!syncdone) {
646 syncdone = true; 649 syncdone = true;
647 /* XXX used to force unmount as well, here */ 650 /* XXX used to force unmount as well, here */
648 vfs_sync_all(curlwp); 651 vfs_sync_all(curlwp);
649 /* 652 /*
650 * If we've been adjusting the clock, the todr 653 * If we've been adjusting the clock, the todr
651 * will be out of synch; adjust it now. 654 * will be out of synch; adjust it now.
652 * 655 *
653 * XXX used to do this after unmounting all 656 * XXX used to do this after unmounting all
654 * filesystems with vfs_shutdown(). 657 * filesystems with vfs_shutdown().
655 */ 658 */
656 if (time_adjusted != 0) 659 if (time_adjusted != 0)
657 resettodr(); 660 resettodr();
658 } 661 }
659 662
660 while (vfs_unmountall1(curlwp, false, false) || 663 while (vfs_unmountall1(curlwp, false, false) ||
661 config_detach_all(boothowto) || 664 config_detach_all(boothowto) ||
662 vfs_unmount_forceone(curlwp)) 665 vfs_unmount_forceone(curlwp))
663 ; /* do nothing */ 666 ; /* do nothing */
664 } else 667 } else
665 suspendsched(); 668 suspendsched();
666 669
667 pmf_system_shutdown(boothowto); 670 pmf_system_shutdown(boothowto);
668 671
669 /* Disable interrupts. */ 672 /* Disable interrupts. */
670 s = splhigh(); 673 s = splhigh();
671 674
672 /* Do a dump if requested. */ 675 /* Do a dump if requested. */
673 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) 676 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
674 dumpsys(); 677 dumpsys();
675 678
676haltsys: 679haltsys:
677 doshutdownhooks(); 680 doshutdownhooks();
678 681
679 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { 682 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
680#if NACPICA > 0 683#if NACPICA > 0
681 if (s != IPL_NONE) 684 if (s != IPL_NONE)
682 splx(s); 685 splx(s);
683 686
684 acpi_enter_sleep_state(ACPI_STATE_S5); 687 acpi_enter_sleep_state(ACPI_STATE_S5);
685#endif 688#endif
686#ifdef XEN 689#ifdef XEN
687 HYPERVISOR_shutdown(); 690 HYPERVISOR_shutdown();
688#endif /* XEN */ 691#endif /* XEN */
689 } 692 }
690 693
691 cpu_broadcast_halt(); 694 cpu_broadcast_halt();
692 695
693 if (howto & RB_HALT) { 696 if (howto & RB_HALT) {
694#if NACPICA > 0 697#if NACPICA > 0
695 acpi_disable(); 698 acpi_disable();
696#endif 699#endif
697 700
698 printf("\n"); 701 printf("\n");
699 printf("The operating system has halted.\n"); 702 printf("The operating system has halted.\n");
700 printf("Please press any key to reboot.\n\n"); 703 printf("Please press any key to reboot.\n\n");
701 cnpollc(1); /* for proper keyboard command handling */ 704 cnpollc(1); /* for proper keyboard command handling */
702 if (cngetc() == 0) { 705 if (cngetc() == 0) {
703 /* no console attached, so just hlt */ 706 /* no console attached, so just hlt */
704 printf("No keyboard - cannot reboot after all.\n"); 707 printf("No keyboard - cannot reboot after all.\n");
705 for(;;) { 708 for(;;) {
706 x86_hlt(); 709 x86_hlt();
707 } 710 }
708 } 711 }
709 cnpollc(0); 712 cnpollc(0);
710 } 713 }
711 714
712 printf("rebooting...\n"); 715 printf("rebooting...\n");
713 if (cpureset_delay > 0) 716 if (cpureset_delay > 0)
714 delay(cpureset_delay * 1000); 717 delay(cpureset_delay * 1000);
715 cpu_reset(); 718 cpu_reset();
716 for(;;) ; 719 for(;;) ;
717 /*NOTREACHED*/ 720 /*NOTREACHED*/
718} 721}
719 722
720/* 723/*
721 * XXXfvdl share dumpcode. 724 * XXXfvdl share dumpcode.
722 */ 725 */
723 726
724/* 727/*
725 * Perform assorted dump-related initialization tasks. Assumes that 728 * Perform assorted dump-related initialization tasks. Assumes that
726 * the maximum physical memory address will not increase afterwards. 729 * the maximum physical memory address will not increase afterwards.
727 */ 730 */
728void 731void
729dump_misc_init(void) 732dump_misc_init(void)
730{ 733{
731#ifndef NO_SPARSE_DUMP 734#ifndef NO_SPARSE_DUMP
732 int i; 735 int i;
733#endif 736#endif
734 737
735 if (dump_headerbuf != NULL) 738 if (dump_headerbuf != NULL)
736 return; /* already called */ 739 return; /* already called */
737 740
738#ifndef NO_SPARSE_DUMP 741#ifndef NO_SPARSE_DUMP
739 for (i = 0; i < mem_cluster_cnt; ++i) { 742 for (i = 0; i < mem_cluster_cnt; ++i) {
740 paddr_t top = mem_clusters[i].start + mem_clusters[i].size; 743 paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
741 if (max_paddr < top) 744 if (max_paddr < top)
742 max_paddr = top; 745 max_paddr = top;
743 } 746 }
744#ifdef DEBUG 747#ifdef DEBUG
745 printf("dump_misc_init: max_paddr = 0x%lx\n", 748 printf("dump_misc_init: max_paddr = 0x%lx\n",
746 (unsigned long)max_paddr); 749 (unsigned long)max_paddr);
747#endif 750#endif
748 if (max_paddr == 0) { 751 if (max_paddr == 0) {
749 printf("Your machine does not initialize mem_clusters; " 752 printf("Your machine does not initialize mem_clusters; "
750 "sparse_dumps disabled\n"); 753 "sparse_dumps disabled\n");
751 sparse_dump = 0; 754 sparse_dump = 0;
752 } else { 755 } else {
753 sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map, 756 sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map,
754 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE), 757 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE),
755 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); 758 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
756 } 759 }
757#endif 760#endif
758 dump_headerbuf = (void *)uvm_km_alloc(kernel_map, 761 dump_headerbuf = (void *)uvm_km_alloc(kernel_map,
759 dump_headerbuf_size, 762 dump_headerbuf_size,
760 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); 763 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
761 /* XXXjld should check for failure here, disable dumps if so. */ 764 /* XXXjld should check for failure here, disable dumps if so. */
762} 765}
763 766
764#ifndef NO_SPARSE_DUMP 767#ifndef NO_SPARSE_DUMP
765/* 768/*
766 * Clear the set of pages to include in a sparse dump. 769 * Clear the set of pages to include in a sparse dump.
767 */ 770 */
768void 771void
769sparse_dump_reset(void) 772sparse_dump_reset(void)
770{ 773{
771 memset(sparse_dump_physmap, 0, 774 memset(sparse_dump_physmap, 0,
772 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE)); 775 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE));
773} 776}
774 777
775/* 778/*
776 * Include or exclude pages in a sparse dump. 779 * Include or exclude pages in a sparse dump.
777 */ 780 */
778void 781void
779sparse_dump_mark(void) 782sparse_dump_mark(void)
780{ 783{
781 paddr_t p, pstart, pend; 784 paddr_t p, pstart, pend;
782 struct vm_page *pg; 785 struct vm_page *pg;
783 int i; 786 int i;
784 uvm_physseg_t upm; 787 uvm_physseg_t upm;
785 788
786 /* 789 /*
787 * Mark all memory pages, then unmark pages that are uninteresting. 790 * Mark all memory pages, then unmark pages that are uninteresting.
788 * Dereferenceing pg->uobject might crash again if another CPU 791 * Dereferenceing pg->uobject might crash again if another CPU
789 * frees the object out from under us, but we can't lock anything 792 * frees the object out from under us, but we can't lock anything
790 * so it's a risk we have to take. 793 * so it's a risk we have to take.
791 */ 794 */
792 795
793 for (i = 0; i < mem_cluster_cnt; ++i) { 796 for (i = 0; i < mem_cluster_cnt; ++i) {
794 pstart = mem_clusters[i].start / PAGE_SIZE; 797 pstart = mem_clusters[i].start / PAGE_SIZE;
795 pend = pstart + mem_clusters[i].size / PAGE_SIZE; 798 pend = pstart + mem_clusters[i].size / PAGE_SIZE;
796 799
797 for (p = pstart; p < pend; p++) { 800 for (p = pstart; p < pend; p++) {
798 setbit(sparse_dump_physmap, p); 801 setbit(sparse_dump_physmap, p);
799 } 802 }
800 } 803 }
801 for (upm = uvm_physseg_get_first(); 804 for (upm = uvm_physseg_get_first();
802 uvm_physseg_valid_p(upm); 805 uvm_physseg_valid_p(upm);
803 upm = uvm_physseg_get_next(upm)) { 806 upm = uvm_physseg_get_next(upm)) {
804 paddr_t pfn; 807 paddr_t pfn;
805 808
806 /* 809 /*
807 * We assume that seg->start to seg->end are 810 * We assume that seg->start to seg->end are
808 * uvm_page_physload()ed 811 * uvm_page_physload()ed
809 */ 812 */
810 for (pfn = uvm_physseg_get_start(upm); 813 for (pfn = uvm_physseg_get_start(upm);
811 pfn < uvm_physseg_get_end(upm); 814 pfn < uvm_physseg_get_end(upm);
812 pfn++) { 815 pfn++) {
813 pg = PHYS_TO_VM_PAGE(ptoa(pfn)); 816 pg = PHYS_TO_VM_PAGE(ptoa(pfn));
814 817
815 if (pg->uanon || (pg->pqflags & PQ_FREE) || 818 if (pg->uanon || (pg->pqflags & PQ_FREE) ||
816 (pg->uobject && pg->uobject->pgops)) { 819 (pg->uobject && pg->uobject->pgops)) {
817 p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE; 820 p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE;
818 clrbit(sparse_dump_physmap, p); 821 clrbit(sparse_dump_physmap, p);
819 } 822 }
820 } 823 }
821 } 824 }
822} 825}
823 826
824/* 827/*
825 * Machine-dependently decides on the contents of a sparse dump, using 828 * Machine-dependently decides on the contents of a sparse dump, using
826 * the above. 829 * the above.
827 */ 830 */
828void 831void
829cpu_dump_prep_sparse(void) 832cpu_dump_prep_sparse(void)
830{ 833{
831 sparse_dump_reset(); 834 sparse_dump_reset();
832 /* XXX could the alternate recursive page table be skipped? */ 835 /* XXX could the alternate recursive page table be skipped? */
833 sparse_dump_mark(); 836 sparse_dump_mark();
834 /* Memory for I/O buffers could be unmarked here, for example. */ 837 /* Memory for I/O buffers could be unmarked here, for example. */
835 /* The kernel text could also be unmarked, but gdb would be upset. */ 838 /* The kernel text could also be unmarked, but gdb would be upset. */
836} 839}
837#endif 840#endif
838 841
839/* 842/*
840 * Abstractly iterate over the collection of memory segments to be 843 * Abstractly iterate over the collection of memory segments to be
841 * dumped; the callback lacks the customary environment-pointer 844 * dumped; the callback lacks the customary environment-pointer
842 * argument because none of the current users really need one. 845 * argument because none of the current users really need one.
843 * 846 *
844 * To be used only after dump_seg_prep is called to set things up. 847 * To be used only after dump_seg_prep is called to set things up.
845 */ 848 */
846int 849int
847dump_seg_iter(int (*callback)(paddr_t, paddr_t)) 850dump_seg_iter(int (*callback)(paddr_t, paddr_t))
848{ 851{
849 int error, i; 852 int error, i;
850 853
851#define CALLBACK(start,size) do { \ 854#define CALLBACK(start,size) do { \
852 error = callback(start,size); \ 855 error = callback(start,size); \
853 if (error) \ 856 if (error) \
854 return error; \ 857 return error; \
855} while(0) 858} while(0)
856 859
857 for (i = 0; i < mem_cluster_cnt; ++i) { 860 for (i = 0; i < mem_cluster_cnt; ++i) {
858#ifndef NO_SPARSE_DUMP 861#ifndef NO_SPARSE_DUMP
859 /* 862 /*
860 * The bitmap is scanned within each memory segment, 863 * The bitmap is scanned within each memory segment,
861 * rather than over its entire domain, in case any 864 * rather than over its entire domain, in case any
862 * pages outside of the memory proper have been mapped 865 * pages outside of the memory proper have been mapped
863 * into kva; they might be devices that wouldn't 866 * into kva; they might be devices that wouldn't
864 * appreciate being arbitrarily read, and including 867 * appreciate being arbitrarily read, and including
865 * them could also break the assumption that a sparse 868 * them could also break the assumption that a sparse
866 * dump will always be smaller than a full one. 869 * dump will always be smaller than a full one.
867 */ 870 */
868 if (sparse_dump && sparse_dump_physmap) { 871 if (sparse_dump && sparse_dump_physmap) {
869 paddr_t p, start, end; 872 paddr_t p, start, end;
870 int lastset; 873 int lastset;
871 874
872 start = mem_clusters[i].start; 875 start = mem_clusters[i].start;
873 end = start + mem_clusters[i].size; 876 end = start + mem_clusters[i].size;
874 start = rounddown(start, PAGE_SIZE); /* unnecessary? */ 877 start = rounddown(start, PAGE_SIZE); /* unnecessary? */
875 lastset = 0; 878 lastset = 0;
876 for (p = start; p < end; p += PAGE_SIZE) { 879 for (p = start; p < end; p += PAGE_SIZE) {
877 int thisset = isset(sparse_dump_physmap, 880 int thisset = isset(sparse_dump_physmap,
878 p/PAGE_SIZE); 881 p/PAGE_SIZE);
879 882
880 if (!lastset && thisset) 883 if (!lastset && thisset)
881 start = p; 884 start = p;
882 if (lastset && !thisset) 885 if (lastset && !thisset)
883 CALLBACK(start, p - start); 886 CALLBACK(start, p - start);
884 lastset = thisset; 887 lastset = thisset;
885 } 888 }
886 if (lastset) 889 if (lastset)
887 CALLBACK(start, p - start); 890 CALLBACK(start, p - start);
888 } else 891 } else
889#endif 892#endif
890 CALLBACK(mem_clusters[i].start, mem_clusters[i].size); 893 CALLBACK(mem_clusters[i].start, mem_clusters[i].size);
891 } 894 }
892 return 0; 895 return 0;
893#undef CALLBACK 896#undef CALLBACK
894} 897}
895 898
896/* 899/*
897 * Prepare for an impending core dump: decide what's being dumped and 900 * Prepare for an impending core dump: decide what's being dumped and
898 * how much space it will take up. 901 * how much space it will take up.
899 */ 902 */
900void 903void
901dump_seg_prep(void) 904dump_seg_prep(void)
902{ 905{
903#ifndef NO_SPARSE_DUMP 906#ifndef NO_SPARSE_DUMP
904 if (sparse_dump && sparse_dump_physmap) 907 if (sparse_dump && sparse_dump_physmap)
905 cpu_dump_prep_sparse(); 908 cpu_dump_prep_sparse();
906#endif 909#endif
907 910
908 dump_nmemsegs = 0; 911 dump_nmemsegs = 0;
909 dump_npages = 0; 912 dump_npages = 0;
910 dump_seg_iter(dump_seg_count_range); 913 dump_seg_iter(dump_seg_count_range);
911 914
912 dump_header_size = ALIGN(sizeof(kcore_seg_t)) + 915 dump_header_size = ALIGN(sizeof(kcore_seg_t)) +
913 ALIGN(sizeof(cpu_kcore_hdr_t)) + 916 ALIGN(sizeof(cpu_kcore_hdr_t)) +
914 ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t)); 917 ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
915 dump_header_size = roundup(dump_header_size, dbtob(1)); 918 dump_header_size = roundup(dump_header_size, dbtob(1));
916 919
917 /* 920 /*
918 * savecore(8) will read this to decide how many pages to 921 * savecore(8) will read this to decide how many pages to
919 * copy, and cpu_dumpconf has already used the pessimistic 922 * copy, and cpu_dumpconf has already used the pessimistic
920 * value to set dumplo, so it's time to tell the truth. 923 * value to set dumplo, so it's time to tell the truth.
921 */ 924 */
922 dumpsize = dump_npages; /* XXX could these just be one variable? */ 925 dumpsize = dump_npages; /* XXX could these just be one variable? */
923} 926}
924 927
925int 928int
926dump_seg_count_range(paddr_t start, paddr_t size) 929dump_seg_count_range(paddr_t start, paddr_t size)
927{ 930{
928 ++dump_nmemsegs; 931 ++dump_nmemsegs;
929 dump_npages += size / PAGE_SIZE; 932 dump_npages += size / PAGE_SIZE;
930 return 0; 933 return 0;
931} 934}
932 935
933/* 936/*
934 * A sparse dump's header may be rather large, due to the number of 937 * A sparse dump's header may be rather large, due to the number of
935 * "segments" emitted. These routines manage a simple output buffer, 938 * "segments" emitted. These routines manage a simple output buffer,
936 * so that the header can be written to disk incrementally. 939 * so that the header can be written to disk incrementally.
937 */ 940 */
938void 941void
939dump_header_start(void) 942dump_header_start(void)
940{ 943{
941 dump_headerbuf_ptr = dump_headerbuf; 944 dump_headerbuf_ptr = dump_headerbuf;
942 dump_header_blkno = dumplo; 945 dump_header_blkno = dumplo;
943} 946}
944 947
945int 948int
946dump_header_flush(void) 949dump_header_flush(void)
947{ 950{
948 const struct bdevsw *bdev; 951 const struct bdevsw *bdev;
949 size_t to_write; 952 size_t to_write;
950 int error; 953 int error;
951 954
952 bdev = bdevsw_lookup(dumpdev); 955 bdev = bdevsw_lookup(dumpdev);
953 to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1)); 956 to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1));
954 error = bdev->d_dump(dumpdev, dump_header_blkno, 957 error = bdev->d_dump(dumpdev, dump_header_blkno,
955 dump_headerbuf, to_write); 958 dump_headerbuf, to_write);
956 dump_header_blkno += btodb(to_write); 959 dump_header_blkno += btodb(to_write);
957 dump_headerbuf_ptr = dump_headerbuf; 960 dump_headerbuf_ptr = dump_headerbuf;
958 return error; 961 return error;
959} 962}
960 963
961int 964int
962dump_header_addbytes(const void* vptr, size_t n) 965dump_header_addbytes(const void* vptr, size_t n)
963{ 966{
964 const char* ptr = vptr; 967 const char* ptr = vptr;
965 int error; 968 int error;
966 969
967 while (n > dump_headerbuf_avail) { 970 while (n > dump_headerbuf_avail) {
968 memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail); 971 memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail);
969 ptr += dump_headerbuf_avail; 972 ptr += dump_headerbuf_avail;
970 n -= dump_headerbuf_avail; 973 n -= dump_headerbuf_avail;
971 dump_headerbuf_ptr = dump_headerbuf_end; 974 dump_headerbuf_ptr = dump_headerbuf_end;
972 error = dump_header_flush(); 975 error = dump_header_flush();
973 if (error) 976 if (error)
974 return error; 977 return error;
975 } 978 }
976 memcpy(dump_headerbuf_ptr, ptr, n); 979 memcpy(dump_headerbuf_ptr, ptr, n);
977 dump_headerbuf_ptr += n; 980 dump_headerbuf_ptr += n;
978 981
979 return 0; 982 return 0;
980} 983}
981 984
982int 985int
983dump_header_addseg(paddr_t start, paddr_t size) 986dump_header_addseg(paddr_t start, paddr_t size)
984{ 987{
985 phys_ram_seg_t seg = { start, size }; 988 phys_ram_seg_t seg = { start, size };
986 989
987 return dump_header_addbytes(&seg, sizeof(seg)); 990 return dump_header_addbytes(&seg, sizeof(seg));
988} 991}
989 992
990int 993int
991dump_header_finish(void) 994dump_header_finish(void)
992{ 995{
993 memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail); 996 memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail);
994 return dump_header_flush(); 997 return dump_header_flush();
995} 998}
996 999
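A minimal sketch of how these routines chain together (this mirrors the sequence dodumpsys() below performs via cpu_dump(); seg, start and size are placeholders):

	dump_header_start();		/* reset buffer ptr, blkno = dumplo */
	error = dump_header_addbytes(&seg, ALIGN(sizeof(seg)));
	if (error == 0)			/* addbytes flushes as the buffer fills */
		error = dump_header_addseg(start, size);
	if (error == 0)
		error = dump_header_finish();	/* zero-pad and flush the tail */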
997 1000
998/* 1001/*
999 * These variables are needed by /sbin/savecore 1002 * These variables are needed by /sbin/savecore
1000 */ 1003 */
1001uint32_t dumpmag = 0x8fca0101; /* magic number */ 1004uint32_t dumpmag = 0x8fca0101; /* magic number */
1002int dumpsize = 0; /* pages */ 1005int dumpsize = 0; /* pages */
1003long dumplo = 0; /* blocks */ 1006long dumplo = 0; /* blocks */
1004 1007
1005/* 1008/*
1006 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers 1009 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers
1007 * for a full (non-sparse) dump. 1010 * for a full (non-sparse) dump.
1008 */ 1011 */
1009int 1012int
1010cpu_dumpsize(void) 1013cpu_dumpsize(void)
1011{ 1014{
1012 int size; 1015 int size;
1013 1016
1014 size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + 1017 size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
1015 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); 1018 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
1016 if (roundup(size, dbtob(1)) != dbtob(1)) 1019 if (roundup(size, dbtob(1)) != dbtob(1))
1017 return (-1); 1020 return (-1);
1018 1021
1019 return (1); 1022 return (1);
1020} 1023}
1021 1024
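The return convention is terse: the result is the header size in disk blocks, and only a one-block header is representable. A restated sketch of the check:

	/*
	 * roundup(size, dbtob(1)) is the padded header size in bytes.
	 * Anything other than exactly one block (e.g. enough
	 * mem_clusters to spill into a second block) cannot be
	 * expressed in the full-dump layout, so -1 tells the caller
	 * (cpu_dumpconf) to punt.
	 */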
1022/* 1025/*
1023 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped 1026 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped
1024 * for a full (non-sparse) dump. 1027 * for a full (non-sparse) dump.
1025 */ 1028 */
1026u_long 1029u_long
1027cpu_dump_mempagecnt(void) 1030cpu_dump_mempagecnt(void)
1028{ 1031{
1029 u_long i, n; 1032 u_long i, n;
1030 1033
1031 n = 0; 1034 n = 0;
1032 for (i = 0; i < mem_cluster_cnt; i++) 1035 for (i = 0; i < mem_cluster_cnt; i++)
1033 n += atop(mem_clusters[i].size); 1036 n += atop(mem_clusters[i].size);
1034 return (n); 1037 return (n);
1035} 1038}
1036 1039
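Since atop() is just a byte-to-page conversion, the total is simple arithmetic: with 4 KB pages (the usual amd64 case), a single 1 GB cluster contributes atop(0x40000000) == 262144 pages.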
1037/* 1040/*
1038 * cpu_dump: dump the machine-dependent kernel core dump headers. 1041 * cpu_dump: dump the machine-dependent kernel core dump headers.
1039 */ 1042 */
1040int 1043int
1041cpu_dump(void) 1044cpu_dump(void)
1042{ 1045{
1043 kcore_seg_t seg; 1046 kcore_seg_t seg;
1044 cpu_kcore_hdr_t cpuhdr; 1047 cpu_kcore_hdr_t cpuhdr;
1045 const struct bdevsw *bdev; 1048 const struct bdevsw *bdev;
1046 1049
1047 bdev = bdevsw_lookup(dumpdev); 1050 bdev = bdevsw_lookup(dumpdev);
1048 if (bdev == NULL) 1051 if (bdev == NULL)
1049 return (ENXIO); 1052 return (ENXIO);
1050 1053
1051 /* 1054 /*
1052 * Generate a segment header. 1055 * Generate a segment header.
1053 */ 1056 */
1054 CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); 1057 CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
1055 seg.c_size = dump_header_size - ALIGN(sizeof(seg)); 1058 seg.c_size = dump_header_size - ALIGN(sizeof(seg));
1056 (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg))); 1059 (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg)));
1057 1060
1058 /* 1061 /*
1059 * Add the machine-dependent header info. 1062 * Add the machine-dependent header info.
1060 */ 1063 */
1061 cpuhdr.ptdpaddr = PDPpaddr; 1064 cpuhdr.ptdpaddr = PDPpaddr;
1062 cpuhdr.nmemsegs = dump_nmemsegs; 1065 cpuhdr.nmemsegs = dump_nmemsegs;
1063 (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr))); 1066 (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));
1064 1067
1065 /* 1068 /*
1066 * Write out the memory segment descriptors. 1069 * Write out the memory segment descriptors.
1067 */ 1070 */
1068 return dump_seg_iter(dump_header_addseg); 1071 return dump_seg_iter(dump_header_addseg);
1069} 1072}
1070 1073
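The header the three writes above produce looks like this on disk (a sketch; each piece is ALIGN()ed, and dump_header_finish() zero-pads the whole thing out to dump_header_size):

	/*
	 *   +---------------------------+  <- dumplo
	 *   | kcore_seg_t (CORE_CPU)    |  c_size = rest of the header
	 *   +---------------------------+
	 *   | cpu_kcore_hdr_t           |  ptdpaddr, nmemsegs
	 *   +---------------------------+
	 *   | phys_ram_seg_t[nmemsegs]  |  {start, size} per dumped range
	 *   +---------------------------+
	 *   | zero padding              |  up to dump_header_size
	 *   +---------------------------+
	 */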
1071/* 1074/*
1072 * Doadump comes here after turning off memory management and 1075 * Doadump comes here after turning off memory management and
1073 * getting on the dump stack, either when called above, or by 1076 * getting on the dump stack, either when called above, or by
1074 * the auto-restart code. 1077 * the auto-restart code.
1075 */ 1078 */
1076#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */ 1079#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */
1077static vaddr_t dumpspace; 1080static vaddr_t dumpspace;
1078 1081
1079vaddr_t 1082vaddr_t
1080reserve_dumppages(vaddr_t p) 1083reserve_dumppages(vaddr_t p)
1081{ 1084{
1082 1085
1083 dumpspace = p; 1086 dumpspace = p;
1084 return (p + BYTES_PER_DUMP); 1087 return (p + BYTES_PER_DUMP);
1085} 1088}
1086 1089
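dumpsys_seg() below recycles this single BYTES_PER_DUMP window for every chunk: the target physical pages are mapped into it just long enough to hand them to the driver, then unmapped again. The per-chunk cycle, condensed (same calls as in the loop below; MIN() is shorthand for the explicit clamp):

	n = MIN(bytes - i, BYTES_PER_DUMP);	/* clamp to the VA window */
	for (m = 0; m < n; m += NBPG)		/* map pages read-only */
		pmap_kenter_pa(dumpspace + m, maddr + m, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());		/* flush before touching it */
	error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
	pmap_kremove_local(dumpspace, n);	/* tear the window down */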
1087int 1090int
1088dumpsys_seg(paddr_t maddr, paddr_t bytes) 1091dumpsys_seg(paddr_t maddr, paddr_t bytes)
1089{ 1092{
1090 u_long i, m, n; 1093 u_long i, m, n;
1091 daddr_t blkno; 1094 daddr_t blkno;
1092 const struct bdevsw *bdev; 1095 const struct bdevsw *bdev;
1093 int (*dump)(dev_t, daddr_t, void *, size_t); 1096 int (*dump)(dev_t, daddr_t, void *, size_t);
1094 int error; 1097 int error;
1095 1098
1096 if (dumpdev == NODEV) 1099 if (dumpdev == NODEV)
1097 return ENODEV; 1100 return ENODEV;
1098 bdev = bdevsw_lookup(dumpdev); 1101 bdev = bdevsw_lookup(dumpdev);
1099 if (bdev == NULL || bdev->d_psize == NULL) 1102 if (bdev == NULL || bdev->d_psize == NULL)
1100 return ENODEV; 1103 return ENODEV;
1101 1104
1102 dump = bdev->d_dump; 1105 dump = bdev->d_dump;
1103 1106
1104 blkno = dump_header_blkno; 1107 blkno = dump_header_blkno;
1105 for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) { 1108 for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
1106 /* Print out how many MBs we have left to go. */ 1109 /* Print out how many MBs we have left to go. */
1107 if ((dump_totalbytesleft % (1024*1024)) == 0) 1110 if ((dump_totalbytesleft % (1024*1024)) == 0)
1108 printf_nolog("%lu ", (unsigned long) 1111 printf_nolog("%lu ", (unsigned long)
1109 (dump_totalbytesleft / (1024 * 1024))); 1112 (dump_totalbytesleft / (1024 * 1024)));
1110 1113
1111 /* Limit size for next transfer. */ 1114 /* Limit size for next transfer. */
1112 n = bytes - i; 1115 n = bytes - i;
1113 if (n > BYTES_PER_DUMP) 1116 if (n > BYTES_PER_DUMP)
1114 n = BYTES_PER_DUMP; 1117 n = BYTES_PER_DUMP;
1115 1118
1116 for (m = 0; m < n; m += NBPG) 1119 for (m = 0; m < n; m += NBPG)
1117 pmap_kenter_pa(dumpspace + m, maddr + m, 1120 pmap_kenter_pa(dumpspace + m, maddr + m,
1118 VM_PROT_READ, 0); 1121 VM_PROT_READ, 0);
1119 pmap_update(pmap_kernel()); 1122 pmap_update(pmap_kernel());
1120 1123
1121 error = (*dump)(dumpdev, blkno, (void *)dumpspace, n); 1124 error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
1122 pmap_kremove_local(dumpspace, n); 1125 pmap_kremove_local(dumpspace, n);
1123 if (error) 1126 if (error)
1124 return error; 1127 return error;
1125 maddr += n; 1128 maddr += n;
1126 blkno += btodb(n); /* XXX? */ 1129 blkno += btodb(n); /* XXX? */
1127 1130
1128#if 0 /* XXX this doesn't work. grr. */ 1131#if 0 /* XXX this doesn't work. grr. */
1129 /* operator aborting dump? */ 1132 /* operator aborting dump? */
1130 if (sget() != NULL) 1133 if (sget() != NULL)
1131 return EINTR; 1134 return EINTR;
1132#endif 1135#endif
1133 } 1136 }
1134 dump_header_blkno = blkno; 1137 dump_header_blkno = blkno;
1135 1138
1136 return 0; 1139 return 0;
1137} 1140}
1138 1141
1139void 1142void
1140dodumpsys(void) 1143dodumpsys(void)
1141{ 1144{
1142 const struct bdevsw *bdev; 1145 const struct bdevsw *bdev;
1143 int dumpend, psize; 1146 int dumpend, psize;
1144 int error; 1147 int error;
1145 1148
1146 if (dumpdev == NODEV) 1149 if (dumpdev == NODEV)
1147 return; 1150 return;
1148 1151
1149 bdev = bdevsw_lookup(dumpdev); 1152 bdev = bdevsw_lookup(dumpdev);
1150 if (bdev == NULL || bdev->d_psize == NULL) 1153 if (bdev == NULL || bdev->d_psize == NULL)
1151 return; 1154 return;
1152 /* 1155 /*
1153 * For dumps during autoconfiguration, 1156 * For dumps during autoconfiguration,
1154 * if the dump device has already been configured... 1157 * if the dump device has already been configured...
1155 */ 1158 */
1156 if (dumpsize == 0) 1159 if (dumpsize == 0)
1157 cpu_dumpconf(); 1160 cpu_dumpconf();
1158 1161
1159 printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):", 1162 printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):",
1160 (unsigned long long)major(dumpdev), 1163 (unsigned long long)major(dumpdev),
1161 (unsigned long long)minor(dumpdev), dumplo, dumpsize); 1164 (unsigned long long)minor(dumpdev), dumplo, dumpsize);
1162 1165
1163 if (dumplo <= 0 || dumpsize <= 0) { 1166 if (dumplo <= 0 || dumpsize <= 0) {
1164 printf(" not possible\n"); 1167 printf(" not possible\n");
1165 return; 1168 return;
1166 } 1169 }
1167 1170
1168 psize = bdev_size(dumpdev); 1171 psize = bdev_size(dumpdev);
1169 printf("\ndump "); 1172 printf("\ndump ");
1170 if (psize == -1) { 1173 if (psize == -1) {
1171 printf("area unavailable\n"); 1174 printf("area unavailable\n");
1172 return; 1175 return;
1173 } 1176 }
1174 1177
1175#if 0 /* XXX this doesn't work. grr. */ 1178#if 0 /* XXX this doesn't work. grr. */
1176 /* toss any characters present prior to dump */ 1179 /* toss any characters present prior to dump */
1177 while (sget() != NULL); /*syscons and pccons differ */ 1180 while (sget() != NULL); /*syscons and pccons differ */
1178#endif 1181#endif
1179 1182
1180 dump_seg_prep(); 1183 dump_seg_prep();
1181 dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages); 1184 dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages);
1182 if (dumpend > psize) { 1185 if (dumpend > psize) {
1183 printf("failed: insufficient space (%d < %d)\n", 1186 printf("failed: insufficient space (%d < %d)\n",
1184 psize, dumpend); 1187 psize, dumpend);
1185 goto failed; 1188 goto failed;
1186 } 1189 }
1187 1190
1188 dump_header_start(); 1191 dump_header_start();
1189 if ((error = cpu_dump()) != 0) 1192 if ((error = cpu_dump()) != 0)
1190 goto err; 1193 goto err;
1191 if ((error = dump_header_finish()) != 0) 1194 if ((error = dump_header_finish()) != 0)
1192 goto err; 1195 goto err;
1193 1196
1194 if (dump_header_blkno != dumplo + btodb(dump_header_size)) { 1197 if (dump_header_blkno != dumplo + btodb(dump_header_size)) {
1195 printf("BAD header size (%ld [written] != %ld [expected])\n", 1198 printf("BAD header size (%ld [written] != %ld [expected])\n",
1196 (long)(dump_header_blkno - dumplo), 1199 (long)(dump_header_blkno - dumplo),
1197 (long)btodb(dump_header_size)); 1200 (long)btodb(dump_header_size));
1198 goto failed; 1201 goto failed;
1199 } 1202 }
1200 1203
1201 dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP); 1204 dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP);
1202 error = dump_seg_iter(dumpsys_seg); 1205 error = dump_seg_iter(dumpsys_seg);
1203 1206
1204 if (error == 0 && dump_header_blkno != dumpend) { 1207 if (error == 0 && dump_header_blkno != dumpend) {
1205 printf("BAD dump size (%ld [written] != %ld [expected])\n", 1208 printf("BAD dump size (%ld [written] != %ld [expected])\n",
1206 (long)(dump_header_blkno - dumplo), 1209 (long)(dump_header_blkno - dumplo),
1207 (long)(dumpend - dumplo)); 1210 (long)(dumpend - dumplo));
1208 goto failed; 1211 goto failed;
1209 } 1212 }
1210 1213
1211err: 1214err:
1212 switch (error) { 1215 switch (error) {
1213 1216
1214 case ENXIO: 1217 case ENXIO:
1215 printf("device bad\n"); 1218 printf("device bad\n");
1216 break; 1219 break;
1217 1220
1218 case EFAULT: 1221 case EFAULT:
1219 printf("device not ready\n"); 1222 printf("device not ready\n");
1220 break; 1223 break;
1221 1224
1222 case EINVAL: 1225 case EINVAL:
1223 printf("area improper\n"); 1226 printf("area improper\n");
1224 break; 1227 break;
1225 1228
1226 case EIO: 1229 case EIO:
1227 printf("i/o error\n"); 1230 printf("i/o error\n");
1228 break; 1231 break;
1229 1232
1230 case EINTR: 1233 case EINTR:
1231 printf("aborted from console\n"); 1234 printf("aborted from console\n");
1232 break; 1235 break;
1233 1236
1234 case 0: 1237 case 0:
1235 printf("succeeded\n"); 1238 printf("succeeded\n");
1236 break; 1239 break;
1237 1240
1238 default: 1241 default:
1239 printf("error %d\n", error); 1242 printf("error %d\n", error);
1240 break; 1243 break;
1241 } 1244 }
1242failed: 1245failed:
1243 printf("\n\n"); 1246 printf("\n\n");
1244 delay(5000000); /* 5 seconds */ 1247 delay(5000000); /* 5 seconds */
1245} 1248}
1246 1249
1247/* 1250/*
1248 * This is called by main to set dumplo and dumpsize. 1251 * This is called by main to set dumplo and dumpsize.
1249 * Dumps always skip the first PAGE_SIZE of disk space 1252 * Dumps always skip the first PAGE_SIZE of disk space
1250 * in case there might be a disk label stored there. 1253 * in case there might be a disk label stored there.
1251 * If there is extra space, put dump at the end to 1254 * If there is extra space, put dump at the end to
1252 * reduce the chance that swapping trashes it. 1255 * reduce the chance that swapping trashes it.
1253 * 1256 *
1254 * Sparse dumps can't be placed as close to the end as possible, because 1257 * Sparse dumps can't be placed as close to the end as possible, because
1255 * savecore(8) has to know where to start reading in the dump device 1258 * savecore(8) has to know where to start reading in the dump device
1256 * before it has access to any of the crashed system's state. 1259 * before it has access to any of the crashed system's state.
1257 * 1260 *
1258 * Note also that a sparse dump will never be larger than a full one: 1261 * Note also that a sparse dump will never be larger than a full one:
1259 * in order to add a phys_ram_seg_t to the header, at least one page 1262 * in order to add a phys_ram_seg_t to the header, at least one page
1260 * must be removed. 1263 * must be removed.
1261 */ 1264 */
1262void 1265void
1263cpu_dumpconf(void) 1266cpu_dumpconf(void)
1264{ 1267{
1265 int nblks, dumpblks; /* size of dump area */ 1268 int nblks, dumpblks; /* size of dump area */
1266 1269
1267 if (dumpdev == NODEV) 1270 if (dumpdev == NODEV)
1268 goto bad; 1271 goto bad;
1269 nblks = bdev_size(dumpdev); 1272 nblks = bdev_size(dumpdev);
1270 if (nblks <= ctod(1)) 1273 if (nblks <= ctod(1))
1271 goto bad; 1274 goto bad;
1272 1275
1273 dumpblks = cpu_dumpsize(); 1276 dumpblks = cpu_dumpsize();
1274 if (dumpblks < 0) 1277 if (dumpblks < 0)
1275 goto bad; 1278 goto bad;
1276 1279
1277 /* dumpsize is in page units, and doesn't include headers. */ 1280 /* dumpsize is in page units, and doesn't include headers. */
1278 dumpsize = cpu_dump_mempagecnt(); 1281 dumpsize = cpu_dump_mempagecnt();
1279 1282
1280 dumpblks += ctod(dumpsize); 1283 dumpblks += ctod(dumpsize);
1281 1284
1282 /* If dump won't fit (incl. room for possible label), punt. */ 1285 /* If dump won't fit (incl. room for possible label), punt. */
1283 if (dumpblks > (nblks - ctod(1))) { 1286 if (dumpblks > (nblks - ctod(1))) {
1284#ifndef NO_SPARSE_DUMP 1287#ifndef NO_SPARSE_DUMP
1285 /* A sparse dump might (and hopefully will) fit. */ 1288 /* A sparse dump might (and hopefully will) fit. */
1286 dumplo = ctod(1); 1289 dumplo = ctod(1);
1287#else 1290#else
1288 /* But if we're not configured for that, punt. */ 1291 /* But if we're not configured for that, punt. */
1289 goto bad; 1292 goto bad;
1290#endif 1293#endif
1291 } else { 1294 } else {
1292 /* Put dump at end of partition */ 1295 /* Put dump at end of partition */
1293 dumplo = nblks - dumpblks; 1296 dumplo = nblks - dumpblks;
1294 } 1297 }
1295 1298
1296 1299
1297 /* Now that we've decided this will work, init ancillary stuff. */ 1300 /* Now that we've decided this will work, init ancillary stuff. */
1298 dump_misc_init(); 1301 dump_misc_init();
1299 return; 1302 return;
1300 1303
1301 bad: 1304 bad:
1302 dumpsize = 0; 1305 dumpsize = 0;
1303} 1306}
1304 1307
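A worked placement example (made-up numbers): a 2 GB dump partition with 512-byte blocks gives nblks = 4194304. Dumping 1.5 GB of RAM with 4 KB pages needs dumpblks = 1 + ctod(393216) = 3145729 blocks, which fits below nblks - ctod(1), so dumplo = 4194304 - 3145729 = 1048575 and the dump sits at the very end of the partition, away from early swap activity.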
1305/* 1308/*
1306 * Clear registers on exec 1309 * Clear registers on exec
1307 */ 1310 */
1308void 1311void
1309setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) 1312setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
1310{ 1313{
1311 struct pcb *pcb = lwp_getpcb(l); 1314 struct pcb *pcb = lwp_getpcb(l);
1312 struct trapframe *tf; 1315 struct trapframe *tf;
1313 1316
1314#ifdef USER_LDT 1317#ifdef USER_LDT
1315 pmap_ldt_cleanup(l); 1318 pmap_ldt_cleanup(l);
1316#endif 1319#endif
1317 1320
1318 fpu_save_area_clear(l, pack->ep_osversion >= 699002600 1321 fpu_save_area_clear(l, pack->ep_osversion >= 699002600
1319 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__); 1322 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
1320 pcb->pcb_flags = 0; 1323 pcb->pcb_flags = 0;
1321 if (pcb->pcb_dbregs != NULL) { 1324 if (pcb->pcb_dbregs != NULL) {
1322 pool_put(&x86_dbregspl, pcb->pcb_dbregs); 1325 pool_put(&x86_dbregspl, pcb->pcb_dbregs);
1323 pcb->pcb_dbregs = NULL; 1326 pcb->pcb_dbregs = NULL;
1324 } 1327 }
1325 1328
1326 l->l_proc->p_flag &= ~PK_32; 1329 l->l_proc->p_flag &= ~PK_32;
1327 1330
1328 l->l_md.md_flags = MDL_IRET; 1331 l->l_md.md_flags = MDL_IRET;
1329 1332
1330 tf = l->l_md.md_regs; 1333 tf = l->l_md.md_regs;
1331 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); 1334 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
1332 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); 1335 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
1333 cpu_fsgs_zero(l); 1336 cpu_fsgs_zero(l);
1334 tf->tf_rdi = 0; 1337 tf->tf_rdi = 0;
1335 tf->tf_rsi = 0; 1338 tf->tf_rsi = 0;
1336 tf->tf_rbp = 0; 1339 tf->tf_rbp = 0;
1337 tf->tf_rbx = l->l_proc->p_psstrp; 1340 tf->tf_rbx = l->l_proc->p_psstrp;
1338 tf->tf_rdx = 0; 1341 tf->tf_rdx = 0;
1339 tf->tf_rcx = 0; 1342 tf->tf_rcx = 0;
1340 tf->tf_rax = 0; 1343 tf->tf_rax = 0;
1341 tf->tf_rip = pack->ep_entry; 1344 tf->tf_rip = pack->ep_entry;
1342 tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL); 1345 tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
1343 tf->tf_rflags = PSL_USERSET; 1346 tf->tf_rflags = PSL_USERSET;
1344 tf->tf_rsp = stack; 1347 tf->tf_rsp = stack;
1345 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); 1348 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
1346} 1349}
1347 1350
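The net effect: the fresh image enters at pack->ep_entry on the supplied stack, with flat user segments and zeroed integer state; the one nonzero general-purpose register is %rbx, seeded with p_psstrp so startup code can locate its ps_strings block.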
1348/* 1351/*
1349 * Initialize segments and descriptor tables 1352 * Initialize segments and descriptor tables
1350 */ 1353 */
1351 1354
1352#ifdef XEN 1355#ifdef XEN
1353struct trap_info *xen_idt; 1356struct trap_info *xen_idt;
1354int xen_idt_idx; 1357int xen_idt_idx;
1355#endif 1358#endif
1356char *ldtstore; 1359char *ldtstore;
1357char *gdtstore; 1360char *gdtstore;
1358 1361
1359void 1362void
1360setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, int sel) 1363setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, int sel)
1361{ 1364{
1362 1365
1363 kpreempt_disable(); 1366 kpreempt_disable();
1364 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); 1367 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1365 1368
1366 gd->gd_looffset = (uint64_t)func & 0xffff; 1369 gd->gd_looffset = (uint64_t)func & 0xffff;
1367 gd->gd_selector = sel; 1370 gd->gd_selector = sel;
1368 gd->gd_ist = ist; 1371 gd->gd_ist = ist;
1369 gd->gd_type = type; 1372 gd->gd_type = type;
1370 gd->gd_dpl = dpl; 1373 gd->gd_dpl = dpl;
1371 gd->gd_p = 1; 1374 gd->gd_p = 1;
1372 gd->gd_hioffset = (uint64_t)func >> 16; 1375 gd->gd_hioffset = (uint64_t)func >> 16;
1373 gd->gd_zero = 0; 1376 gd->gd_zero = 0;
1374 gd->gd_xx1 = 0; 1377 gd->gd_xx1 = 0;
1375 gd->gd_xx2 = 0; 1378 gd->gd_xx2 = 0;
1376 gd->gd_xx3 = 0; 1379 gd->gd_xx3 = 0;
1377 1380
1378 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1381 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1379 kpreempt_enable(); 1382 kpreempt_enable();
1380} 1383}
1381 1384
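setgate() scatters the 64-bit handler address across gd_looffset (bits 0-15) and gd_hioffset (bits 16-63). A hypothetical helper (not in this file) that inverts the split, just to make the encoding explicit:

	static inline uint64_t
	gate_offset(const struct gate_descriptor *gd)
	{
		/* low 16 bits | everything above bit 15 */
		return (uint64_t)gd->gd_looffset |
		    ((uint64_t)gd->gd_hioffset << 16);
	}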
1382void 1385void
1383unsetgate(struct gate_descriptor *gd) 1386unsetgate(struct gate_descriptor *gd)
1384{ 1387{
1385 1388
1386 kpreempt_disable(); 1389 kpreempt_disable();
1387 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); 1390 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1388 1391
1389 memset(gd, 0, sizeof (*gd)); 1392 memset(gd, 0, sizeof (*gd));
1390 1393
1391 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1394 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1392 kpreempt_enable(); 1395 kpreempt_enable();
1393} 1396}
1394 1397
1395void 1398void
1396setregion(struct region_descriptor *rd, void *base, uint16_t limit) 1399setregion(struct region_descriptor *rd, void *base, uint16_t limit)
1397{ 1400{
1398 rd->rd_limit = limit; 1401 rd->rd_limit = limit;
1399 rd->rd_base = (uint64_t)base; 1402 rd->rd_base = (uint64_t)base;
1400} 1403}
1401 1404
1402/* 1405/*
1403 * Note that the base and limit fields are ignored in long mode (64-bit code runs with a flat zero base and no limit checks). 1406 * Note that the base and limit fields are ignored in long mode (64-bit code runs with a flat zero base and no limit checks).
1404 */ 1407 */
1405void 1408void
1406set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit, 1409set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit,
1407 int type, int dpl, int gran, int def32, int is64) 1410 int type, int dpl, int gran, int def32, int is64)
1408{ 1411{
1409 sd->sd_lolimit = (unsigned)limit; 1412 sd->sd_lolimit = (unsigned)limit;
1410 sd->sd_lobase = (unsigned long)base; 1413 sd->sd_lobase = (unsigned long)base;
1411 sd->sd_type = type; 1414 sd->sd_type = type;
1412 sd->sd_dpl = dpl; 1415 sd->sd_dpl = dpl;
1413 sd->sd_p = 1; 1416 sd->sd_p = 1;
1414 sd->sd_hilimit = (unsigned)limit >> 16; 1417 sd->sd_hilimit = (unsigned)limit >> 16;
1415 sd->sd_avl = 0; 1418 sd->sd_avl = 0;
1416 sd->sd_long = is64; 1419 sd->sd_long = is64;
1417 sd->sd_def32 = def32; 1420 sd->sd_def32 = def32;
1418 sd->sd_gran = gran; 1421 sd->sd_gran = gran;
1419 sd->sd_hibase = (unsigned long)base >> 24; 1422 sd->sd_hibase = (unsigned long)base >> 24;
1420} 1423}
1421 1424
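The same kind of scattering happens here: the base lives in sd_lobase (bits 0-23) and sd_hibase (bits 24 and up), the 20-bit limit in sd_lolimit (bits 0-15) and sd_hilimit (bits 16-19). A hypothetical accessor, analogous to the gate one above:

	static inline uint64_t
	segment_base(const struct mem_segment_descriptor *sd)
	{
		return (uint64_t)sd->sd_lobase |
		    ((uint64_t)sd->sd_hibase << 24);
	}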
1422void 1425void
1423set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit, 1426set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit,
1424 int type, int dpl, int gran) 1427 int type, int dpl, int gran)
1425{ 1428{
1426 memset(sd, 0, sizeof *sd); 1429 memset(sd, 0, sizeof *sd);
1427 sd->sd_lolimit = (unsigned)limit; 1430 sd->sd_lolimit = (unsigned)limit;
1428 sd->sd_lobase = (uint64_t)base; 1431 sd->sd_lobase = (uint64_t)base;
1429 sd->sd_type = type; 1432 sd->sd_type = type;
1430 sd->sd_dpl = dpl; 1433 sd->sd_dpl = dpl;
1431 sd->sd_p = 1; 1434 sd->sd_p = 1;
1432 sd->sd_hilimit = (unsigned)limit >> 16; 1435 sd->sd_hilimit = (unsigned)limit >> 16;
1433 sd->sd_gran = gran; 1436 sd->sd_gran = gran;
1434 sd->sd_hibase = (uint64_t)base >> 24; 1437 sd->sd_hibase = (uint64_t)base >> 24;
1435} 1438}
1436 1439
1437void 1440void
1438cpu_init_idt(void) 1441cpu_init_idt(void)
1439{ 1442{
1440#ifndef XEN 1443#ifndef XEN
1441 struct region_descriptor region; 1444 struct region_descriptor region;
1442 1445
1443 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1); 1446 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
1444 lidt(&region); 1447 lidt(&region);
1445#else 1448#else
1446 if (HYPERVISOR_set_trap_table(xen_idt)) 1449 if (HYPERVISOR_set_trap_table(xen_idt))
1447 panic("HYPERVISOR_set_trap_table() failed"); 1450 panic("HYPERVISOR_set_trap_table() failed");
1448#endif 1451#endif
1449} 1452}
1450 1453
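The split here follows from how Xen PV guests run: the kernel is not privileged enough to execute lidt itself, so instead of loading a descriptor table it registers an array of trap_info entries (xen_idt, filled in elsewhere) with the hypervisor, which then dispatches exceptions on the kernel's behalf.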